From 120ba33ff99c092464e485b11080297eddd6a186 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Apr 02 2020 09:41:18 +0000 Subject: [PATCH 1/2] Simplify co-package query. It fetches 10 extra fields, so drop those, and then let sqlite handle uniqueness. --- diff --git a/mdapi/__init__.py b/mdapi/__init__.py index e5b482a..4dfc286 100644 --- a/mdapi/__init__.py +++ b/mdapi/__init__.py @@ -153,7 +153,7 @@ async def _expand_pkg_info(pkgs, branch, repotype=None): if pkg.rpm_sourcerpm: async with db.execute(GET_CO_PACKAGE, (pkg.rpm_sourcerpm,)) as cursor: copkgs = await cursor.fetchall() - out['co-packages'] = list({cpkg[2] for cpkg in copkgs}) + out['co-packages'] = [cpkg[0] for cpkg in copkgs] else: out['co-packages'] = [] out['repo'] = repotype if repotype else 'release' diff --git a/mdapi/db.py b/mdapi/db.py index 95c6d90..4f132d5 100644 --- a/mdapi/db.py +++ b/mdapi/db.py @@ -46,17 +46,7 @@ GET_PACKAGE_INFO = """SELECT rowid, FROM {} WHERE pkgKey = ?""" -GET_CO_PACKAGE = """SELECT pkgKey, - pkgId, - name, - rpm_sourcerpm, - epoch, - version, - release, - arch, - summary, - description, - url +GET_CO_PACKAGE = """SELECT DISTINCT(name) FROM packages WHERE rpm_sourcerpm = ?""" From 1081cbbfe080557c30cade030f278af34f4b1951 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Apr 05 2020 04:18:52 +0000 Subject: [PATCH 2/2] Add sqlite index to srpm column. 
---
diff --git a/mdapi-get_repo_md b/mdapi-get_repo_md
index 2992edb..1471bd3 100755
--- a/mdapi-get_repo_md
+++ b/mdapi-get_repo_md
@@ -192,6 +192,35 @@ def decompress_db(name, archive, location):
         raise NotImplementedError(archive)
 
 
+def index_db(name, tempdb):
+    """Add an index on ``packages.rpm_sourcerpm`` to a primary database.
+
+    The progress line is printed for every file, but only files whose
+    name ends in ``primary.sqlite`` are modified. The index serves
+    queries that filter on ``rpm_sourcerpm``, such as mdapi's
+    co-package lookup.
+
+    :arg name: label printed, left-justified to ``padding``, at the
+        start of the progress line.
+    :arg tempdb: path of the sqlite database file to index.
+    :raises sqlite3.Error: if the database cannot be opened or indexed.
+    """
+    print(f'{name.ljust(padding)} Indexing file: {tempdb}')
+
+    if tempdb.endswith('primary.sqlite'):
+        conn = sqlite3.connect(tempdb)
+        try:
+            # IF NOT EXISTS keeps a re-run over an already indexed file
+            # from failing with "index packageSource already exists".
+            conn.execute(
+                'CREATE INDEX IF NOT EXISTS packageSource '
+                'ON packages (rpm_sourcerpm)')
+            conn.commit()
+        finally:
+            # Release the file handle even when indexing fails.
+            conn.close()
+
+
 def compare_dbs(name, db1, db2, cache1, cache2):
     print(f'{name.ljust(padding)} Comparing {db1} and {db2}')
 
@@ -412,6 +441,7 @@ def process_repo(repo):
 
     download_db(name, repomd_url, archive)
     decompress_db(name, archive, tempdb)
+    index_db(name, tempdb)
     if PUBLISH_CHANGES:
         packages = compare_dbs(name, tempdb, destfile, cache1, cache2)
         publish_changes(name, packages, repomd_url)