| |
@@ -1,71 +1,166 @@
|
| |
#!/usr/bin/env python3
|
| |
- """ For each packages in src.rpms folder :"""
|
| |
- """ extract srpm """
|
| |
- """ run the translation_finder """
|
| |
- """ Then, concat csv files """
|
| |
-
|
| |
+ # For each package in the src.rpms folder:
|
| |
+ # extract srpm
|
| |
+ # run the translation_finder
|
| |
+ # Then, concat csv files
|
| |
import argparse
|
| |
+ import dnf
|
| |
+ import json
|
| |
import glob
|
| |
+ import distro
|
| |
import os
|
| |
+ import rpm
|
| |
import subprocess
|
| |
import tempfile
|
| |
- import yaml
|
| |
+ import time
|
| |
+ import datetime as dt
|
| |
+ import re
|
| |
|
| |
- from shutil import copyfile, copy2
|
| |
+ from shutil import copyfile
|
| |
from translation_finder import discover
|
| |
+ from urllib.parse import urlparse
|
| |
+
|
| |
|
| |
def main():
    """Handle params"""
    # Fetch the distribution's SRPM URL list, download each source RPM,
    # run the translation finder on it, and record per-package results
    # in results/f<release>/data.json.

    parser = argparse.ArgumentParser(
        description="Computes stats for each srpm detected")
    parser.add_argument("filter", default=None, nargs='?',
                        help="package name filter (regex)")
    parser.add_argument("-k", "--keep-srpms", default=False,
                        action='store_true', dest='keep',
                        help="Keep SRPMs in /srpms")
    parser.add_argument("-f", "--force", default=False,
                        action='store_true', dest='force',
                        help="Ignore past progression state")
    args = parser.parse_args()

    # Optional package-name filter, anchored so the regex must match the
    # whole package name.
    srpm_regex = None
    if args.filter:
        srpm_regex = re.compile("^{}$".format(args.filter))

    # Output folders are keyed on the release of the running distribution.
    (distname, distrel, distid) = distro.linux_distribution()
    result_folder = "./results/f{v}/".format(v=distrel)
    tm_folder = "./tm/f{v}/".format(v=distrel)
    srpms_path = "/srpms"

    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    if not os.path.exists(tm_folder):
        os.makedirs(tm_folder)

    processing_file = os.path.join(result_folder, "data.json")
    srpm_list_file = os.path.join(result_folder, "srpm.txt")
    url_list = None

    # Reuse the cached URL list when it is less than 24 hours old.
    if os.path.isfile(srpm_list_file):
        list_file_stats = os.stat(srpm_list_file)
        last_mod = dt.datetime.fromtimestamp(list_file_stats.st_mtime)
        if dt.datetime.now() - last_mod < dt.timedelta(hours=24):
            with open(srpm_list_file) as f:
                url_list = f.readlines()

    if not url_list:
        print("Fetching SRPMs url list")
        # Ask dnf for every SRPM download URL; the shell pipeline keeps
        # only the src.rpm lines.
        p = subprocess.Popen('dnf download --source --skip-broken --url "*" | grep src.rpm',
                             stdout=subprocess.PIPE,
                             shell=True)

        urls = str(p.stdout.read(), "utf-8")
        with open(srpm_list_file, 'w') as f:
            f.write(urls)
        url_list = urls.splitlines()

    # Load processing data, if any
    try:
        with open(processing_file) as f:
            data = json.load(f)
    except BaseException:
        data = {}

    count = 0
    total_urls = len(url_list)

    with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmp:
        for line in url_list:
            count += 1
            url = urlparse(line.strip())
            if not url.scheme:
                # skip lines that are not URLs (e.g. stray dnf output)
                continue
            srpm_filename = os.path.basename(url.path)
            # Parse name/epoch/version/release out of the SRPM file name.
            srpm_data = dnf.subject.Subject(srpm_filename)
            package = srpm_data.get_nevra_possibilities(forms=1)[0]

            if srpm_regex and not srpm_regex.match(package.name):
                continue

            if package.name in data and not args.force:
                # Compare version
                known_package = dnf.subject.Subject(
                    data[package.name]["srpm"]).get_nevra_possibilities(forms=1)[0]
                # labelCompare <= 0 means the candidate is not newer than
                # the version already processed, so skip it.
                if rpm.labelCompare(
                        (package.epoch,
                         package.version,
                         package.release),
                        (known_package.epoch,
                         known_package.version,
                         known_package.release)) <= 0:
                    print("{c}/{t} skipping already processed {n}".format(
                        c=count, t=total_urls, n=package.name))
                    continue

            print("{c}/{t} processing {n}".format(
                c=count, t=total_urls, n=package.name))

            srpm_path = os.path.join(srpms_path, srpm_filename)
            if not os.path.isfile(srpm_path):
                print("downloading {}".format(srpm_filename))
                if url.scheme == "rsync":
                    dl = subprocess.run(
                        ['rsync', url.geturl(), srpms_path],
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT)
                else:
                    # curl: follow redirects and save under the remote
                    # file name inside /srpms
                    dl = subprocess.run(
                        ['curl', '-L', '--remote-name', url.geturl()],
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT,
                        cwd=srpms_path)

                if dl.returncode:
                    print("error downloading srpm:")
                    print(dl.stdout)
                    continue

            extract_srpm(tmp, srpm_path, result_folder)
            (tsearch, tcopy, results) = discover_translations(
                tmp, package.name, result_folder, tm_folder)

            if not args.keep:
                os.unlink(srpm_path)

            # save processed srpm name & version
            data[package.name] = {
                "srpm": srpm_filename,
                "tsearch": tsearch,
                "tcopy": tcopy,
                "results": results}

            # Persist progress after every package so an interrupted run
            # can resume where it left off.
            with open(processing_file, "w") as f:
                json.dump(data, f, indent=2)
            print("")

            # if package.startswith("libreoffice"):
            # print("package ignored because really slow, please use --srpm")
            # continue

    subprocess.run(['./concat_csv.sh', result_folder],
                   check=True)
|
| |
|
| |
+
|
| |
def extract_srpm(tmp, name, result_folder):
|
| |
"""extract srpm page"""
|
| |
print("extract_srpm: " + name)
|
| |
@@ -79,40 +174,55 @@
|
| |
out.close()
|
| |
error.close()
|
| |
|
| |
+
|
| |
def discover_translations(tmp, name, result_folder, tm_folder):
    """Run translation-finder on *tmp* and dispatch each hit to a handler.

    Returns a tuple (tsearch, tcopy, cresults): the discovery time in
    seconds, the processing time in seconds, and a dict counting how many
    file masks were detected per file format.
    """
    print("discover_translations: " + tmp)
    found = []

    # Time the discovery phase.
    search_start = time.time()
    try:
        found = discover(tmp)
    except OSError:
        # Record the failure and carry on with an empty result set.
        with open(result_folder + "/errors.txt", "a") as errors:
            errors.write(name + " on discover_translations\n")
    tsearch = round(time.time() - search_start, 1)

    # Time the per-format processing phase.
    copy_start = time.time()
    for entry in found:
        # TODO: multiple translation files for same package
        # gnome-clocks-3.32.0-1.fc30.src.rpm
        fmt = entry["file_format"]
        if fmt == "po":
            get_po_translation_level(
                tmp, entry, name, result_folder, tm_folder)
        elif fmt == "ts":
            get_ts_translation_level(tmp, entry, name, result_folder)
        elif fmt == "json":
            get_json_translation_level(
                tmp, entry, name, result_folder)
        elif fmt == "auto":
            # it's a detection of .tx configuration
            continue
        else:
            unknown_format(entry, name, fmt, result_folder)
    tcopy = round(time.time() - copy_start, 1)

    # Tally how many masks were detected per file format.
    cresults = dict()
    for entry in found:
        cresults[entry["file_format"]] = cresults.get(entry["file_format"], 0) + 1

    return (tsearch, tcopy, cresults)
|
| |
+
|
| |
|
| |
def get_po_translation_level(path, mask, name, result_folder, tm_folder):
|
| |
filemask = mask["filemask"]
|
| |
@@ -127,13 +237,18 @@
|
| |
error.close()
|
| |
|
| |
# Copy translation files in translation memory
|
| |
- for po in glob.glob(path +"/"+ filemask):
|
| |
- dest = tm_folder +"/"+ name +"/"+ filemask.split("*")[0]
|
| |
+ for po in glob.glob(path + "/" + filemask):
|
| |
+ dest = tm_folder + "/" + name + "/" + filemask.split("*")[0]
|
| |
os.makedirs(dest, exist_ok=True)
|
| |
- copy2(po, dest)
|
| |
-
|
| |
- subprocess.run(["sed", "-i", "-e", "s|{p}|.|g".format(p=path),
|
| |
- result_folder + '/{p}.errors.txt'.format(p=name)], check=True)
|
| |
+ # use copyfile instead of copy2 to handle read-only files in rpm
|
| |
+ copyfile(po, os.path.join(dest, os.path.basename(po)))
|
| |
+
|
| |
+ subprocess.run(["sed",
|
| |
+ "-i",
|
| |
+ "-e",
|
| |
+ "s|{p}|.|g".format(p=path),
|
| |
+ result_folder + '/{p}.errors.txt'.format(p=name)],
|
| |
+ check=True)
|
| |
|
| |
|
| |
def get_ts_translation_level(path, mask, name, result_folder):
|
| |
@@ -148,8 +263,13 @@
|
| |
stats.close()
|
| |
error.close()
|
| |
|
| |
- subprocess.run(["sed", "-i", "-e", "s|{p}|.|g".format(p=path),
|
| |
- result_folder + '/{p}.errors.txt'.format(p=name)], check=True)
|
| |
+ subprocess.run(["sed",
|
| |
+ "-i",
|
| |
+ "-e",
|
| |
+ "s|{p}|.|g".format(p=path),
|
| |
+ result_folder + '/{p}.errors.txt'.format(p=name)],
|
| |
+ check=True)
|
| |
+
|
| |
|
| |
def get_json_translation_level(path, mask, name, result_folder):
|
| |
filemask = mask["filemask"]
|
| |
@@ -159,41 +279,66 @@
|
| |
error = open(result_folder + '/{p}.errors.txt'.format(p=name), 'a')
|
| |
|
| |
# move only related json files to a temporary folder
|
| |
- with tempfile.TemporaryDirectory() as tmpjson:
|
| |
- for filename in glob.iglob(path+"/"+filemask):
|
| |
+ with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmpjson:
|
| |
+ for filename in glob.iglob(path + "/" + filemask):
|
| |
# if filesare in language subfolder, reproduce the hierarchy
|
| |
- dest = os.path.join(*(os.path.dirname(filename).split(os.path.sep)[3:]))
|
| |
+ dest = os.path.join(
|
| |
+ *(os.path.dirname(filename).split(os.path.sep)[3:]))
|
| |
os.makedirs(tmpjson + "/" + dest, exist_ok=True)
|
| |
|
| |
- copyfile(filename, tmpjson + "/" + dest + "/" + os.path.basename(filename))
|
| |
+ copyfile(
|
| |
+ filename,
|
| |
+ tmpjson +
|
| |
+ "/" +
|
| |
+ dest +
|
| |
+ "/" +
|
| |
+ os.path.basename(filename))
|
| |
|
| |
# convert json files to po files
|
| |
- with tempfile.TemporaryDirectory() as tmppo:
|
| |
+ with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmppo:
|
| |
# use existing template, in not existing (probably a bug), try "en"
|
| |
- template_file = tmpjson+"/"+mask.get("template", filemask.replace("*", "en"))
|
| |
+ template_file = tmpjson + "/" + \
|
| |
+ mask.get("template", filemask.replace("*", "en"))
|
| |
|
| |
if os.path.isfile(template_file):
|
| |
- subprocess.run(["json2po", "-t", template_file, tmpjson, tmppo, "--progress=none"],
|
| |
- stderr=error, check=True, cwd=tmppo)
|
| |
+ subprocess.run(["json2po",
|
| |
+ "-t",
|
| |
+ template_file,
|
| |
+ tmpjson,
|
| |
+ tmppo,
|
| |
+ "--progress=none"],
|
| |
+ stderr=error,
|
| |
+ check=True,
|
| |
+ cwd=tmppo)
|
| |
|
| |
# compute stats
|
| |
- subprocess.run(["pocount", filemask.split("*")[0], "--csv"],
|
| |
- stdout=stats, stderr=error, check=True, cwd=tmppo)
|
| |
+ subprocess.run(["pocount",
|
| |
+ filemask.split("*")[0],
|
| |
+ "--csv"],
|
| |
+ stdout=stats,
|
| |
+ stderr=error,
|
| |
+ check=True,
|
| |
+ cwd=tmppo)
|
| |
else:
|
| |
print(" template doesn't exist, is it a translation-finder bug?")
|
| |
|
| |
stats.close()
|
| |
error.close()
|
| |
|
| |
- subprocess.run(["sed", "-i", "-e", "s|{p}|.|g".format(p=path),
|
| |
- result_folder + '/{p}.errors.txt'.format(p=name)], check=True)
|
| |
+ subprocess.run(["sed",
|
| |
+ "-i",
|
| |
+ "-e",
|
| |
+ "s|{p}|.|g".format(p=path),
|
| |
+ result_folder + '/{p}.errors.txt'.format(p=name)],
|
| |
+ check=True)
|
| |
+
|
| |
|
| |
def unknown_format(results, srpm, tformat, result_folder):
    """Record a translation file whose format has no dedicated handler.

    Appends "<srpm> <filemask>" to todo_<format>.txt inside result_folder
    so unsupported formats can be triaged later.
    """
    print("unknown_format: " + tformat)

    todo_path = result_folder + "/todo_" + tformat + ".txt"
    entry = srpm + " " + results["filemask"] + "\n"
    with open(todo_path, "a") as todo:
        todo.write(entry)
| |
|
| |
+
|
| |
# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()
|
| |
-
|
| |