| |
@@ -1,71 +1,166 @@
|
| |
#!/usr/bin/env python3
|
| |
- """ For each packages in src.rpms folder :"""
|
| |
- """ extract srpm """
|
| |
- """ run the translation_finder """
|
| |
- """ Then, concat csv files """
|
| |
-
|
| |
+ # For each package in the src.rpms folder:
|
| |
+ # extract srpm
|
| |
+ # run the translation_finder
|
| |
+ # Then, concat csv files
|
| |
import argparse
|
| |
+ import dnf
|
| |
+ import json
|
| |
import glob
|
| |
+ import distro
|
| |
import os
|
| |
+ import rpm
|
| |
import subprocess
|
| |
import tempfile
|
| |
- import yaml
|
| |
+ import time
|
| |
+ import datetime as dt
|
| |
+ import re
|
| |
|
| |
- from shutil import copyfile, copy2
|
| |
+ from shutil import copyfile
|
| |
from translation_finder import discover
|
| |
+ from urllib.parse import urlparse
|
| |
+
|
| |
|
| |
def main():
    """Handle params"""
    # Fetch the distribution's SRPM URL list, download each source RPM,
    # run the translation finder on it, and record per-package results
    # in results/f<release>/data.json.

    parser = argparse.ArgumentParser(
        description="Computes stats for each srpm detected")
    parser.add_argument("filter", default=None, nargs='?',
                        help="package name filter (regex)")
    parser.add_argument("-k", "--keep-srpms", default=False,
                        action='store_true', dest='keep',
                        help="Keep SRPMs in /srpms")
    parser.add_argument("-f", "--force", default=False,
                        action='store_true', dest='force',
                        help="Ignore past progression state")
    args = parser.parse_args()

    # Optional package-name filter, anchored so the regex must match the
    # whole package name.
    srpm_regex = None
    if args.filter:
        srpm_regex = re.compile("^{}$".format(args.filter))

    # Output folders are keyed on the release of the running distribution.
    (distname, distrel, distid) = distro.linux_distribution()
    result_folder = "./results/f{v}/".format(v=distrel)
    tm_folder = "./tm/f{v}/".format(v=distrel)
    srpms_path = "/srpms"

    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    if not os.path.exists(tm_folder):
        os.makedirs(tm_folder)

    processing_file = os.path.join(result_folder, "data.json")
    srpm_list_file = os.path.join(result_folder, "srpm.txt")
    url_list = None

    # Reuse the cached URL list when it is less than 24 hours old.
    if os.path.isfile(srpm_list_file):
        list_file_stats = os.stat(srpm_list_file)
        last_mod = dt.datetime.fromtimestamp(list_file_stats.st_mtime)
        if dt.datetime.now() - last_mod < dt.timedelta(hours=24):
            with open(srpm_list_file) as f:
                url_list = f.readlines()

    if not url_list:
        print("Fetching SRPMs url list")
        # Ask dnf for every SRPM download URL; the shell pipeline keeps
        # only the src.rpm lines.
        p = subprocess.Popen('dnf download --source --skip-broken --url "*" | grep src.rpm',
                             stdout=subprocess.PIPE,
                             shell=True)

        urls = str(p.stdout.read(), "utf-8")
        with open(srpm_list_file, 'w') as f:
            f.write(urls)
        url_list = urls.splitlines()

    # Load processing data, if any
    try:
        with open(processing_file) as f:
            data = json.load(f)
    except BaseException:
        data = {}

    count = 0
    total_urls = len(url_list)

    with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmp:
        for line in url_list:
            count += 1
            url = urlparse(line.strip())
            if not url.scheme:
                # skip lines that are not URLs (e.g. stray dnf output)
                continue
            srpm_filename = os.path.basename(url.path)
            # Parse name/epoch/version/release out of the SRPM file name.
            srpm_data = dnf.subject.Subject(srpm_filename)
            package = srpm_data.get_nevra_possibilities(forms=1)[0]

            if srpm_regex and not srpm_regex.match(package.name):
                continue

            if package.name in data and not args.force:
                # Compare version
                known_package = dnf.subject.Subject(
                    data[package.name]["srpm"]).get_nevra_possibilities(forms=1)[0]
                # labelCompare <= 0 means the candidate is not newer than
                # the version already processed, so skip it.
                if rpm.labelCompare(
                        (package.epoch,
                         package.version,
                         package.release),
                        (known_package.epoch,
                         known_package.version,
                         known_package.release)) <= 0:
                    print("{c}/{t} skipping already processed {n}".format(
                        c=count, t=total_urls, n=package.name))
                    continue

            print("{c}/{t} processing {n}".format(
                c=count, t=total_urls, n=package.name))

            srpm_path = os.path.join(srpms_path, srpm_filename)
            if not os.path.isfile(srpm_path):
                print("downloading {}".format(srpm_filename))
                if url.scheme == "rsync":
                    dl = subprocess.run(
                        ['rsync', url.geturl(), srpms_path],
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT)
                else:
                    # curl: follow redirects and save under the remote
                    # file name inside /srpms
                    dl = subprocess.run(
                        ['curl', '-L', '--remote-name', url.geturl()],
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT,
                        cwd=srpms_path)

                if dl.returncode:
                    print("error downloading srpm:")
                    print(dl.stdout)
                    continue

            extract_srpm(tmp, srpm_path, result_folder)
            (tsearch, tcopy, results) = discover_translations(
                tmp, package.name, result_folder, tm_folder)

            if not args.keep:
                os.unlink(srpm_path)

            # save processed srpm name & version
            data[package.name] = {
                "srpm": srpm_filename,
                "tsearch": tsearch,
                "tcopy": tcopy,
                "results": results}

            # Persist progress after every package so an interrupted run
            # can resume where it left off.
            with open(processing_file, "w") as f:
                json.dump(data, f, indent=2)
            print("")

            # if package.startswith("libreoffice"):
            # print("package ignored because really slow, please use --srpm")
            # continue

    subprocess.run(['./concat_csv.sh', result_folder],
                   check=True)
|
| |
|
| |
+
|
| |
def extract_srpm(tmp, name, result_folder):
|
| |
"""extract srpm page"""
|
| |
print("extract_srpm: " + name)
|
| |
@@ -79,40 +174,55 @@
|
| |
out.close()
|
| |
error.close()
|
| |
|
| |
+
|
| |
def discover_translations(tmp, name, result_folder, tm_folder):
    """Run translation-finder on *tmp* and dispatch each hit to a handler.

    Returns a tuple (tsearch, tcopy, cresults): the discovery time in
    seconds, the processing time in seconds, and a dict counting how many
    file masks were detected per file format.
    """
    print("discover_translations: " + tmp)
    found = []

    # Time the discovery phase.
    search_start = time.time()
    try:
        found = discover(tmp)
    except OSError:
        # Record the failure and carry on with an empty result set.
        with open(result_folder + "/errors.txt", "a") as errors:
            errors.write(name + " on discover_translations\n")
    tsearch = round(time.time() - search_start, 1)

    # Time the per-format processing phase.
    copy_start = time.time()
    for entry in found:
        # TODO: multiple translation files for same package
        # gnome-clocks-3.32.0-1.fc30.src.rpm
        fmt = entry["file_format"]
        if fmt == "po":
            get_po_translation_level(
                tmp, entry, name, result_folder, tm_folder)
        elif fmt == "ts":
            get_ts_translation_level(tmp, entry, name, result_folder)
        elif fmt == "json":
            get_json_translation_level(
                tmp, entry, name, result_folder)
        elif fmt == "auto":
            # it's a detection of .tx configuration
            continue
        else:
            unknown_format(entry, name, fmt, result_folder)
    tcopy = round(time.time() - copy_start, 1)

    # Tally how many masks were detected per file format.
    cresults = dict()
    for entry in found:
        cresults[entry["file_format"]] = cresults.get(entry["file_format"], 0) + 1

    return (tsearch, tcopy, cresults)
|
| |
+
|
| |
|
| |
def get_po_translation_level(path, mask, name, result_folder, tm_folder):
|
| |
filemask = mask["filemask"]
|
| |
@@ -127,13 +237,18 @@
|
| |
error.close()
|
| |
|
| |
# Copy translation files in translation memory
|
| |
- for po in glob.glob(path +"/"+ filemask):
|
| |
- dest = tm_folder +"/"+ name +"/"+ filemask.split("*")[0]
|
| |
+ for po in glob.glob(path + "/" + filemask):
|
| |
+ dest = tm_folder + "/" + name + "/" + filemask.split("*")[0]
|
| |
os.makedirs(dest, exist_ok=True)
|
| |
- copy2(po, dest)
|
| |
-
|
| |
- subprocess.run(["sed", "-i", "-e", "s|{p}|.|g".format(p=path),
|
| |
- result_folder + '/{p}.errors.txt'.format(p=name)], check=True)
|
| |
+ # use copyfile instead of copy2 to handle read-only files in rpm
|
| |
+ copyfile(po, os.path.join(dest, os.path.basename(po)))
|
| |
+
|
| |
+ subprocess.run(["sed",
|
| |
+ "-i",
|
| |
+ "-e",
|
| |
+ "s|{p}|.|g".format(p=path),
|
| |
+ result_folder + '/{p}.errors.txt'.format(p=name)],
|
| |
+ check=True)
|
| |
|
| |
|
| |
def get_ts_translation_level(path, mask, name, result_folder):
|
| |
@@ -148,8 +263,13 @@
|
| |
stats.close()
|
| |
error.close()
|
| |
|
| |
- subprocess.run(["sed", "-i", "-e", "s|{p}|.|g".format(p=path),
|
| |
- result_folder + '/{p}.errors.txt'.format(p=name)], check=True)
|
| |
+ subprocess.run(["sed",
|
| |
+ "-i",
|
| |
+ "-e",
|
| |
+ "s|{p}|.|g".format(p=path),
|
| |
+ result_folder + '/{p}.errors.txt'.format(p=name)],
|
| |
+ check=True)
|
| |
+
|
| |
|
| |
def get_json_translation_level(path, mask, name, result_folder):
|
| |
filemask = mask["filemask"]
|
| |
@@ -159,41 +279,66 @@
|
| |
error = open(result_folder + '/{p}.errors.txt'.format(p=name), 'a')
|
| |
|
| |
# move only related json files to a temporary folder
|
| |
- with tempfile.TemporaryDirectory() as tmpjson:
|
| |
- for filename in glob.iglob(path+"/"+filemask):
|
| |
+ with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmpjson:
|
| |
+ for filename in glob.iglob(path + "/" + filemask):
|
| |
# if filesare in language subfolder, reproduce the hierarchy
|
| |
- dest = os.path.join(*(os.path.dirname(filename).split(os.path.sep)[3:]))
|
| |
+ dest = os.path.join(
|
| |
+ *(os.path.dirname(filename).split(os.path.sep)[3:]))
|
| |
os.makedirs(tmpjson + "/" + dest, exist_ok=True)
|
| |
|
| |
- copyfile(filename, tmpjson + "/" + dest + "/" + os.path.basename(filename))
|
| |
+ copyfile(
|
| |
+ filename,
|
| |
+ tmpjson +
|
| |
+ "/" +
|
| |
+ dest +
|
| |
+ "/" +
|
| |
+ os.path.basename(filename))
|
| |
|
| |
# convert json files to po files
|
| |
- with tempfile.TemporaryDirectory() as tmppo:
|
| |
+ with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmppo:
|
| |
# use existing template, in not existing (probably a bug), try "en"
|
| |
- template_file = tmpjson+"/"+mask.get("template", filemask.replace("*", "en"))
|
| |
+ template_file = tmpjson + "/" + \
|
| |
+ mask.get("template", filemask.replace("*", "en"))
|
| |
|
| |
if os.path.isfile(template_file):
|
| |
- subprocess.run(["json2po", "-t", template_file, tmpjson, tmppo, "--progress=none"],
|
| |
- stderr=error, check=True, cwd=tmppo)
|
| |
+ subprocess.run(["json2po",
|
| |
+ "-t",
|
| |
+ template_file,
|
| |
+ tmpjson,
|
| |
+ tmppo,
|
| |
+ "--progress=none"],
|
| |
+ stderr=error,
|
| |
+ check=True,
|
| |
+ cwd=tmppo)
|
| |
|
| |
# compute stats
|
| |
- subprocess.run(["pocount", filemask.split("*")[0], "--csv"],
|
| |
- stdout=stats, stderr=error, check=True, cwd=tmppo)
|
| |
+ subprocess.run(["pocount",
|
| |
+ filemask.split("*")[0],
|
| |
+ "--csv"],
|
| |
+ stdout=stats,
|
| |
+ stderr=error,
|
| |
+ check=True,
|
| |
+ cwd=tmppo)
|
| |
else:
|
| |
print(" template doesn't exist, is it a translation-finder bug?")
|
| |
|
| |
stats.close()
|
| |
error.close()
|
| |
|
| |
- subprocess.run(["sed", "-i", "-e", "s|{p}|.|g".format(p=path),
|
| |
- result_folder + '/{p}.errors.txt'.format(p=name)], check=True)
|
| |
+ subprocess.run(["sed",
|
| |
+ "-i",
|
| |
+ "-e",
|
| |
+ "s|{p}|.|g".format(p=path),
|
| |
+ result_folder + '/{p}.errors.txt'.format(p=name)],
|
| |
+ check=True)
|
| |
+
|
| |
|
| |
def unknown_format(results, srpm, tformat, result_folder):
    """Record a translation file whose format has no dedicated handler.

    Appends "<srpm> <filemask>" to todo_<format>.txt inside result_folder
    so unsupported formats can be triaged later.
    """
    print("unknown_format: " + tformat)

    todo_path = result_folder + "/todo_" + tformat + ".txt"
    entry = srpm + " " + results["filemask"] + "\n"
    with open(todo_path, "a") as todo:
        todo.write(entry)
| |
|
| |
+
|
| |
# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()
|
| |
-
|
| |