PR#10: make stats calculation independent - fedora-l10n/fedora-localization-statistics

		`@@ -28,15 +28,25 @@`

		with `$script`, one of the following:

		`- ## Compute data`
		`+ ## Get the source packages`

		- `./build.py`
		+ `./build.py` gets the SRPM list, runs the translation discovery and copies the translation files of each package

		`- The result will be in multiple files inside the results folder.`
		`+ ## Detect languages`

		`- ## Produce stats`
		+ `./build_language_list.py`

		- `./build_stats.py`
		`+ Parse the translation files of every package to deduce the list of languages.`
		`+`
		`+ ## Produce per package stats`
		`+`
		+ `./build_packages_stats.py`
		`+`
		`+ For each package, produce progression stats.`
		`+`
		`+ ## Produce global stats`
		`+`
		+ `./build_global_stats.py`

		`Applies data cleanups and enhancements (cldr name).`

		`@@ -47,6 +57,15 @@`
		`Aggregate the data per language, then apply it on territories (it uses stats from`
		`CLDR with language per territory).`

		`+ ## Produce translation memories`
		`+`
		+ `./build_tm.py`
		`+`
		`+ Detect the list of languages`
		`+ Aggregate all files for a language and produce a compendium, a terminology and a translation memory.`
		`+`
		`+ TODO: language detection should probably be in a dedicated build file.`
		`+`
		`# Output files`

		* `0.error.language not in cldr.csv` contains unknown languages (lines are removed)

build.py

file modified

+23 -141

		`@@ -42,13 +42,13 @@`

		`(distname, distrel, distid) = distro.linux_distribution()`
		`result_folder = "./results/f{v}/stats/".format(v=distrel)`
		`- tm_folder = "./results/f{v}/packages/".format(v=distrel)`
		`+ packages_folder = "./results/f{v}/packages/".format(v=distrel)`
		`srpms_path = "/srpms"`

		`if not os.path.exists(result_folder):`
		`os.makedirs(result_folder)`
		`- if not os.path.exists(tm_folder):`
		`- os.makedirs(tm_folder)`
		`+ if not os.path.exists(packages_folder):`
		`+ os.makedirs(packages_folder)`

		`processing_file = os.path.join("./results/f{v}/".format(v=distrel), "data.json")`
		`srpm_list_file = os.path.join(srpms_path, "srpm.txt")`
		`@@ -137,7 +137,7 @@`

		`extract_srpm(tmp, srpm_path, result_folder)`
		`(tsearch, tcopy, results) = discover_translations(`
		`- tmp, package.name, result_folder, tm_folder)`
		`+ tmp, package.name, result_folder, packages_folder)`

		`if not args.keep:`
		`os.unlink(srpm_path)`
		`@@ -153,8 +153,6 @@`
		`json.dump(data, f, indent=2)`
		`print("")`

		`- concat_csv(result_folder)`
		`-`

		`def extract_srpm(tmp, name, result_folder):`
		`"""extract srpm page"""`
		`@@ -165,7 +163,8 @@`
		`subprocess.run(['./extract_srpm.sh', tmp, name],`
		`stdout=out, stderr=error, check=True)`

		`- def discover_translations(tmp, name, result_folder, tm_folder):`
		`+`
		`+ def discover_translations(tmp, name, result_folder, packages_folder):`
		`"""find po file"""`
		`print("discover_translations: " + tmp)`
		`translation_files = []`
		`@@ -185,30 +184,15 @@`
		`tcopy = time.time()`

		`if translation_files:`
		`- if not os.path.exists(os.path.join(tm_folder, name)):`
		`- os.makedirs(os.path.join(tm_folder, name))`
		`+ if not os.path.exists(os.path.join(packages_folder, name)):`
		`+ os.makedirs(os.path.join(packages_folder, name))`

		`- with open(os.path.join(tm_folder, name, "discover.json"), 'w') as f:`
		`+ with open(os.path.join(packages_folder, name, "discover.json"), 'w') as f:`
		`f.write(json.dumps(translation_files, indent=2))`

		`for translation in translation_files:`
		`- if translation["file_format"] == "po":`
		`- get_po_translation_level(`
		`- tmp, translation, name, result_folder, tm_folder)`
		`- elif translation["file_format"] == "ts":`
		`- get_ts_translation_level(tmp, translation, name, result_folder)`
		`- elif translation["file_format"] == "json":`
		`- get_json_translation_level(`
		`- tmp, translation, name, result_folder)`
		`- elif translation["file_format"] == "auto":`
		`- # it's a detection of .tx configuration`
		`- continue`
		`- else:`
		`- unknown_format(`
		`- translation,`
		`- name,`
		`- translation["file_format"],`
		`- result_folder)`
		`+ copy_translations(tmp, translation, name, result_folder, packages_folder)`
		`+`
		`tcopy = round(time.time() - tcopy, 1)`

		`cresults = dict()`
		`@@ -218,121 +202,19 @@`
		`return (tsearch, tcopy, cresults)`


		`- def get_po_translation_level(path, mask, name, result_folder, tm_folder):`
		`- filemask = mask["filemask"]`
		`-`
		`- with open(result_folder + '/{p}.stats.csv'.format(p=name), 'a') as stats:`
		`- with open(result_folder + '/{p}.errors.txt'.format(p=name), 'a') as error:`
		`- subprocess.run(["pocount", filemask.split("*")[0], "--csv"],`
		`- stdout=stats, stderr=error, check=True, cwd=path)`
		`-`
		`- # Copy translation files in translation memory`
		`- for po in glob.glob(path + "/" + filemask):`
		`- dest = tm_folder + "/" + name + "/" + filemask.split("*")[0]`
		`- os.makedirs(dest, exist_ok=True)`
		`- # use copyfile instead of copy2 to handle read-only files in rpm`
		`- copyfile(po, os.path.join(dest, os.path.basename(po)))`
		`-`
		`- subprocess.run(["sed",`
		`- "-i",`
		`- "-e",`
		`- "s\|{p}\|.\|g".format(p=path),`
		`- result_folder + '/{p}.errors.txt'.format(p=name)],`
		`- check=True)`
		`-`
		`-`
		`- def get_ts_translation_level(path, mask, name, result_folder):`
		`- filemask = mask["filemask"]`
		`-`
		`- with open(result_folder + '/{p}.stats.csv'.format(p=name), 'a') as stats:`
		`- with open(result_folder + '/{p}.errors.txt'.format(p=name), 'a') as error:`
		`- subprocess.run(["pocount", filemask.split("*")[0], "--csv"],`
		`- stdout=stats, stderr=error, check=True, cwd=path)`
		`-`
		`- subprocess.run(["sed",`
		`- "-i",`
		`- "-e",`
		`- "s\|{p}\|.\|g".format(p=path),`
		`- result_folder + '/{p}.errors.txt'.format(p=name)],`
		`- check=True)`
		`-`
		`-`
		`- def get_json_translation_level(path, mask, name, result_folder):`
		`- filemask = mask["filemask"]`
		`-`
		`- stats = open(result_folder + '/{p}.stats.csv'.format(p=name), 'a')`
		`- error = open(result_folder + '/{p}.errors.txt'.format(p=name), 'a')`
		`-`
		`- # move only related json files to a temporary folder`
		`- with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmpjson:`
		`- for filename in glob.iglob(path + "/" + filemask):`
		`- # if filesare in language subfolder, reproduce the hierarchy`
		`- dest = os.path.join(`
		`- *(os.path.dirname(filename).split(os.path.sep)[3:]))`
		`- os.makedirs(tmpjson + "/" + dest, exist_ok=True)`
		`-`
		`- copyfile(`
		`- filename,`
		`- tmpjson +`
		`- "/" +`
		`- dest +`
		`- "/" +`
		`- os.path.basename(filename))`
		`-`
		`- # convert json files to po files`
		`- with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmppo:`
		`- # use existing template, in not existing (probably a bug), try "en"`
		`- template_file = tmpjson + "/" + \`
		`- mask.get("template", filemask.replace("*", "en"))`
		`-`
		`- if os.path.isfile(template_file):`
		`- subprocess.run(["json2po",`
		`- "-t",`
		`- template_file,`
		`- tmpjson,`
		`- tmppo,`
		`- "--progress=none"],`
		`- stderr=error,`
		`- check=True,`
		`- cwd=tmppo)`
		`-`
		`- # compute stats`
		`- subprocess.run(["pocount",`
		`- filemask.split("*")[0],`
		`- "--csv"],`
		`- stdout=stats,`
		`- stderr=error,`
		`- check=True,`
		`- cwd=tmppo)`
		`- else:`
		`- print(" template doesn't exist, is it a translation-finder bug?")`
		`-`
		`- stats.close()`
		`- error.close()`
		`-`
		`- subprocess.run(["sed",`
		`- "-i",`
		`- "-e",`
		`- "s\|{p}\|.\|g".format(p=path),`
		`- result_folder + '/{p}.errors.txt'.format(p=name)],`
		`- check=True)`
		`-`
		`-`
		`- def unknown_format(results, srpm, tformat, result_folder):`
		`- with open(result_folder + "/todo_" + tformat + ".txt", "a") as file:`
		`- file.write(srpm + " " + results["filemask"] + "\n")`
		`-`
		`-`
		`- def concat_csv(result_folder):`
		`- filenames = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]`
		`- filenames = [f for f in filenames if f.endswith(".stats.csv")]`
		`-`
		`- with open(os.path.join(result_folder, "_concat.csv"), "w") as outfile:`
		`- for fname in filenames:`
		`- with open(os.path.join(result_folder, fname)) as infile:`
		`- for line in infile:`
		`- outfile.write(line)`
		def copy_translations(tmp, translation, name, result_folder, packages_folder):
		    """Copy the translation files of one discovered component.

		    Only the "po" and "json" file formats are copied; entries of any other
		    format are ignored. Every file matching the file mask is stored under
		    ``<packages_folder>/<name>/`` at the same relative path it has inside
		    ``tmp``. Keeping the full relative path (instead of only the part of the
		    mask before the first ``*``) prevents files sharing a basename in
		    different language folders, e.g. ``locale/*/LC_MESSAGES/app.po``, from
		    overwriting each other.

		    Args:
		        tmp: root folder of the extracted source package.
		        translation: one discovery entry; its "filemask" (glob pattern
		            relative to ``tmp``) and "file_format" keys are read.
		        name: package name, used as sub-folder of ``packages_folder``.
		        result_folder: unused, kept so existing callers keep working.
		        packages_folder: root folder receiving the per-package copies.
		    """
		    filemask = translation["filemask"]

		    print("copy translations " + filemask)

		    if translation["file_format"] not in ("po", "json"):
		        return

		    package_root = os.path.join(packages_folder, name)

		    for source in glob.glob(tmp + "/" + filemask):
		        target = os.path.join(package_root, os.path.relpath(source, tmp))
		        os.makedirs(os.path.dirname(target), exist_ok=True)

		        # use copyfile instead of copy2 to handle read-only files in rpm
		        copyfile(source, target)

		`if __name__ == '__main__':`
		`main()`

build_global_stats.py ~~build_stats.py~~

file renamed

+16 -1

		`@@ -21,7 +21,10 @@`

		`args = parser.parse_args()`

		`- RESULT_FOLDER = "./results/f{r}".format(r=args.release)`
		`+ RESULT_FOLDER = "./results/f{r}/stats/".format(r=args.release)`
		`+ packages_folder = "./results/f{r}/packages/".format(r=args.release)`
		`+`
		`+ concat_csv(packages_folder, RESULT_FOLDER)`

		`file = RESULT_FOLDER + "/_concat.csv"`
		`parse(file)`
		`@@ -376,5 +379,17 @@`
		`dataset.to_csv(RESULT_FOLDER+"/"+name, index=False)`


		def concat_csv(packages_folder, stats_folder):
		    """Concatenate every per-package ``stats.csv`` into ``_concat.csv``.

		    Each sub-directory of ``packages_folder`` is visited in alphabetical
		    order, so the output is reproducible between runs, and the content of
		    its ``stats.csv`` is appended verbatim to
		    ``<stats_folder>/_concat.csv``. Packages without a ``stats.csv`` are
		    skipped.

		    Args:
		        packages_folder: folder containing one sub-directory per package.
		        stats_folder: folder receiving ``_concat.csv``; created if missing.
		    """
		    packages = sorted(
		        entry for entry in os.listdir(packages_folder)
		        if os.path.isdir(os.path.join(packages_folder, entry)))

		    # this step can run on its own: do not rely on another script having
		    # created the stats folder
		    os.makedirs(stats_folder, exist_ok=True)

		    with open(os.path.join(stats_folder, "_concat.csv"), "w") as outfile:
		        for package in packages:
		            try:
		                with open(os.path.join(packages_folder, package, "stats.csv")) as infile:
		                    outfile.writelines(infile)
		            except FileNotFoundError:
		                # no statistics were computed for this package
		                continue
		`+`
		`if __name__ == '__main__':`
		`main()`

build_language_list.py

file added

+133

		`@@ -0,0 +1,133 @@`
		`+ #!/usr/bin/env python3`
		`+ """ Parse translation files to deduct language list """`
		`+`
		`+ import argparse`
		`+ import json`
		`+ import os`
		`+ import time`
		`+ import polib`
		`+`
		`+ from shutil import rmtree`
		`+ from weblate_language_data import aliases, languages, language_codes`
		`+`
		def main():
		    """Rebuild the per-language lists of translation files of a release.

		    Parses the command line, empties ``./results/f<release>/languages/``,
		    runs ``detect_languages`` on ``./results/f<release>/packages/`` and
		    writes one ``<lang>.json`` file (the list of files of that language)
		    per detected language code.
		    """

		    parser = argparse.ArgumentParser(
		        description="Creates a list of languages form translation files")

		    parser.add_argument("--release", required=True, type=int, default=31,
		                        choices=[30, 31, 32],
		                        help="Provide the Fedora release to analyze")

		    # NOTE(review): accepted but not read below, the list is always rebuilt
		    parser.add_argument("--refresh", action="store_true",
		                        help="Force refresh")

		    args = parser.parse_args()

		    release_folder = "./results/f{v}/".format(v=args.release)
		    lang_path = os.path.join(release_folder, "languages/")
		    packages_path = os.path.join(release_folder, "packages/")

		    print("Refreshing the list of languages")
		    rmtree(lang_path, ignore_errors=True)
		    # makedirs instead of mkdir: works when the release folder does not
		    # exist yet and when rmtree silently failed to remove the folder
		    os.makedirs(lang_path, exist_ok=True)

		    start_time_search = time.time()

		    po_langs = detect_languages(packages_path)

		    for lang, files in po_langs.items():
		        with open(os.path.join(lang_path, lang + '.json'), 'w') as f:
		            json.dump(files, f, indent=2)

		    search_duration = round(time.time() - start_time_search, 1)
		    print(" Done in {d} seconds".format(d=search_duration))
		`+`
		`+`
		def detect_languages(tm_folder):
		    """Group the po files found under ``tm_folder`` by detected language.

		    Requires: a file hierarchy with po files.
		    Returns: a dictionary of lists, key=lang code, value=file list.
		    """
		    by_language = {}

		    for folder, _subfolders, filenames in os.walk(tm_folder):
		        for filename in filenames:
		            stem, extension = os.path.splitext(filename)
		            if extension != ".po":
		                continue

		            po_path = os.path.join(folder, filename)
		            metadata = dict()
		            error = ""
		            try:
		                metadata = polib.pofile(po_path).metadata
		            except UnicodeDecodeError:
		                # encoding error, to investigate before using it in TM
		                error = "error-unicode"
		            except OSError:
		                # maybe a polib bug? to investigate before using it in TM
		                error = "error-os"

		            code = choose_lang(stem, metadata, error)
		            by_language.setdefault(code, []).append(po_path)

		    return by_language
		`+`
		def choose_lang(filename, metadata, error):
		    """Choose the most likely language code of a po file.

		    The lower-cased "Language" metadata is tried first, then the lower-cased
		    file name, successively against ``language_codes.LANGUAGES``, the codes
		    of ``languages.LANGUAGES`` and the keys of ``aliases.ALIASES``. The
		    result is finally mapped through ``aliases.ALIASES`` once more when it
		    is itself an alias.

		    ``error`` is accepted but not read.

		    Returns: a language code, or "error" when nothing matched.
		    """
		    file_name = filename.lower()

		    try:
		        meta_language = metadata.get("Language").lower()
		    except AttributeError:
		        # no "Language" metadata
		        meta_language = ""

		    known_codes = language_codes.LANGUAGES
		    alias_table = aliases.ALIASES

		    if meta_language in known_codes:
		        chosen = meta_language
		    elif file_name in known_codes:
		        chosen = file_name
		    else:
		        # try languages (some codes here are excluded from language_codes)
		        listed = [entry[0] for entry in languages.LANGUAGES]

		        if meta_language in listed:
		            chosen = meta_language.lower()
		        elif file_name in listed:
		            chosen = file_name.lower()
		        # try ALIASES
		        elif meta_language in alias_table.keys():
		            chosen = alias_table[meta_language].lower()
		        elif file_name in alias_table.keys():
		            chosen = alias_table[file_name].lower()
		        else:
		            chosen = "error"

		    # harmonization (example: mo = ro_MD)
		    if chosen in alias_table.keys():
		        chosen = alias_table[chosen].lower()

		    return chosen
		`+`
		`+`
		`+ if __name__ == '__main__':`
		`+ main()`
		`+`

build_map.py

file modified

+1 -1

		`@@ -27,7 +27,7 @@`

		`args = parser.parse_args()`

		`- RESULT_FOLDER = "./results/f{r}".format(r=args.release)`
		`+ RESULT_FOLDER = "./results/f{r}/stats/".format(r=args.release)`

		`file = RESULT_FOLDER + "/3.result.csv"`
		`parse(file, args.include_english, args.include_nonofficial)`

build_packages_stats.py

file added

+136

		`@@ -0,0 +1,136 @@`
		`+ #!/usr/bin/env python3`
		`+ """For each package, compute stats"""`
		`+`
		`+ import argparse`
		`+ import glob`
		`+ import json`
		`+ import os`
		`+ import subprocess`
		`+ import tempfile`
		`+`
		def main():
		    """Compute the translation statistics of every package of a release.

		    Parses the command line, then for each sub-folder of
		    ``./results/f<release>/packages/`` (in alphabetical order) reads its
		    ``discover.json`` and calls the statistics function matching the
		    "file_format" of each entry ("po", "ts" or "json"). Entries of any
		    other format are skipped.
		    """

		    parser = argparse.ArgumentParser(
		        description="Computes stats for each srpm detected")
		    parser.add_argument("--release", required=True, type=int, default=31,
		                        choices=[30, 31, 32],
		                        help="Provide the Fedora release to analyze")

		    args = parser.parse_args()

		    packages_folder = "./results/f{v}/packages/".format(v=args.release)

		    packages = sorted(
		        entry for entry in os.listdir(packages_folder)
		        if os.path.isdir(os.path.join(packages_folder, entry)))

		    handlers = {
		        "po": get_po_translation_level,
		        "ts": get_ts_translation_level,
		        "json": get_json_translation_level,
		    }

		    for package in packages:
		        package_path = os.path.join(packages_folder, package)

		        try:
		            with open(os.path.join(package_path, "discover.json"), 'r') as f:
		                translation_files = json.load(f)
		        except FileNotFoundError:
		            # nothing was discovered for this folder: skip it instead of
		            # aborting the whole run
		            print("no discover.json for " + package + ", skipping")
		            continue

		        # the stats functions append to these files: remove the output of
		        # a previous run so that running twice does not duplicate rows
		        for stale in ("stats.csv", "stats.errors.txt"):
		            try:
		                os.remove(os.path.join(package_path, stale))
		            except FileNotFoundError:
		                # first run for this package, nothing to clean
		                pass

		        for translation in translation_files:
		            handler = handlers.get(translation["file_format"])
		            if handler is None:
		                # "auto" is a detection of .tx configuration; other
		                # formats have no statistics function
		                continue
		            handler(package_path, translation, package, packages_folder)
		`+`
		def _stats_files(packages_folder, name):
		    """Return the (stats, errors) file paths of the package ``name``."""
		    return (packages_folder + "/{p}/stats.csv".format(p=name),
		            packages_folder + "/{p}/stats.errors.txt".format(p=name))


		def _strip_path_from_errors(error_file, path):
		    """Replace every occurrence of ``path`` in ``error_file`` by "."."""
		    if not path:
		        return

		    # done in Python on raw bytes: a literal replacement, whatever
		    # characters the path contains and whatever the encoding of the file
		    with open(error_file, "rb") as handle:
		        content = handle.read()

		    cleaned = content.replace(os.fsencode(path), b".")

		    if cleaned != content:
		        with open(error_file, "wb") as handle:
		            handle.write(cleaned)


		def _pocount_folder(path, discover, name, packages_folder):
		    """Append the ``pocount --csv`` output for one file mask to the stats."""
		    filemask = discover["filemask"]
		    stats_file, error_file = _stats_files(packages_folder, name)

		    with open(stats_file, 'a') as stats, open(error_file, 'a') as error:
		        # pocount is given the part of the mask located before the first "*"
		        subprocess.run(["pocount", filemask.split("*")[0], "--csv"],
		                       stdout=stats, stderr=error, check=True, cwd=path)

		    _strip_path_from_errors(error_file, path)


		def get_po_translation_level(path, discover, name, packages_folder):
		    """Compute the statistics of the po files of one discovery entry.

		    Runs ``pocount --csv`` from ``path`` and appends its output to
		    ``<packages_folder>/<name>/stats.csv`` and its errors to
		    ``<packages_folder>/<name>/stats.errors.txt``; occurrences of ``path``
		    in the error file are then replaced by ".".

		    Args:
		        path: folder of the package, used as working directory.
		        discover: discovery entry; its "filemask" key is read.
		        name: package name.
		        packages_folder: folder containing one sub-folder per package.

		    Raises:
		        subprocess.CalledProcessError: when pocount exits with an error.
		    """
		    _pocount_folder(path, discover, name, packages_folder)


		def get_ts_translation_level(path, discover, name, packages_folder):
		    """Compute the statistics of the ts files of one discovery entry.

		    Same processing, arguments and errors as ``get_po_translation_level``.
		    """
		    _pocount_folder(path, discover, name, packages_folder)


		def get_json_translation_level(path, discover, name, packages_folder):
		    """Compute the statistics of the json files of one discovery entry.

		    The files matching the mask are copied to a temporary folder, keeping
		    their path relative to ``path``, converted with ``json2po`` and counted
		    with ``pocount --csv``. Output goes to
		    ``<packages_folder>/<name>/stats.csv`` and errors to
		    ``<packages_folder>/<name>/stats.errors.txt``; occurrences of ``path``
		    in the error file are then replaced by ".". Nothing is computed when
		    the template file cannot be found.

		    Args:
		        path: folder of the package.
		        discover: discovery entry; "filemask" is read, "template" is
		            optional and defaults to the mask with "*" replaced by "en".
		        name: package name.
		        packages_folder: folder containing one sub-folder per package.

		    Raises:
		        subprocess.CalledProcessError: when json2po or pocount fails.
		    """
		    # local import: shutil is not part of this module's imports
		    from shutil import copyfile

		    filemask = discover["filemask"]
		    stats_file, error_file = _stats_files(packages_folder, name)

		    with open(stats_file, 'a') as stats, open(error_file, 'a') as error, \
		            tempfile.TemporaryDirectory(prefix="l10n-stats") as tmpjson:
		        # copy only related json files to a temporary folder
		        for filename in glob.iglob(path + "/" + filemask):
		            if not os.path.isfile(filename):
		                continue

		            # if files are in language subfolder, reproduce the hierarchy
		            target = os.path.join(tmpjson, os.path.relpath(filename, path))
		            os.makedirs(os.path.dirname(target), exist_ok=True)
		            copyfile(filename, target)

		        # convert json files to po files
		        with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmppo:
		            # use existing template, if not existing (probably a bug), try "en"
		            template_file = tmpjson + "/" + \
		                discover.get("template", filemask.replace("*", "en"))

		            if os.path.isfile(template_file):
		                subprocess.run(["json2po",
		                                "-t",
		                                template_file,
		                                tmpjson,
		                                tmppo,
		                                "--progress=none"],
		                               stderr=error,
		                               check=True,
		                               cwd=tmppo)

		                # compute stats
		                subprocess.run(["pocount",
		                                filemask.split("*")[0],
		                                "--csv"],
		                               stdout=stats,
		                               stderr=error,
		                               check=True,
		                               cwd=tmppo)
		            else:
		                print(" template doesn't exist, is it a translation-finder bug?")

		    _strip_path_from_errors(error_file, path)
		`+`
		`+`
		`+ if __name__ == '__main__':`
		`+ main()`

build_tm.py

file modified

+34 -126

		`@@ -2,18 +2,12 @@`
		`"""Consolidate each po files into compendium"""`

		`import argparse`
		`- import glob`
		`import json`
		`import os`
		`- import polib`
		`import subprocess`
		`import tempfile`
		`import time`

		`- from shutil import copyfile`
		`- from shutil import rmtree`
		`- from weblate_language_data import aliases, languages, language_codes`
		`-`
		`def main():`
		`"""Handle params"""`

		`@@ -24,37 +18,23 @@`
		`choices=[30, 31, 32],`
		`help="Provide the Fedora release to analyze")`

		`+ parser.add_argument("--refresh", action="store_true",`
		`+ help="Force refresh of files")`
		`+`
		`parser.add_argument("--lang", required=False, type=str,`
		`help="Filter a language to analyze")`

		`- parser.add_argument("--refresh", action="store_true",`
		`- help="Refresh list of available languages to analyze")`
		`-`
		`args = parser.parse_args()`

		`- release_folder = "./tm/f{v}/".format(v=args.release)`
		`+ release_folder = "./results/f{v}/".format(v=args.release)`
		`lang_path = os.path.join(release_folder, "languages/")`
		`packages_path = os.path.join(release_folder, "packages/")`
		`tm_folder = os.path.join(release_folder, "out/")`
		`+ os.makedirs(tm_folder, exist_ok=True)`

		`- # Step 1: compute the list of languages`
		`- if args.refresh:`
		`- print("Refresh the list of languages")`
		`- rmtree(lang_path)`
		`- os.mkdir(lang_path)`
		`-`
		`- start_time_search = time.time()`
		`-`
		`- po_langs = detect_languages(packages_path)`
		`+ print("Building the translation memory for every languages")`
		`+ start_time_search = time.time()`

		`- for lang in po_langs.keys():`
		`- with open(os.path.join(lang_path, lang + '.json'), 'w') as f:`
		`- f.write(json.dumps(po_langs[lang], indent=2))`
		`-`
		`- search_duration = round(time.time() - start_time_search, 1)`
		`- print(" Done in {d} seconds".format(d=search_duration))`
		`-`
		`- # Step 2: call TM activities`
		`if args.lang:`
		`with open(os.path.join(lang_path, args.lang + ".json"), "r") as read_file:`
		`files = json.load(read_file)`
		`@@ -67,134 +47,62 @@`
		`with open(os.path.join(lang_path, lang), "r") as read_file:`
		`files = json.load(read_file)`

		`- compute_lang(lang[:-len('.json')], files, tm_folder)`
		`-`
		`- def detect_languages(tm_folder):`
		`- """ For each po file, detect metadatas and deduct the language """`
		`- """ Requires: a file hierarchy with po files """`
		`- """ Returns: a dictionary of lists, key=lang code, value=file list """`
		`- langs = {}`
		`+ compute_lang(lang[:-len('.json')], files, tm_folder, args.refresh)`

		`- for root, directories, files in os.walk(tm_folder):`
		`- for file in files:`
		`- racine, ext = os.path.splitext(file)`
		`- if ext == ".po":`
		`- metadata = dict()`
		`- error = ""`
		`- try:`
		`- metadata = polib.pofile(os.path.join(root, file)).metadata`
		`- except UnicodeDecodeError as e:`
		`- # encoding error, to investigate before using it in TM`
		`- error = "error-unicode"`
		`- except OSError as e:`
		`- # maybe a polib bug? to investigate before using it in TM`
		`- error = "error-os"`
		`+ search_duration = round(time.time() - start_time_search, 1)`
		`+ print(" Done in {d} seconds".format(d=search_duration))`

		`- lang = choose_lang(racine, metadata, error)`

		`- try:`
		`- langs[lang].append(os.path.join(root, file))`
		`- except KeyError:`
		`- langs[lang] = list()`
		`- langs[lang].append(os.path.join(root, file))`
		`-`
		`- return langs`
		`-`
		`- def choose_lang(filename, metadata, error):`
		`- """ From a po file and its medata, choose the most likely language code """`
		`- """ By priority: the Language medata """`
		`- """ Returns: a language code """`
		`-`
		`- lang = ""`
		`- file_name = filename.lower()`
		`- meta_language = ""`
		`- meta_team = ""`
		`- try:`
		`- meta_language = metadata.get("Language").lower()`
		`- except AttributeError:`
		`- pass`
		`-`
		`- try:`
		`- meta_team = metadata.get("Language-Team").lower()`
		`- except AttributeError:`
		`- pass`
		`-`
		`- if meta_language in language_codes.LANGUAGES:`
		`- lang = meta_language`
		`-`
		`- elif file_name in language_codes.LANGUAGES:`
		`- lang = file_name`
		`- else:`
		`- lang = "noresult"`
		`-`
		`- # try languages (some codes here are exclused from languages_codes)`
		`- if lang == "noresult":`
		`- loc = [ lang[0] for lang in languages.LANGUAGES ]`
		`-`
		`- if meta_language in loc:`
		`- lang = meta_language`
		`- elif file_name in loc:`
		`- lang = file_name`
		`-`
		`- # try ALIASES`
		`- if lang == "noresult":`
		`- if meta_language in aliases.ALIASES.keys():`
		`- lang = aliases.ALIASES[meta_language]`
		`- elif file_name in aliases.ALIASES.keys():`
		`- lang = aliases.ALIASES[file_name]`
		`- else:`
		`- lang = "error"`
		`-`
		`- return lang`
		`-`
		def compute_lang(lang, langfiles, tm_folder, refresh):
		    """Generate the compendium, the tmx and the terminology of a language.

		    Three files are produced in ``tm_folder``: ``<lang>.po`` (compendium
		    built with msguniq and msgcat), ``<lang>.tmx`` (po2tmx) and
		    ``<lang>.terminology.po`` (poterminology). Each one is only generated
		    when it does not exist yet or when ``refresh`` is true.

		    Args:
		        lang: language code, used as base name of the generated files.
		        langfiles: po files of the language; relative paths are resolved
		            against the folder of this script.
		        tm_folder: output folder; concatenated as-is with the file names,
		            so it has to end with a path separator.
		        refresh: regenerate the files even if they already exist.

		    Raises:
		        subprocess.CalledProcessError: when po2tmx or poterminology fails.
		    """
		    print(" Computing: " + lang)

		    script_folder = os.path.dirname(os.path.abspath(__file__))

		    # po consolidation
		    compendium_file = os.path.join(script_folder, tm_folder + lang + ".po")

		    if refresh or not os.path.isfile(compendium_file):
		        pofiles = [os.path.join(script_folder, f) for f in langfiles]

		        with tempfile.TemporaryDirectory(prefix="l10n-tm") as tmp:
		            for count, pofile in enumerate(pofiles):
		                try:
		                    command = ["msguniq", pofile, "--output-file", str(count), "--no-location"]
		                    subprocess.run(command, check=True, cwd=tmp, capture_output=True)
		                except subprocess.CalledProcessError:
		                    # second attempt, forcing the output encoding
		                    try:
		                        command = ["msguniq", pofile, "--output-file", str(count), "--to-code", "utf-8", "--no-location"]
		                        subprocess.run(command, check=True, cwd=tmp, capture_output=True)
		                    except subprocess.CalledProcessError as e:
		                        print("Error with msguniq {i}, error: {e}".format(i=pofile, e=e))

		            onlyfiles = [f for f in os.listdir(tmp) if os.path.isfile(os.path.join(tmp, f))]
		            command = ["msgcat", "--force-po", "--no-location", "--output-file", compendium_file] + onlyfiles

		            try:
		                subprocess.run(command, check=True, cwd=tmp, capture_output=True)
		            except subprocess.CalledProcessError:
		                print(" msgcat exception...")

		    # po to tmx conversion
		    tmx_file = tm_folder + lang + ".tmx"
		    if refresh or not os.path.isfile(tmx_file):
		        command = ["po2tmx", "--language="+lang, "--progress=none",
		                   compendium_file, "--output="+tmx_file]
		        subprocess.run(command, check=True, capture_output=True)

		    # language terminology
		    terminology_file = tm_folder + lang + ".terminology.po"
		    # check the terminology file itself, not the tmx: otherwise an existing
		    # tmx prevents a missing terminology from ever being generated
		    if refresh or not os.path.isfile(terminology_file):
		        command = ["poterminology", "--ignore-case", "--fold-titlecase",
		                   "--inputs-needed", "1",
		                   "--progress=none", compendium_file, "--output="+terminology_file]
		        subprocess.run(command, check=True, capture_output=True)

		`if __name__ == '__main__':`
		`main()`