| |
@@ -2,18 +2,12 @@
|
| |
"""Consolidate each po files into compendium"""
|
| |
|
| |
import argparse
|
| |
- import glob
|
| |
import json
|
| |
import os
|
| |
- import polib
|
| |
import subprocess
|
| |
import tempfile
|
| |
import time
|
| |
|
| |
- from shutil import copyfile
|
| |
- from shutil import rmtree
|
| |
- from weblate_language_data import aliases, languages, language_codes
|
| |
-
|
| |
def main():
|
| |
"""Handle params"""
|
| |
|
| |
@@ -24,37 +18,23 @@
|
| |
choices=[30, 31, 32],
|
| |
help="Provide the Fedora release to analyze")
|
| |
|
| |
+ parser.add_argument("--refresh", action="store_true",
|
| |
+ help="Force refresh of files")
|
| |
+
|
| |
parser.add_argument("--lang", required=False, type=str,
|
| |
help="Filter a language to analyze")
|
| |
|
| |
- parser.add_argument("--refresh", action="store_true",
|
| |
- help="Refresh list of available languages to analyze")
|
| |
-
|
| |
args = parser.parse_args()
|
| |
|
| |
- release_folder = "./tm/f{v}/".format(v=args.release)
|
| |
+ release_folder = "./results/f{v}/".format(v=args.release)
|
| |
lang_path = os.path.join(release_folder, "languages/")
|
| |
packages_path = os.path.join(release_folder, "packages/")
|
| |
tm_folder = os.path.join(release_folder, "out/")
|
| |
+ os.makedirs(tm_folder, exist_ok=True)
|
| |
|
| |
- # Step 1: compute the list of languages
|
| |
- if args.refresh:
|
| |
- print("Refresh the list of languages")
|
| |
- rmtree(lang_path)
|
| |
- os.mkdir(lang_path)
|
| |
-
|
| |
- start_time_search = time.time()
|
| |
-
|
| |
- po_langs = detect_languages(packages_path)
|
| |
+ print("Building the translation memory for every languages")
|
| |
+ start_time_search = time.time()
|
| |
|
| |
- for lang in po_langs.keys():
|
| |
- with open(os.path.join(lang_path, lang + '.json'), 'w') as f:
|
| |
- f.write(json.dumps(po_langs[lang], indent=2))
|
| |
-
|
| |
- search_duration = round(time.time() - start_time_search, 1)
|
| |
- print(" Done in {d} seconds".format(d=search_duration))
|
| |
-
|
| |
- # Step 2: call TM activities
|
| |
if args.lang:
|
| |
with open(os.path.join(lang_path, args.lang + ".json"), "r") as read_file:
|
| |
files = json.load(read_file)
|
| |
@@ -67,134 +47,62 @@
|
| |
with open(os.path.join(lang_path, lang), "r") as read_file:
|
| |
files = json.load(read_file)
|
| |
|
| |
- compute_lang(lang[:-len('.json')], files, tm_folder)
|
| |
-
|
| |
- def detect_languages(tm_folder):
|
| |
- """ For each po file, detect metadatas and deduct the language """
|
| |
- """ Requires: a file hierarchy with po files """
|
| |
- """ Returns: a dictionary of lists, key=lang code, value=file list """
|
| |
- langs = {}
|
| |
+ compute_lang(lang[:-len('.json')], files, tm_folder, args.refresh)
|
| |
|
| |
- for root, directories, files in os.walk(tm_folder):
|
| |
- for file in files:
|
| |
- racine, ext = os.path.splitext(file)
|
| |
- if ext == ".po":
|
| |
- metadata = dict()
|
| |
- error = ""
|
| |
- try:
|
| |
- metadata = polib.pofile(os.path.join(root, file)).metadata
|
| |
- except UnicodeDecodeError as e:
|
| |
- # encoding error, to investigate before using it in TM
|
| |
- error = "error-unicode"
|
| |
- except OSError as e:
|
| |
- # maybe a polib bug? to investigate before using it in TM
|
| |
- error = "error-os"
|
| |
+ search_duration = round(time.time() - start_time_search, 1)
|
| |
+ print(" Done in {d} seconds".format(d=search_duration))
|
| |
|
| |
- lang = choose_lang(racine, metadata, error)
|
| |
|
| |
- try:
|
| |
- langs[lang].append(os.path.join(root, file))
|
| |
- except KeyError:
|
| |
- langs[lang] = list()
|
| |
- langs[lang].append(os.path.join(root, file))
|
| |
-
|
| |
- return langs
|
| |
-
|
| |
- def choose_lang(filename, metadata, error):
|
| |
- """ From a po file and its medata, choose the most likely language code """
|
| |
- """ By priority: the Language medata """
|
| |
- """ Returns: a language code """
|
| |
-
|
| |
- lang = ""
|
| |
- file_name = filename.lower()
|
| |
- meta_language = ""
|
| |
- meta_team = ""
|
| |
- try:
|
| |
- meta_language = metadata.get("Language").lower()
|
| |
- except AttributeError:
|
| |
- pass
|
| |
-
|
| |
- try:
|
| |
- meta_team = metadata.get("Language-Team").lower()
|
| |
- except AttributeError:
|
| |
- pass
|
| |
-
|
| |
- if meta_language in language_codes.LANGUAGES:
|
| |
- lang = meta_language
|
| |
-
|
| |
- elif file_name in language_codes.LANGUAGES:
|
| |
- lang = file_name
|
| |
- else:
|
| |
- lang = "noresult"
|
| |
-
|
| |
- # try languages (some codes here are exclused from languages_codes)
|
| |
- if lang == "noresult":
|
| |
- loc = [ lang[0] for lang in languages.LANGUAGES ]
|
| |
-
|
| |
- if meta_language in loc:
|
| |
- lang = meta_language
|
| |
- elif file_name in loc:
|
| |
- lang = file_name
|
| |
-
|
| |
- # try ALIASES
|
| |
- if lang == "noresult":
|
| |
- if meta_language in aliases.ALIASES.keys():
|
| |
- lang = aliases.ALIASES[meta_language]
|
| |
- elif file_name in aliases.ALIASES.keys():
|
| |
- lang = aliases.ALIASES[file_name]
|
| |
- else:
|
| |
- lang = "error"
|
| |
-
|
| |
- return lang
|
| |
-
|
| |
def compute_lang(lang, langfiles, tm_folder, refresh):
    """ Generate compendium, then convert it to tmx and extract terminology

    Each of the three outputs (compendium, tmx, terminology) is only built
    when its own file is missing or when refresh is set.

    Args:
        lang: language code, used as base name of the generated files.
        langfiles: paths of the po files of this language, resolved against
            the directory containing this script.
        tm_folder: folder receiving the generated files.
        refresh: when true, rebuild the files even if they already exist.

    Raises:
        subprocess.CalledProcessError: if po2tmx or poterminology fails.
    """
    print(" Computing: " + lang)

    script_dir = os.path.dirname(os.path.abspath(__file__))

    # po consolidation
    # The compendium path is anchored on the script directory because
    # msguniq and msgcat are run from a temporary working directory.
    compendium_file = os.path.join(script_dir, tm_folder, lang + ".po")

    if refresh or not os.path.isfile(compendium_file):
        pofiles = [os.path.join(script_dir, f) for f in langfiles]

        with tempfile.TemporaryDirectory(prefix="l10n-tm") as tmp:
            # Each po file is deduplicated into a numbered file inside tmp.
            for count, pofile in enumerate(pofiles):
                try:
                    command = ["msguniq", pofile, "--output-file", str(count), "--no-location"]
                    subprocess.run(command, check=True, cwd=tmp, capture_output=True)
                except subprocess.CalledProcessError:
                    # Second attempt, this time asking for an utf-8 output.
                    try:
                        command = ["msguniq", pofile, "--output-file", str(count), "--to-code", "utf-8", "--no-location"]
                        subprocess.run(command, check=True, cwd=tmp, capture_output=True)
                    except subprocess.CalledProcessError as e:
                        # Best effort: report the file and go on without it.
                        print("Error with msguniq {i}, error: {e}".format(i=pofile, e=e))

            onlyfiles = [f for f in os.listdir(tmp) if os.path.isfile(os.path.join(tmp, f))]
            command = ["msgcat", "--force-po", "--no-location", "--output-file", compendium_file] + onlyfiles

            try:
                subprocess.run(command, check=True, cwd=tmp, capture_output=True)
            except subprocess.CalledProcessError:
                print(" msgcat exception...")

    # po to tmx conversion
    tmx_file = os.path.join(tm_folder, lang + ".tmx")
    command = ["po2tmx", "--language="+lang, "--progress=none",
               compendium_file, "--output="+tmx_file]
    if refresh or not os.path.isfile(tmx_file):
        subprocess.run(command, check=True, capture_output=True)

    # language terminology
    terminology_file = os.path.join(tm_folder, lang + ".terminology.po")
    command = ["poterminology", "--ignore-case", "--fold-titlecase",
               "--inputs-needed", "1",
               "--progress=none", compendium_file, "--output="+terminology_file]
    # The check is done on the terminology file itself, so that a missing
    # terminology is generated even when the tmx file already exists.
    if refresh or not os.path.isfile(terminology_file):
        subprocess.run(command, check=True, capture_output=True)
|
| |
|
| |
if __name__ == '__main__':
|
| |
main()
|
| |