#18 replace subprocess with direct calls to the Translate Toolkit
Merged 3 years ago by jibecfed. Opened 3 years ago by jibecfed.

file modified
+110 -57
@@ -6,14 +6,20 @@ 

  import json

  import os

  import subprocess

+ import shutil

  import tempfile

  

+ from io import BytesIO

+ from translate.convert import po2tmx

+ from translate.storage import factory, po

+ from translate.tools import poterminology

+ 

  

  def main():

      """Handle params"""

  

      parser = argparse.ArgumentParser(

-         description="Creates compendium for every languages")

+         description="Creates useful translator files for every languages")

  

      parser.add_argument("--results", required=True,

                          help="Set the results folder to use")
@@ -24,9 +30,6 @@ 

      parser.add_argument("--compress", action="store_true",

                          help="Compress output files")

  

-     parser.add_argument("--lang", required=False, type=str,

-                         help="Filter a language to analyze")

- 

      args = parser.parse_args()

  

      results_folder = "./results/{v}/".format(v=args.results)
@@ -34,84 +37,134 @@ 

      tm_folder = os.path.join(results_folder, "languages-tm/")

      os.makedirs(tm_folder, exist_ok=True)

  

+     # clean destination folders

+     if args.refresh and os.path.isdir(tm_folder):

+         shutil.rmtree(tm_folder)

+ 

+     if not os.path.exists(tm_folder):

+         os.makedirs(tm_folder)

+ 

      print("Building the translation memory for every languages")

  

-     if args.lang:

-         with open(os.path.join(lang_path, args.lang + ".json"), "r") as read_file:

+     langs = [f for f in os.listdir(lang_path) if os.path.isfile(os.path.join(lang_path, f))]

+ 

+     for lang in sorted(langs):

+         lang_code = lang[:-len('.json')]

+ 

+         print(" {l}".format(l=lang_code))

+ 

+         with open(os.path.join(lang_path, lang), "r") as read_file:

              files = json.load(read_file)["po"]

  

-         compute_lang(args.lang, files, tm_folder)

-     else:

-         langs = [f for f in os.listdir(lang_path) if os.path.isfile(os.path.join(lang_path, f))]

+         compendium_file = os.path.join(tm_folder, lang_code + ".po")

+         compendium_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), compendium_file)

+         if not os.path.isfile(compendium_file):

+             try:

+                 process_compendium(files, compendium_file)

+             except Exception as e:

+                 print(" Compendium generation triggered an {t} exception: {e}".format(t=type(e).__name__, e=e))

  

-         for lang in sorted(langs):

-             with open(os.path.join(lang_path, lang), "r") as read_file:

-                 files = json.load(read_file)["po"]

+         tmx_file = os.path.join(tm_folder, lang_code + ".tmx")

+         if not os.path.isfile(tmx_file):

+             try:

+                 process_tmx(lang_code, compendium_file, tmx_file)

+             except Exception as e:

+                 print(" TMX generation triggered an {t} exception: {e}".format(t=type(e).__name__, e=e))

  

-             compute_lang(lang[:-len('.json')], files, tm_folder, args.refresh)

+         terminology_file = os.path.join(tm_folder, lang_code + ".terminology.po")

+         if not os.path.isfile(terminology_file):

+             try:

+                 process_terminology(compendium_file, terminology_file)

+             except Exception as e:

+                 print(" Terminology generation triggered an {t} exception: {e}".format(t=type(e).__name__, e=e))

  

-         print("Detecting missing files")

-         for lang in sorted(langs):

-             check_lang(lang[:-len('.json')], tm_folder)

+     print("Detecting missing files")

+     for lang in sorted(langs):

+         check_lang(lang[:-len('.json')], tm_folder)

  

      if args.compress:

          print("Compressing files")

          compress(tm_folder)

  

  

- def compute_lang(lang, langfiles, tm_folder, refresh):

-     """ Generate compendium and convert it to tmx """

-     """  """

-     print(" Computing: " + lang)

- 

-     # po consolidation

-     compendium_file = os.path.join(tm_folder, lang + ".po")

-     compendium_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), compendium_file)

+ def process_compendium(langfiles, dest):

+     """ Generate a compendium (a concatenation of all po files) """

  

-     if not os.path.isfile(compendium_file) or refresh is True:

-         pofiles = [os.path.join(os.path.dirname(os.path.abspath(__file__)), f) for f in langfiles]

-         count = 0

+     pofiles = [os.path.join(os.path.dirname(os.path.abspath(__file__)), f) for f in langfiles]

+     count = 0

  

-         with tempfile.TemporaryDirectory(prefix="l10n-tm") as tmp:

-             for i in pofiles:

+     with tempfile.TemporaryDirectory(prefix="l10n-tm") as tmp:

+         for i in pofiles:

+             try:

+                 command = ["msguniq", i, "--output-file", count.__str__(), "--no-location"]

+                 subprocess.run(command, check=True, cwd=tmp, capture_output=True)

+             except subprocess.CalledProcessError:

                  try:

-                     command = ["msguniq", i, "--output-file", count.__str__(), "--no-location"]

+                     command = ["msguniq", i, "--output-file", count.__str__(), "--to-code", "utf-8", "--no-location"]

                      subprocess.run(command, check=True, cwd=tmp, capture_output=True)

-                 except subprocess.CalledProcessError:

-                     try:

-                         command = ["msguniq", i, "--output-file", count.__str__(), "--to-code", "utf-8", "--no-location"]

-                         subprocess.run(command, check=True, cwd=tmp, capture_output=True)

-                     except subprocess.CalledProcessError as e:

-                         print("Error with msguniq {i}, error: {e}".format(i=i, e=e))

+                 except subprocess.CalledProcessError as e:

+                     print("Error with msguniq {i}, error: {e}".format(i=i, e=e))

  

-                 count += 1

+             count += 1

  

-             onlyfiles = [f for f in os.listdir(tmp) if os.path.isfile(os.path.join(tmp, f))]

-             command = ["msgcat", "--force-po", "--no-location", "--output-file", compendium_file] + onlyfiles

+         onlyfiles = [f for f in os.listdir(tmp) if os.path.isfile(os.path.join(tmp, f))]

+         command = ["msgcat", "--force-po", "--no-location", "--output-file", dest] + onlyfiles

  

-             try:

-                 subprocess.run(command, check=True, cwd=tmp, capture_output=True)

-             except subprocess.CalledProcessError:

-                 print(" msgcat exception...")

+         try:

+             subprocess.run(command, check=True, cwd=tmp, capture_output=True)

+         except subprocess.CalledProcessError:

+             print(" msgcat exception...")

  

-     # po to tmx convertion

-     tmx_file = os.path.join(tm_folder, lang + ".tmx")

-     command = ["po2tmx", "--language="+lang, "--progress=none",

-                compendium_file, "--output="+tmx_file]

-     if not os.path.isfile(tmx_file) or refresh is True:

-         subprocess.run(command, check=True, capture_output=True)

  

-     # language terminology

-     terminology_file = os.path.join(tm_folder, lang + ".terminology.po")

-     command = ["poterminology", "--ignore-case", "--fold-titlecase",

-                "--inputs-needed", "1",

-                "--progress=none", compendium_file, "--output=" + terminology_file]

-     if not os.path.isfile(terminology_file) or refresh is True:

-         subprocess.run(command, check=True, capture_output=True)

+ def process_tmx(lang, source, dest):

+     """ Generate a translation memory from a po file """

+ 

+     outputfile = po2tmx.tmxmultifile(dest)

+     po2tmx.convertpo(

+         inputfile=BytesIO(open(source, "r").read().encode()),

+         outputfile=outputfile,

+         templatefile=None,

+         sourcelanguage="en",

+         targetlanguage=lang,

+         comment="source"

+     )

+ 

+     outputfile.tmxfile.savefile(dest)

+ 

+ 

+ def process_terminology(source, dest):

+     """ Generate a termonology from a po file """

+ 

+     extractor = poterminology.TerminologyExtractor()

+     options = {

+         "inputmin": "1",

+         "fullmsgmin": "1",

+         "substrmin": "2",

+         "locmin": "2",

+         "nonstopmin": 1,

+         "sortorders": ["frequency", "dictionary", "length"],

+         "output": dest,

+     }

+ 

+     with open(source, "rb") as fh:

+         inputfile = factory.getobject(fh)

+ 

+     extractor.processunits(inputfile.units, source)

+     terms = extractor.extract_terms()

+ 

+     termfile = po.pofile()

+     termitems = extractor.filter_terms(

+         terms, nonstopmin=options["nonstopmin"], sortorders=options["sortorders"]

+     )

+     for count, unit in termitems:

+         termfile.units.append(unit)

+ 

+     with open(options["output"], "wb") as fh:

+         termfile.serialize(fh)

  

  

  def check_lang(lang, tm_folder):

-     """ make sure the files were generated """

+     """ Check if expected files were generated """

  

      compendium_file = os.path.join(tm_folder, lang + ".po")

      tmx_file = os.path.join(tm_folder, lang + ".tmx")

file modified
+1
@@ -2,3 +2,4 @@ 

  polib

  weblate-language-data

  langtable

+ translate-toolkit 

\ No newline at end of file

When tested on a few packages, this saves about 20% of execution time.

1 new commit added

  • add "error management"
3 years ago

1 new commit added

  • remove useless
3 years ago

1 new commit added

  • add translate-toolkit requirement
3 years ago

Pull-Request has been merged by jibecfed

3 years ago
Metadata