#10 make stats calculation independent
Merged 3 years ago by jibecfed. Opened 3 years ago by jibecfed.

file modified
+24 -5
@@ -28,15 +28,25 @@ 

  

  with `$script`, one of the following:

  

- ## Compute data

+ ## Get the source packages

  

- `./build.py`

+ `./build.py` gets srpm lists, applies discover and computes progression stats

  

- The result will be in multiple files inside the results folder.

+ ## Detect languages

  

- ## Produce stats

+ `./build_language_list.py`

  

- `./build_stats.py`

+ Parse translation files to deduce the list of languages.

+ 

+ ## Produce per package stats

+ 

+ `./build_packages_stats.py`

+ 

+ For each package, produce progression stats.

+ 

+ ## Produce global stats

+ 

+ `./build_global_stats.py`

  

  Applies data cleanups and enhancements (cldr name).

  
@@ -47,6 +57,15 @@ 

  Aggregate the data per language, then apply it on territories (it uses stats from 

  CLDR with language per territory).

  

+ ## Produce translation memories

+ 

+ `./build_tm.py`

+ 

+ Detect the list of languages

+ Aggregate all files for a language and produce a compendium, a terminology and a translation memory.

+ 

+ TODO: language detection should probably be in a dedicated build file.

+ 

  # Output files

  

  * `0.error.language not in cldr.csv` contains unknown languages (lines are removed)

file modified
+23 -141
@@ -42,13 +42,13 @@ 

  

      (distname, distrel, distid) = distro.linux_distribution()

      result_folder = "./results/f{v}/stats/".format(v=distrel)

-     tm_folder = "./results/f{v}/packages/".format(v=distrel)

+     packages_folder = "./results/f{v}/packages/".format(v=distrel)

      srpms_path = "/srpms"

  

      if not os.path.exists(result_folder):

          os.makedirs(result_folder)

-     if not os.path.exists(tm_folder):

-         os.makedirs(tm_folder)

+     if not os.path.exists(packages_folder):

+         os.makedirs(packages_folder)

  

      processing_file = os.path.join("./results/f{v}/".format(v=distrel), "data.json")

      srpm_list_file = os.path.join(srpms_path, "srpm.txt")
@@ -137,7 +137,7 @@ 

  

              extract_srpm(tmp, srpm_path, result_folder)

              (tsearch, tcopy, results) = discover_translations(

-                 tmp, package.name, result_folder, tm_folder)

+                 tmp, package.name, result_folder, packages_folder)

  

              if not args.keep:

                  os.unlink(srpm_path)
@@ -153,8 +153,6 @@ 

                  json.dump(data, f, indent=2)

              print("")

  

-     concat_csv(result_folder)

- 

  

  def extract_srpm(tmp, name, result_folder):

      """extract srpm page"""
@@ -165,7 +163,8 @@ 

              subprocess.run(['./extract_srpm.sh', tmp, name],

                             stdout=out, stderr=error, check=True)

  

- def discover_translations(tmp, name, result_folder, tm_folder):

+ 

+ def discover_translations(tmp, name, result_folder, packages_folder):

      """find po file"""

      print("discover_translations: " + tmp)

      translation_files = []
@@ -185,30 +184,15 @@ 

      tcopy = time.time()

  

      if translation_files:

-         if not os.path.exists(os.path.join(tm_folder, name)):

-             os.makedirs(os.path.join(tm_folder, name))

+         if not os.path.exists(os.path.join(packages_folder, name)):

+             os.makedirs(os.path.join(packages_folder, name))

  

-         with open(os.path.join(tm_folder, name, "discover.json"), 'w') as f:

+         with open(os.path.join(packages_folder, name, "discover.json"), 'w') as f:

              f.write(json.dumps(translation_files, indent=2))

  

          for translation in translation_files:

-             if translation["file_format"] == "po":

-                 get_po_translation_level(

-                     tmp, translation, name, result_folder, tm_folder)

-             elif translation["file_format"] == "ts":

-                 get_ts_translation_level(tmp, translation, name, result_folder)

-             elif translation["file_format"] == "json":

-                 get_json_translation_level(

-                     tmp, translation, name, result_folder)

-             elif translation["file_format"] == "auto":

-                 # it's a detection of .tx configuration

-                 continue

-             else:

-                 unknown_format(

-                     translation,

-                     name,

-                     translation["file_format"],

-                     result_folder)

+             copy_translations(tmp, translation, name, result_folder, packages_folder)

+ 

      tcopy = round(time.time() - tcopy, 1)

  

      cresults = dict()
@@ -218,121 +202,19 @@ 

      return (tsearch, tcopy, cresults)

  

  

- def get_po_translation_level(path, mask, name, result_folder, tm_folder):

-     filemask = mask["filemask"]

- 

-     with open(result_folder + '/{p}.stats.csv'.format(p=name), 'a') as stats:

-         with open(result_folder + '/{p}.errors.txt'.format(p=name), 'a') as error:

-             subprocess.run(["pocount", filemask.split("*")[0], "--csv"],

-                            stdout=stats, stderr=error, check=True, cwd=path)

- 

-     # Copy translation files in translation memory

-     for po in glob.glob(path + "/" + filemask):

-         dest = tm_folder + "/" + name + "/" + filemask.split("*")[0]

-         os.makedirs(dest, exist_ok=True)

-         # use copyfile instead of copy2 to handle read-only files in rpm

-         copyfile(po, os.path.join(dest, os.path.basename(po)))

- 

-     subprocess.run(["sed",

-                     "-i",

-                     "-e",

-                     "s|{p}|.|g".format(p=path),

-                     result_folder + '/{p}.errors.txt'.format(p=name)],

-                    check=True)

- 

- 

- def get_ts_translation_level(path, mask, name, result_folder):

-     filemask = mask["filemask"]

- 

-     with open(result_folder + '/{p}.stats.csv'.format(p=name), 'a') as stats:

-         with open(result_folder + '/{p}.errors.txt'.format(p=name), 'a') as error:

-             subprocess.run(["pocount", filemask.split("*")[0], "--csv"],

-                            stdout=stats, stderr=error, check=True, cwd=path)

- 

-     subprocess.run(["sed",

-                     "-i",

-                     "-e",

-                     "s|{p}|.|g".format(p=path),

-                     result_folder + '/{p}.errors.txt'.format(p=name)],

-                    check=True)

- 

- 

- def get_json_translation_level(path, mask, name, result_folder):

-     filemask = mask["filemask"]

- 

-     stats = open(result_folder + '/{p}.stats.csv'.format(p=name), 'a')

-     error = open(result_folder + '/{p}.errors.txt'.format(p=name), 'a')

- 

-     # move only related json files to a temporary folder

-     with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmpjson:

-         for filename in glob.iglob(path + "/" + filemask):

-             # if filesare in language subfolder, reproduce the hierarchy

-             dest = os.path.join(

-                 *(os.path.dirname(filename).split(os.path.sep)[3:]))

-             os.makedirs(tmpjson + "/" + dest, exist_ok=True)

- 

-             copyfile(

-                 filename,

-                 tmpjson +

-                 "/" +

-                 dest +

-                 "/" +

-                 os.path.basename(filename))

- 

-         # convert json files to po files

-         with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmppo:

-             # use existing template, in not existing (probably a bug), try "en"

-             template_file = tmpjson + "/" + \

-                 mask.get("template", filemask.replace("*", "en"))

- 

-             if os.path.isfile(template_file):

-                 subprocess.run(["json2po",

-                                 "-t",

-                                 template_file,

-                                 tmpjson,

-                                 tmppo,

-                                 "--progress=none"],

-                                stderr=error,

-                                check=True,

-                                cwd=tmppo)

- 

-                 # compute stats

-                 subprocess.run(["pocount",

-                                 filemask.split("*")[0],

-                                 "--csv"],

-                                stdout=stats,

-                                stderr=error,

-                                check=True,

-                                cwd=tmppo)

-             else:

-                 print("  template doesn't exist, is it a translation-finder bug?")

- 

-     stats.close()

-     error.close()

- 

-     subprocess.run(["sed",

-                     "-i",

-                     "-e",

-                     "s|{p}|.|g".format(p=path),

-                     result_folder + '/{p}.errors.txt'.format(p=name)],

-                    check=True)

- 

- 

- def unknown_format(results, srpm, tformat, result_folder):

-     with open(result_folder + "/todo_" + tformat + ".txt", "a") as file:

-         file.write(srpm + " " + results["filemask"] + "\n")

- 

- 

- def concat_csv(result_folder):

-     filenames = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]

-     filenames = [f for f in filenames if f.endswith(".stats.csv")]

- 

-     with open(os.path.join(result_folder, "_concat.csv"), "w") as outfile:

-         for fname in filenames:

-             with open(os.path.join(result_folder, fname)) as infile:

-                 for line in infile:

-                     outfile.write(line)

+ def copy_translations(tmp, translation, name, result_folder, packages_folder):

+     # translation, name, result_folder, packages_folder

+     filemask = translation["filemask"]

+ 

+     print("copy translations " + filemask)

+ 

+     if translation["file_format"] in ["po", "json"]:

+         for po in glob.glob(tmp + "/" + filemask):

+             dest = packages_folder + "/" + name + "/" + filemask.split("*")[0]

+             os.makedirs(dest, exist_ok=True)

  

+             # use copyfile instead of copy2 to handle read-only files in rpm

+             copyfile(po, os.path.join(dest, os.path.basename(po)))

  

  if __name__ == '__main__':

      main()

build_global_stats.py build_stats.py
file renamed
+16 -1
@@ -21,7 +21,10 @@ 

  

      args = parser.parse_args()

  

-     RESULT_FOLDER = "./results/f{r}".format(r=args.release)

+     RESULT_FOLDER = "./results/f{r}/stats/".format(r=args.release)

+     packages_folder = "./results/f{r}/packages/".format(r=args.release)

+ 

+     concat_csv(packages_folder, RESULT_FOLDER)

  

      file = RESULT_FOLDER + "/_concat.csv"

      parse(file)
@@ -376,5 +379,17 @@ 

      dataset.to_csv(RESULT_FOLDER+"/"+name, index=False)

  

  

+ def concat_csv(packages_folder, stats_folder):

+     dirs = [f for f in os.listdir(packages_folder) if os.path.isdir(os.path.join(packages_folder, f))]

+ 

+     with open(os.path.join(stats_folder, "_concat.csv"), "w") as outfile:

+         for name in dirs:

+             try:

+                 with open(os.path.join(packages_folder, name, "stats.csv")) as infile:

+                     for line in infile:

+                         outfile.write(line)

+             except FileNotFoundError:

+                 pass

+ 

  if __name__ == '__main__':

      main()

file added
+133
@@ -0,0 +1,133 @@ 

+ #!/usr/bin/env python3

+ """ Parse translation files to deduct language list """

+ 

+ import argparse

+ import json

+ import os

+ import time

+ import polib

+ 

+ from shutil import rmtree

+ from weblate_language_data import aliases, languages, language_codes

+ 

+ def main():

+     """Handle params"""

+ 

+     parser = argparse.ArgumentParser(

+     description="Creates a list of languages form translation files")

+ 

+     parser.add_argument("--release", required=True, type=int, default=31,

+                         choices=[30, 31, 32],

+                         help="Provide the Fedora release to analyze")

+ 

+     parser.add_argument("--refresh", action="store_true",

+                         help="Force refresh")

+ 

+     args = parser.parse_args()

+ 

+     release_folder = "./results/f{v}/".format(v=args.release)

+     lang_path = os.path.join(release_folder, "languages/")

+     packages_path = os.path.join(release_folder, "packages/")

+ 

+     print("Refreshing the list of languages")

+     rmtree(lang_path, ignore_errors=True)

+     os.mkdir(lang_path)

+ 

+     start_time_search = time.time()

+ 

+     po_langs = detect_languages(packages_path)

+ 

+     for lang in po_langs.keys():

+         with open(os.path.join(lang_path, lang + '.json'), 'w') as f:

+             f.write(json.dumps(po_langs[lang], indent=2))

+ 

+     search_duration = round(time.time() - start_time_search, 1)

+     print(" Done in {d} seconds".format(d=search_duration))

+ 

+ 

+ def detect_languages(tm_folder):

+     """ For each po file, detect metadatas and deduct the language     """

+     """ Requires: a file hierarchy with po files                       """

+     """ Returns: a dictionary of lists, key=lang code, value=file list """

+     langs = {}

+ 

+     for root, directories, files in os.walk(tm_folder):

+         for file in files:

+             racine, ext = os.path.splitext(file)

+             if ext == ".po":

+                 metadata = dict()

+                 error = ""

+                 try:

+                     metadata = polib.pofile(os.path.join(root, file)).metadata

+                 except UnicodeDecodeError as e:

+                     # encoding error, to investigate before using it in TM

+                     error = "error-unicode"

+                 except OSError as e:

+                     # maybe a polib bug? to investigate before using it in TM

+                     error = "error-os"

+ 

+                 lang = choose_lang(racine, metadata, error)

+ 

+                 try:

+                     langs[lang].append(os.path.join(root, file))

+                 except KeyError:

+                     langs[lang] = list()

+                     langs[lang].append(os.path.join(root, file))

+ 

+     return langs

+ 

+ def choose_lang(filename, metadata, error):

+     """ From a po file and its medata, choose the most likely language code """

+     """ By priority: the Language medata """

+     """ Returns: a language code """

+ 

+     lang = ""

+     file_name = filename.lower()

+     meta_language = ""

+     meta_team = ""

+     try:

+         meta_language = metadata.get("Language").lower()

+     except AttributeError:

+         pass

+ 

+     try:

+         meta_team = metadata.get("Language-Team").lower()

+     except AttributeError:

+         pass

+ 

+     if meta_language in language_codes.LANGUAGES:

+         lang = meta_language

+ 

+     elif file_name in language_codes.LANGUAGES:

+         lang = file_name

+     else:

+         lang = "noresult"

+ 

+     # try languages (some codes here are excluded from language_codes)

+     if lang == "noresult":

+         loc = [ lang[0] for lang in languages.LANGUAGES ]

+ 

+         if meta_language in loc:

+             lang = meta_language.lower()

+         elif file_name in loc:

+             lang = file_name.lower()

+ 

+     # try ALIASES

+     if lang == "noresult":

+         if meta_language in aliases.ALIASES.keys():

+             lang = aliases.ALIASES[meta_language].lower()

+         elif file_name in aliases.ALIASES.keys():

+             lang = aliases.ALIASES[file_name].lower()

+         else:

+             lang = "error"

+ 

+     # harmonization (example: mo = ro_MD)

+     if lang in aliases.ALIASES.keys():

+         lang = aliases.ALIASES[lang].lower()

+ 

+     return lang

+ 

+ 

+ if __name__ == '__main__':

+     main()

+ 

file modified
+1 -1
@@ -27,7 +27,7 @@ 

  

      args = parser.parse_args()

  

-     RESULT_FOLDER = "./results/f{r}".format(r=args.release)

+     RESULT_FOLDER = "./results/f{r}/stats/".format(r=args.release)

  

      file = RESULT_FOLDER + "/3.result.csv"

      parse(file, args.include_english, args.include_nonofficial)

@@ -0,0 +1,136 @@ 

+ #!/usr/bin/env python3

+ """For each package, compute stats"""

+ 

+ import argparse

+ import glob

+ import json

+ import os

+ import subprocess

+ import tempfile

+ 

+ def main():

+     """Handle params"""

+ 

+     parser = argparse.ArgumentParser(

+         description="Computes stats for each srpm detected")

+     parser.add_argument("--release", required=True, type=int, default=31,

+                         choices=[30, 31, 32],

+                         help="Provide the Fedora release to analyze")

+ 

+     args = parser.parse_args()

+ 

+     packages_folder = "./results/f{v}/packages/".format(v=args.release)

+ 

+     filenames = [f for f in os.listdir(packages_folder) if os.path.isdir(os.path.join(packages_folder, f))]

+ 

+     for package in sorted(filenames):

+         with open(os.path.join(packages_folder, package, "discover.json"), 'r') as f:

+             translation_files = json.load(f)

+ 

+         tmp = os.path.join(packages_folder, package)

+ 

+         for translation in translation_files:

+             if translation["file_format"] == "po":

+                 get_po_translation_level(tmp, translation, package, packages_folder)

+             elif translation["file_format"] == "ts":

+                 get_ts_translation_level(tmp, translation, package, packages_folder)

+             elif translation["file_format"] == "json":

+                 get_json_translation_level(tmp, translation, package, packages_folder)

+             elif translation["file_format"] == "auto":

+                 # it's a detection of .tx configuration

+                 continue

+ 

+ def get_po_translation_level(path, discover, name, packages_folder):

+     filemask = discover["filemask"]

+     stats_file = packages_folder + "/{p}/stats.csv".format(p=name)

+     error_file = packages_folder + "/{p}/stats.errors.txt".format(p=name)

+ 

+     with open(stats_file, 'a') as stats:

+         with open(error_file, 'a') as error:

+             subprocess.run(["pocount", filemask.split("*")[0], "--csv"],

+                            stdout=stats, stderr=error, check=True, cwd=path)

+ 

+     subprocess.run(["sed",

+                     "-i",

+                     "-e",

+                     "s|{p}|.|g".format(p=path),

+                     error_file],

+                    check=True)

+ 

+ 

+ def get_ts_translation_level(path, discover, name, packages_folder):

+     filemask = discover["filemask"]

+     stats_file = packages_folder + "/{p}/stats.csv".format(p=name)

+     error_file = packages_folder + "/{p}/stats.errors.txt".format(p=name)

+ 

+     with open(stats_file, 'a') as stats:

+         with open(error_file, 'a') as error:

+             subprocess.run(["pocount", filemask.split("*")[0], "--csv"],

+                            stdout=stats, stderr=error, check=True, cwd=path)

+ 

+     subprocess.run(["sed",

+                     "-i",

+                     "-e",

+                     "s|{p}|.|g".format(p=path),

+                     error_file],

+                    check=True)

+ 

+ 

+ def get_json_translation_level(path, discover, name, packages_folder):

+     filemask = discover["filemask"]

+ 

+     stats_file = packages_folder + "/{p}/stats.csv".format(p=name)

+     error_file = packages_folder + "/{p}/stats.errors.txt".format(p=name)

+ 

+     stats = open(stats_file, 'a')

+     error = open(error_file, 'a')

+ 

+     # move only related json files to a temporary folder

+     with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmpjson:

+         for filename in glob.iglob(path + "/" + filemask):

+             # if files are in a language subfolder, reproduce the hierarchy

+             dest = os.path.join(

+                 *(os.path.dirname(filename).split(os.path.sep)[3:]))

+             os.makedirs(tmpjson + "/" + dest, exist_ok=True)

+ 

+         # convert json files to po files

+         with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmppo:

+             # use existing template, in not existing (probably a bug), try "en"

+             template_file = tmpjson + "/" + \

+                 discover.get("template", filemask.replace("*", "en"))

+ 

+             if os.path.isfile(template_file):

+                 subprocess.run(["json2po",

+                                 "-t",

+                                 template_file,

+                                 tmpjson,

+                                 tmppo,

+                                 "--progress=none"],

+                                stderr=error,

+                                check=True,

+                                cwd=tmppo)

+ 

+                 # compute stats

+                 subprocess.run(["pocount",

+                                 filemask.split("*")[0],

+                                 "--csv"],

+                                stdout=stats,

+                                stderr=error,

+                                check=True,

+                                cwd=tmppo)

+             else:

+                 print("  template doesn't exist, is it a translation-finder bug?")

+ 

+     stats.close()

+     error.close()

+ 

+     subprocess.run(["sed",

+                     "-i",

+                     "-e",

+                     "s|{p}|.|g".format(p=path),

+                     error_file],

+                    check=True)

+ 

+ 

+ if __name__ == '__main__':

+     main()

file modified
+34 -126
@@ -2,18 +2,12 @@ 

  """Consolidate each po files into compendium"""

  

  import argparse

- import glob

  import json

  import os

- import polib

  import subprocess

  import tempfile

  import time

  

- from shutil import copyfile

- from shutil import rmtree

- from weblate_language_data import aliases, languages, language_codes

- 

  def main():

      """Handle params"""

  
@@ -24,37 +18,23 @@ 

                          choices=[30, 31, 32],

                          help="Provide the Fedora release to analyze")

  

+     parser.add_argument("--refresh", action="store_true",

+                     help="Force refresh of files")

+ 

      parser.add_argument("--lang", required=False, type=str,

                          help="Filter a language to analyze")

  

-     parser.add_argument("--refresh", action="store_true",

-                         help="Refresh list of available languages to analyze")

- 

      args = parser.parse_args()

  

-     release_folder = "./tm/f{v}/".format(v=args.release)

+     release_folder = "./results/f{v}/".format(v=args.release)

      lang_path = os.path.join(release_folder, "languages/")

      packages_path = os.path.join(release_folder, "packages/")

      tm_folder = os.path.join(release_folder, "out/")

+     os.makedirs(tm_folder, exist_ok=True)

  

-     # Step 1: compute the list of languages

-     if args.refresh:

-         print("Refresh the list of languages")

-         rmtree(lang_path)

-         os.mkdir(lang_path)

- 

-         start_time_search = time.time()

- 

-         po_langs = detect_languages(packages_path)

+     print("Building the translation memory for every languages")

+     start_time_search = time.time()

  

-         for lang in po_langs.keys():

-             with open(os.path.join(lang_path, lang + '.json'), 'w') as f:

-                 f.write(json.dumps(po_langs[lang], indent=2))

- 

-         search_duration = round(time.time() - start_time_search, 1)

-         print(" Done in {d} seconds".format(d=search_duration))

- 

-     # Step 2: call TM activities

      if args.lang:

          with open(os.path.join(lang_path, args.lang + ".json"), "r") as read_file:

              files = json.load(read_file)
@@ -67,134 +47,62 @@ 

              with open(os.path.join(lang_path, lang), "r") as read_file:

                  files = json.load(read_file)

  

-             compute_lang(lang[:-len('.json')], files, tm_folder)

- 

- def detect_languages(tm_folder):

-     """ For each po file, detect metadatas and deduct the language     """

-     """ Requires: a file hierarchy with po files                       """

-     """ Returns: a dictionary of lists, key=lang code, value=file list """

-     langs = {}

+             compute_lang(lang[:-len('.json')], files, tm_folder, args.refresh)

  

-     for root, directories, files in os.walk(tm_folder):

-         for file in files:

-             racine, ext = os.path.splitext(file)

-             if ext == ".po":

-                 metadata = dict()

-                 error = ""

-                 try:

-                     metadata = polib.pofile(os.path.join(root, file)).metadata

-                 except UnicodeDecodeError as e:

-                     # encoding error, to investigate before using it in TM

-                     error = "error-unicode"

-                 except OSError as e:

-                     # maybe a polib bug? to investigate before using it in TM

-                     error = "error-os"

+     search_duration = round(time.time() - start_time_search, 1)

+     print(" Done in {d} seconds".format(d=search_duration))

  

-                 lang = choose_lang(racine, metadata, error)

  

-                 try:

-                     langs[lang].append(os.path.join(root, file))

-                 except KeyError:

-                     langs[lang] = list()

-                     langs[lang].append(os.path.join(root, file))

- 

-     return langs

- 

- def choose_lang(filename, metadata, error):

-     """ From a po file and its medata, choose the most likely language code """

-     """ By priority: the Language medata """

-     """ Returns: a language code """

- 

-     lang = ""

-     file_name = filename.lower()

-     meta_language = ""

-     meta_team = ""

-     try:

-         meta_language = metadata.get("Language").lower()

-     except AttributeError:

-         pass

- 

-     try:

-         meta_team = metadata.get("Language-Team").lower()

-     except AttributeError:

-         pass

- 

-     if meta_language in language_codes.LANGUAGES:

-         lang = meta_language

- 

-     elif file_name in language_codes.LANGUAGES:

-         lang = file_name

-     else:

-         lang = "noresult"

- 

-     # try languages (some codes here are exclused from languages_codes)

-     if lang == "noresult":

-         loc = [ lang[0] for lang in languages.LANGUAGES ]

- 

-         if meta_language in loc:

-             lang = meta_language

-         elif file_name in loc:

-             lang = file_name

- 

-     # try ALIASES

-     if lang == "noresult":

-         if meta_language in aliases.ALIASES.keys():

-             lang = aliases.ALIASES[meta_language]

-         elif file_name in aliases.ALIASES.keys():

-             lang = aliases.ALIASES[file_name]

-         else:

-             lang = "error"

- 

-     return lang

- 

- def compute_lang(lang, langfiles, tm_folder):

+ def compute_lang(lang, langfiles, tm_folder, refresh):

      """ Generate compendium and convert it to tmx """

      """  """

-     print("Computing: " + lang)

+     print(" Computing: " + lang)

  

      # po consolidation

      compendium_file = tm_folder + lang + ".po"

      compendium_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), compendium_file)

  

-     pofiles = [os.path.join(os.path.dirname(os.path.abspath(__file__)),f) for f in langfiles]

- 

-     count = 0

+     if not os.path.isfile(compendium_file) or refresh is True:

+         pofiles = [os.path.join(os.path.dirname(os.path.abspath(__file__)),f) for f in langfiles]

+         count = 0

  

-     with tempfile.TemporaryDirectory(prefix="l10n-tm") as tmp:

-         for i in pofiles:

-             try:

-                 command = ["msguniq", i, "--output-file", count.__str__(), "--no-location"]

-                 subprocess.run(command, check=True, cwd=tmp, capture_output=True)

-             except subprocess.CalledProcessError as e:

+         with tempfile.TemporaryDirectory(prefix="l10n-tm") as tmp:

+             for i in pofiles:

                  try:

-                     command = ["msguniq", i, "--output-file", count.__str__(), "--to-code", "utf-8", "--no-location"]

+                     command = ["msguniq", i, "--output-file", count.__str__(), "--no-location"]

                      subprocess.run(command, check=True, cwd=tmp, capture_output=True)

                  except subprocess.CalledProcessError as e:

-                     print("Error with msguniq {i}, error: {e}".format(i=i, e=e))

+                     try:

+                         command = ["msguniq", i, "--output-file", count.__str__(), "--to-code", "utf-8", "--no-location"]

+                         subprocess.run(command, check=True, cwd=tmp, capture_output=True)

+                     except subprocess.CalledProcessError as e:

+                         print("Error with msguniq {i}, error: {e}".format(i=i, e=e))

  

-             count += 1

+                 count += 1

  

-         onlyfiles = [f for f in os.listdir(tmp) if os.path.isfile(os.path.join(tmp, f))]

-         command = ["msgcat", "--force-po", "--no-location", "--output-file", compendium_file] + onlyfiles

+             onlyfiles = [f for f in os.listdir(tmp) if os.path.isfile(os.path.join(tmp, f))]

+             command = ["msgcat", "--force-po", "--no-location", "--output-file", compendium_file] + onlyfiles

  

-         try:

-             subprocess.run(command, check=True, cwd=tmp, capture_output=True)

-         except subprocess.CalledProcessError as e:

-             print(" msgcat exception...")

+             try:

+                 subprocess.run(command, check=True, cwd=tmp, capture_output=True)

+             except subprocess.CalledProcessError as e:

+                 print(" msgcat exception...")

  

  

      # po to tmx convertion

      tmx_file = tm_folder + lang + ".tmx"

      command = ["po2tmx", "--language="+lang, "--progress=none",

                 compendium_file, "--output="+tmx_file]

-     subprocess.run(command, check=True, capture_output=True)

+     if not os.path.isfile(tmx_file) or refresh is True:

+         subprocess.run(command, check=True, capture_output=True)

  

      # language terminology

      terminology_file = tm_folder + lang + ".terminology.po"

      command = ["poterminology", "--ignore-case", "--fold-titlecase",

                  "--inputs-needed", "1",

                  "--progress=none", compendium_file, "--output="+terminology_file]

-     subprocess.run(command, check=True, capture_output=True)

+     if not os.path.isfile(tmx_file) or refresh is True:

+         subprocess.run(command, check=True, capture_output=True)

  

  if __name__ == '__main__':

      main()

no initial comment

rebased onto 83b997c

3 years ago

1 new commit added

  • move language detection in a dedicated file
3 years ago

2 new commits added

  • only generate if no file and add refresh option
  • use aliases to reduce the number of languages
3 years ago

1 new commit added

  • mising lower on some language code
3 years ago

Pull-Request has been merged by jibecfed

3 years ago