#14 add translation memories to website generation
Merged 3 years ago by jibecfed. Opened 3 years ago by jibecfed.

file modified
+1
@@ -3,4 +3,5 @@ 

  results/

  website/content/*

  website/public/

+ website/static/*

  srpms_*.lst

file modified
+9 -1
@@ -89,7 +89,7 @@ 

      files = []

      results = dict()

      with open(os.path.join(lang_folder, analized_lang + ".json"), "r") as read_file:

-         files = json.load(read_file)

+         files = json.load(read_file)["po"]

  

      print(" Analysing language {l}, with {c} files".format(l=analized_lang, c=len(files)))

  
@@ -218,6 +218,12 @@ 

          # 1 is language name

          codes[language[1].lower()] = language[0].lower()

  

+     code_capitalized = dict()

+     for language in languages.LANGUAGES:

+         # 0 is language code

+         # 1 is language name

+         code_capitalized[language[0].lower()] = language[0]

+ 

      file_name = filename.lower().replace("-", "_")

      meta_language = metadata.get("Language", "").lower().replace("-", "_")

  
@@ -267,6 +273,8 @@ 

      if lang in aliases.ALIASES.keys():

          lang = aliases.ALIASES[lang].lower()

  

+     lang = code_capitalized.get(lang, lang)

+ 

      return lang, decision

  

  

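The new code_capitalized map restores Weblate's canonical casing after the lowercased matching pass, so a detected "pt_br" resolves back to "pt_BR". A minimal sketch of the same lookup, assuming the (code, name, ...) tuples shipped by weblate-language-data:

    # Sketch only: LANGUAGES is assumed to come from the weblate-language-data
    # package already listed in requirements.txt.
    from weblate_language_data.languages import LANGUAGES

    code_capitalized = {code.lower(): code for code, *_ in LANGUAGES}

    lang = "pt_br"
    lang = code_capitalized.get(lang, lang)  # falls back to the input when unknown
    print(lang)  # pt_BR
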
file modified
+46 -77
@@ -5,10 +5,10 @@ 

  import glob

  import json

  import os

- import subprocess

  import shutil

  import tempfile

  

+ from translate.tools.pocount import calcstats

  

  def main():

      """Handle params"""
@@ -30,119 +30,88 @@ 

      packages = [f for f in os.listdir(packages_folder) if os.path.isdir(os.path.join(packages_folder, f))]

      count = 0

  

+     if not os.path.exists(packages_stats_folder):

+         os.makedirs(packages_stats_folder)

+ 

      for package in sorted(packages):

          count += 1

          print(" {c}/{t} - {p}".format(c=count, t=len(packages), p=package))

-         with open(os.path.join(packages_folder, package, "discover.json"), 'r') as f:

+         with open(os.path.join(packages_folder, package, "discover.json"), "r") as f:

              discoveries = json.load(f)

  

          src_folder = os.path.join(packages_folder, package)

-         dest_folder = os.path.join(packages_stats_folder, package)

-         if not os.path.exists(dest_folder):

-             os.makedirs(dest_folder)

- 

-         stats_file = os.path.join(dest_folder, "stats.csv")

-         error_file = os.path.join(dest_folder, "stats.errors.txt")

+         stats_file = os.path.join(packages_stats_folder, package + ".json")

  

          if os.path.isfile(stats_file):

              continue

  

+         results = dict()

          for discover in discoveries:

              files = glob.glob(os.path.join(src_folder, discover["filemask"]))

  

              if discover["file_format"] == "po":

-                 get_po_translation_level(files, stats_file, error_file)

-             elif discover["file_format"] == "json":

-                 get_json_translation_level(files, os.path.join(src_folder, discover["template"]), stats_file, error_file)

+                 results[discover["filemask"]] = get_po_translation_level(files)

  

-     print(" Removing duplicates")

-     count = 0

-     for package in sorted(packages):

-         count += 1

-         print(" {c}/{t} - {p}".format(c=count, t=len(packages), p=package))

-         input_file = packages_folder + "{p}/stats.csv".format(p=package)

+         if len(results) > 0:

+             with open(stats_file, "w") as f:

+                 json.dump(results, f, indent=2)

  

-         try:

-             with open(input_file, 'r') as f:

-                 lines = f.readlines()

- 

-             seen_lines = set()

-             with open(input_file, 'w') as f:

-                 for line in lines:

-                     if line not in seen_lines:

-                         seen_lines.add(line)

-                         f.write(line)

-         except FileNotFoundError:

-             continue

  

      print("Computing language stats")

      languages = [f for f in os.listdir(languages_folder)]

      count = 0

  

-     dest_folder = languages_stats_folder

+     if not os.path.exists(languages_stats_folder):

+         os.makedirs(languages_stats_folder)

  

      for language in sorted(languages):

          count += 1

          lang = language[:-5]

  

          print(" {c}/{t} - {l}".format(c=count, t=len(languages), l=lang))

-         with open(os.path.join(languages_folder, language), 'r') as f:

+         with open(os.path.join(languages_folder, language), "r") as f:

              discoveries = json.load(f)

  

-         stats_file = os.path.join(dest_folder, lang + ".stats.csv")

-         error_file = os.path.join(dest_folder, lang + ".stats.errors.txt")

+         stats_file = os.path.join(languages_stats_folder, lang + ".json")

  

          if os.path.isfile(stats_file):

              continue

  

          files = discoveries.get("po", [])

          if files:

-             get_po_translation_level(files, stats_file, error_file)

+             with open(stats_file, "w") as f:

+                 json.dump(get_po_translation_level(files), f, indent=2)

  

  

- def get_po_translation_level(files,  stats_file, error_file):

+ def get_po_translation_level(files):

      """ Compute results """

+     stats = dict()

+ 

+     for file in files:

+         try:

+             stat = calcstats(file)

+         except Exception as e:

+             print(" {f} triggered an {t} exception: {e}".format(f=file, t=type(e).__name__, e=e))

+             continue

+ 

+         keys = [

+             "translatedsourcewords",

+             "fuzzysourcewords",

+             "untranslatedsourcewords",

+             "translated",

+             "fuzzy",

+             "untranslated",

+             "translatedtargetwords",

+         ]

+         results = dict()

+         for key in keys:

+             results[key] = stat.get(key, 0)

+ 

+         stats[file] = results

+ 

+     return stats

+ 

  

-     with open(stats_file, 'a') as stats:

-         with open(error_file, 'a') as error:

-             try:

-                 subprocess.run(["pocount", "--csv"] + files,

-                            stdout=stats, stderr=error, check=True)

-             except subprocess.CalledProcessError as e:

-                 print(" Pocount --csv failed.")

-                 print(e)

-                 print(files)

-                 exit()

- 

- 

- def get_json_translation_level(files, template, stats_file, error_file):

-     """ convert json files into po and call get_po_translation_level """

- 

-     # move only related json files to a temporary folder

-     with tempfile.TemporaryDirectory(prefix="l10n-stats") as tmpjson:

-         error = open(error_file, 'a')

-         pofiles = []

-         for filename in files:

-             # if filesare in language subfolder, reproduce the hierarchy

-             dest = filename.replace(os.path.basename(filename), "")

-             os.makedirs(tmpjson + "/" + dest, exist_ok=True)

- 

-             if os.path.isfile(template):

-                 po = os.path.join(tmpjson, filename.replace(".json", ".po"))

-                 subprocess.run(["json2po",

-                                 "-t",

-                                 template,

-                                 filename,

-                                 po,

-                                 "--progress=none"],

-                                stderr=error,

-                                check=True)

-                 pofiles.append(po)

-             else:

-                 print("  {t} missing, translation-finder bug?".format(t=template))

-         error.close()

-         get_po_translation_level(pofiles, stats_file, error_file)

- 

- 

- if __name__ == '__main__':

+ if __name__ == "__main__":

      main()
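
Calling calcstats() in-process replaces one pocount subprocess per package, and the returned dict is filtered down to the seven keys listed above before being written as JSON keyed by filemask. A minimal sketch of the call, assuming a hypothetical input file and a translate-toolkit version that exposes calcstats:

    # Sketch only: "po/fr.po" is a placeholder path; the exact key set
    # returned by calcstats depends on the translate-toolkit version.
    from translate.tools.pocount import calcstats

    stat = calcstats("po/fr.po")
    for key in ("translated", "fuzzy", "untranslated", "translatedsourcewords"):
        print(key, stat.get(key, 0))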

file modified
+11 -11
@@ -7,7 +7,6 @@ 

  import os

  import subprocess

  import tempfile

- import time

  

  

  def main():
@@ -37,7 +36,6 @@ 

      os.makedirs(tm_folder, exist_ok=True)

  

      print("Building the translation memory for every languages")

-     start_time_search = time.time()

  

      if args.lang:

          with open(os.path.join(lang_path, args.lang + ".json"), "r") as read_file:
@@ -61,9 +59,6 @@ 

          print("Compressing files")

          compress(tm_folder)

  

-     search_duration = round(time.time() - start_time_search, 1)

-     print(" Done in {d} seconds".format(d=search_duration))

- 

  

  def compute_lang(lang, langfiles, tm_folder, refresh):

      """ Generate compendium and convert it to tmx """
@@ -71,7 +66,7 @@ 

      print(" Computing: " + lang)

  

      # po consolidation

-     compendium_file = tm_folder + lang + ".po"

+     compendium_file = os.path.join(tm_folder, lang + ".po")

      compendium_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), compendium_file)

  

      if not os.path.isfile(compendium_file) or refresh is True:
@@ -101,14 +96,14 @@ 

                  print(" msgcat exception...")

  

      # po to tmx convertion

-     tmx_file = tm_folder + lang + ".tmx"

+     tmx_file = os.path.join(tm_folder, lang + ".tmx")

      command = ["po2tmx", "--language="+lang, "--progress=none",

                 compendium_file, "--output="+tmx_file]

      if not os.path.isfile(tmx_file) or refresh is True:

          subprocess.run(command, check=True, capture_output=True)

  

      # language terminology

-     terminology_file = tm_folder + lang + ".terminology.po"

+     terminology_file = os.path.join(tm_folder, lang + ".terminology.po")

      command = ["poterminology", "--ignore-case", "--fold-titlecase",

                 "--inputs-needed", "1",

                 "--progress=none", compendium_file, "--output=" + terminology_file]
@@ -119,9 +114,9 @@ 

  def check_lang(lang, tm_folder):

      """ make sure the files were generated """

  

-     compendium_file = tm_folder + lang + ".po"

-     tmx_file = tm_folder + lang + ".tmx"

-     terminology_file = tm_folder + lang + ".terminology.po"

+     compendium_file = os.path.join(tm_folder, lang + ".po")

+     tmx_file = os.path.join(tm_folder, lang + ".tmx")

+     terminology_file = os.path.join(tm_folder, lang + ".terminology.po")

  

      if not os.path.isfile(compendium_file):

          print(" {l}-compendium is missing".format(l=lang))
@@ -139,7 +134,12 @@ 

      files = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]

  

      for file in sorted(files):

+         if file.endswith(".gz"):

+             continue

+ 

          dest = file + ".gz"

+         if os.path.isfile(os.path.join(folder, dest)):

+             continue

  

          with open(os.path.join(folder, file), "rb") as file_in:

              with gzip.open(os.path.join(folder, dest), "wb") as file_out:

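The compress() function is now idempotent: .gz inputs are never re-compressed and existing archives are kept, so a re-run only pays for new files. A standalone sketch of that loop, with the copy body assumed to match the existing function:

    # Sketch only: the gzip copy at the end is assumed from the surrounding code.
    import gzip
    import os
    import shutil

    def compress(folder):
        for file in sorted(os.listdir(folder)):
            if file.endswith(".gz"):
                continue  # already an archive, never re-compress it
            dest = os.path.join(folder, file + ".gz")
            if os.path.isfile(dest):
                continue  # compressed by a previous run
            with open(os.path.join(folder, file), "rb") as file_in:
                with gzip.open(dest, "wb") as file_out:
                    shutil.copyfileobj(file_in, file_out)
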
file modified
+129 -87
@@ -2,8 +2,10 @@ 

  """Consolidate each po files into compendium"""

  

  import argparse

+ import datetime

  import jinja2

  import json

+ import langtable

  import os

  import pandas as pd

  import shutil
@@ -19,6 +21,9 @@ 

                          choices=[30, 31, 32],

                          help="Provide the Fedora release to analyze")

  

+     parser.add_argument("--refresh", action="store_true",

+                         help="Force refresh of files")

+ 

      args = parser.parse_args()

  

      release_folder = "./results/f{v}/".format(v=args.release)
@@ -30,164 +35,196 @@ 

      data_langs_folder = os.path.join(data_folder, "languages")

      data_pkgs_folder = os.path.join(data_folder, "packages")

  

+     tm_folder = os.path.join(release_folder, "languages-tm")

+ 

      static_folder = "./website/content/f{v}/".format(v=args.release)

      static_langs_folder = os.path.join(static_folder, "language")

      static_pkgs_folder = os.path.join(static_folder, "package")

+     static_tm_folder = "./website/static/f{v}/".format(v=args.release)

  

      # clean destination folders

-     for folder in [data_langs_folder, data_pkgs_folder, static_langs_folder, static_pkgs_folder]:

-         if os.path.isdir(folder):

+     for folder in [data_langs_folder, data_pkgs_folder, static_langs_folder, static_pkgs_folder, static_tm_folder]:

+         if args.refresh and os.path.isdir(folder):

              shutil.rmtree(folder)

  

-         os.makedirs(folder)

+         os.makedirs(folder, exist_ok=True)

  

-     # prepare json files for packages

-     print("prepare json files for packages")

-     packages = [d for d in os.listdir(packages_stats) if os.path.isdir(os.path.join(packages_stats, d))]

+     print("Prepare json files for packages")

+     packages = [d for d in os.listdir(packages_stats) if os.path.isfile(os.path.join(packages_stats, d))]

      log_files = pd.read_csv(langs_log, header=None, skipinitialspace=True)

      log_files = log_files.iloc[:, [0, 4]]

-     log_files.columns = ["Filename", "lang_code"]

+     log_files.columns = ["filename", "lang_code"]

  

      packages_langs_results = dict()

      for package in sorted(packages):

-         file_stats = os.path.join(packages_stats, package, "stats.csv")

-         if not os.path.isfile(file_stats):

-             print(" Package: {p} missing stats file {f}".format(p=package, f=file_stats))

-             continue

+         name = package[:-len(".json")]

+         dest_file = os.path.join(data_pkgs_folder, name + ".json")

+         file_stats = os.path.join(packages_stats, name + ".json")

  

          results = consolidate_package_stats(file_stats, log_files)

-         store_json_file(package, results, data_pkgs_folder)

+         store_json_file(results, dest_file)

  

          langs_results = results.get("equalsormorethan80percent", []) + results.get("between50and80percent", []) + results.get("lessorequalto50percent", [])

  

          for langs in langs_results:

              val = packages_langs_results.get(langs["lang_code"], [])

-             val.append({"name": package, "progress": langs["progress"]})

+             val.append({"name": name, "progress": langs["progress"]})

              packages_langs_results[langs["lang_code"]] = val

  

-     # prepare json files for languages

-     print("prepare json files for languages")

+     print("Prepare json files for languages")

      langs = [f for f in os.listdir(langs_stats) if os.path.isfile(os.path.join(langs_stats, f))]

      for lang in sorted(langs):

-         if lang.endswith(".stats.csv"):

-             code = lang[:-len(".stats.csv")]

+         if lang.endswith(".json"):

+             code = lang[:-len(".json")]

+             dest_file = os.path.join(data_langs_folder, code + ".json")

+ 

+             if os.path.isfile(dest_file):

+                 continue

+ 

              results = consolidate_language_stats(os.path.join(langs_stats, lang))

              results["packages"] = packages_langs_results.get(code, dict())

-             store_json_file(code, results, data_langs_folder)

+             store_json_file(results, dest_file)

  

      # generate static content for languages

-     print("generate static content for languages")

+     print("Generate static content for languages")

      langs = [f for f in os.listdir(data_langs_folder) if os.path.isfile(os.path.join(data_langs_folder, f))]

      for lang in sorted(langs):

          code = lang[:-len(".json")]

-         dest = os.path.join(static_langs_folder, code + ".md")

+         dest_file = os.path.join(static_langs_folder, code + ".md")

+ 

+         if os.path.isfile(dest_file):

+             continue

+ 

          with open(os.path.join(data_langs_folder, lang), "r") as read_file:

              content = json.load(read_file)

  

-         generate_static_pages_langs(args.release, code, content, dest)

+         generate_static_pages_langs(args.release, code, content, dest_file)

  

-     print("generate static content for packages")

-     # generate static content for packages

+     print("Generate static content for packages")

      packages = [f for f in os.listdir(data_pkgs_folder) if os.path.isfile(os.path.join(data_pkgs_folder, f))]

      for package in sorted(packages):

          code = package[:-len(".json")]

-         dest = os.path.join(static_pkgs_folder, code + ".md")

+         dest_file = os.path.join(static_pkgs_folder, code + ".md")

+ 

+         if os.path.isfile(dest_file):

+             continue

+ 

          with open(os.path.join(data_pkgs_folder, package), "r") as read_file:

              content = json.load(read_file)

  

-         generate_static_pages_packages(args.release, code, content, dest)

+         generate_static_pages_packages(args.release, code, content, dest_file)

+ 

+     print("Copy translation memories")

+     langs = [f for f in os.listdir(tm_folder) if os.path.isfile(os.path.join(tm_folder, f))]

+     for lang in langs:

+         if lang.endswith(".gz"):

+             shutil.copyfile(os.path.join(tm_folder, lang), os.path.join(static_tm_folder, lang))

  

  

- def consolidate_language_stats(csv_file):

+ def consolidate_language_stats(stats_file):

      """ From a CSV file, return key indicators """

      results = dict()

  

-     fieldnames = {"Filename": "str",

-                   "TranslatedMessages": "int",

-                   "TranslatedSourceWords": "int",

-                   "TranslatedTargetWords": "int",

-                   "FuzzyMessages": "int",

-                   "FuzzySourceWords": "int",

-                   "UntranslatedMessages": "int",

-                   "UntranslatedSource Words": "int",

-                   "TotalMessage": "int",

-                   "TotalSourceWords": "int",

-                   "ReviewMessages": "int",

-                   "ReviewSourceWords": "int"}

- 

-     stats_df = pd.read_csv(csv_file, header=0, skipinitialspace=True)

+     fieldnames = {

+         "filename": "str",

+         "translatedsourcewords": "int",

+         "fuzzysourcewords": "int",

+         "untranslatedsourcewords": "int",

+         "translated": "int",

+         "fuzzy": "int",

+         "untranslated": "int",

+         "translatedtargetwords": "int",

+         "totalsourcewords": "int"

+     }

+ 

+     stats_df = pd.read_json(stats_file, orient="index")

      stats_df.fillna(0, inplace=True)

+     stats_df.reset_index(level=0, inplace=True)

+     stats_df["totalsourcewords"] = stats_df["untranslatedsourcewords"] + stats_df["translatedsourcewords"]

      stats_df.columns = fieldnames.keys()

  

-     stats_df["package"] = stats_df["Filename"].str.split("/", expand=True)[4]

+     stats_df["package"] = stats_df["filename"].str.split("/", expand=True)[4]

  

      results["packages"] = stats_df["package"].unique().tolist()

-     results["progress"] = round(stats_df["TranslatedSourceWords"].sum() / stats_df["TotalSourceWords"].sum() * 100, 1)

+     results["progress"] = round(stats_df["translatedsourcewords"].sum() / stats_df["totalsourcewords"].sum() * 100, 1)

  

-     for kpi in ["TotalSourceWords", "TranslatedSourceWords"]:

-         results[kpi + "Sum"] = int(stats_df[kpi].sum())

+     for kpi in ["totalsourcewords", "translatedsourcewords"]:

+         results[kpi + "sum"] = int(stats_df[kpi].sum())

  

      return results

  

  

- def consolidate_package_stats(csv_file, log_files):

+ def consolidate_package_stats(stats_file, log_files):

      """ From a CSV file, return key indicators """

      results = dict()

  

-     fieldnames = {"Filename": "str",

-                   "TranslatedMessages": "int",

-                   "TranslatedSourceWords": "int",

-                   "TranslatedTargetWords": "int",

-                   "FuzzyMessages": "int",

-                   "FuzzySourceWords": "int",

-                   "UntranslatedMessages": "int",

-                   "UntranslatedSource Words": "int",

-                   "TotalMessage": "int",

-                   "TotalSourceWords": "int",

-                   "ReviewMessages": "int",

-                   "ReviewSourceWords": "int"}

- 

-     try:

-         stats_df = pd.read_csv(csv_file, header=0, skipinitialspace=True)

-     except pd.errors.EmptyDataError as e:

-         print(" File {f} raised {e}".format(f=csv_file, e=e))

-         return results

+     fieldnames = {

+         "filename": "str",

+         "translatedsourcewords": "int",

+         "fuzzysourcewords": "int",

+         "untranslatedsourcewords": "int",

+         "translated": "int",

+         "fuzzy": "int",

+         "untranslated": "int",

+         "translatedtargetwords": "int",

+         "totalsourcewords": "int"

+     }

+ 

+     with open(stats_file) as f:

+         _json = json.load(f)

+     dfs = []

+     total_source_words = 0

+ 

+     for template in _json.keys():

+         tmp_df = pd.DataFrame.from_dict(_json.get(template), orient="index")

+         tmp_df.fillna(0, inplace=True)

+         tmp_df.reset_index(level=0, inplace=True)

+ 

+         # sometimes no files were found, which means no stats can be used

+         if len(tmp_df) == 0:

+             print(" The template {t} for {f} is empty".format(t=template, f=stats_file))

+             continue

  

-     stats_df.fillna(0, inplace=True)

-     stats_df.columns = fieldnames.keys()

+         tmp_df["totalsourcewords"] = tmp_df["untranslatedsourcewords"] + tmp_df["translatedsourcewords"]

+         tmp_df.columns = fieldnames.keys()

  

-     stats_df_w_lang = pd.merge(stats_df, log_files, how="inner", on="Filename")

-     stats_df_no_lang = pd.merge(stats_df, log_files, how="outer", indicator=True).loc[lambda x: x["_merge"] == "left_only"]

+         total_source_words += max(tmp_df["totalsourcewords"])

+ 

+         dfs.append(tmp_df)

  

-     try:

-         total_source_words = int(max(stats_df_w_lang["TotalSourceWords"]))

-     except ValueError as e:

-         print(" File {f} raised ValueError {e}".format(f=csv_file, e=e))

+     if len(dfs) > 1:

+         stats_df = pd.concat(dfs)

+     elif len(dfs) == 0:

+         print("There is no stats for {f}".format(f=stats_file))

          return results

+     else:

+         stats_df = dfs[0]

+ 

+     stats_df_w_lang = pd.merge(stats_df, log_files, how="inner", on="filename")

+     stats_df_no_lang = pd.merge(stats_df, log_files, how="outer", indicator=True).loc[lambda x: x["_merge"] == "left_only"]

  

-     temp = stats_df_w_lang.groupby(["lang_code"]).agg({"TranslatedSourceWords": ["sum"], }).reset_index().droplevel(1, axis=1).to_dict(orient="records")

+     temp = stats_df_w_lang.groupby(["lang_code"]).agg({"translatedsourcewords": ["sum"], }).reset_index().droplevel(1, axis=1).to_dict(orient="records")

      for line in temp:

          line["progress"] = 0

          p = 0

          if total_source_words == 0:

-             print(" File {f} has TranslatedSourceWords = 0".format(f=csv_file))

+             print(" File {f} for file has translatedsourcewords = 0 in line {l}".format(f=stats_file, l=line))

              line["progress"] = p

              continue

          try:

-             p = round((int(line["TranslatedSourceWords"]) / total_source_words)*100)

+             p = round((int(line["translatedsourcewords"]) / total_source_words)*100)

          except OverflowError:

              print(" File {f} has Translated={t} and Source={tot}".format(

-                 f=csv_file,

-                 t=line["TranslatedSourceWords"],

+                 f=stats_file,

+                 t=line["translatedsourcewords"],

                  tot=total_source_words))

  

          line["progress"] = p

  

-     results["TotalSourceWords"] = total_source_words

+     results["totalsourcewords"] = total_source_words

      results["count_languages"] = len(pd.unique(stats_df_w_lang["lang_code"]))

  

      for line in sorted(temp, key=lambda k: k["progress"], reverse=True):

-         del line["TranslatedSourceWords"]

+         del line["translatedsourcewords"]

          if line["progress"] <= 50:

              hop = results.get("lessorequalto50percent", [])

              hop.append(line)
@@ -201,43 +238,48 @@ 

              hop.append(line)

              results["equalsormorethan80percent"] = hop

  

-     results["no_languages"] = stats_df_no_lang["Filename"].tolist()

+     results["no_languages"] = stats_df_no_lang["filename"].tolist()

  

      return results

  

  

- def generate_static_pages_langs(release, code, content, dest):

+ def generate_static_pages_langs(release, code, content, dest_file):

      data = content

+     data["lang_name_en"] = langtable.language_name(languageId = code, languageIdQuery = "en")

+     data["lang_name_local"] = langtable.language_name(languageId = code)

+     data["scripts"] = langtable.list_scripts(languageId = code)

      data["release"] = release

      data["lang_code"] = code

+     data["now"] = datetime.datetime.utcnow()

  

      templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader)

+     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

      TEMPLATE_FILE = "language.md"

      template = templateEnv.get_template(TEMPLATE_FILE)

      outputText = template.render(data)

  

-     with open(dest, "w") as write_out:

+     with open(dest_file, "w") as write_out:

          write_out.write(outputText)

  

  

- def generate_static_pages_packages(release, code, content, dest):

+ def generate_static_pages_packages(release, code, content, dest_file):

      data = content

      data["release"] = release

      data["package"] = code

+     data["now"] = datetime.datetime.utcnow()

  

      templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader)

+     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

      TEMPLATE_FILE = "package.md"

      template = templateEnv.get_template(TEMPLATE_FILE)

      outputText = template.render(data)

  

-     with open(dest, "w") as write_out:

+     with open(dest_file, "w") as write_out:

          write_out.write(outputText)

  

  

- def store_json_file(code, content, dest):

-     with open(os.path.join(dest, code + ".json"), "w") as f:

+ def store_json_file(content, dest_file):

+     with open(dest_file, "w") as f:

          f.write(json.dumps(content, indent=2))

  

  

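The consolidation step now loads the per-language JSON written by build_stats.py straight into pandas (orient="index" yields one row per po file), and langtable supplies the display names used in the generated pages. A minimal sketch, assuming a hypothetical fr.json with the columns listed in fieldnames above:

    # Sketch only: "fr.json" is a placeholder for a file produced by build_stats.py.
    import langtable
    import pandas as pd

    df = pd.read_json("fr.json", orient="index")
    df["totalsourcewords"] = df["untranslatedsourcewords"] + df["translatedsourcewords"]
    progress = round(df["translatedsourcewords"].sum() / df["totalsourcewords"].sum() * 100, 1)

    print(langtable.language_name(languageId="fr", languageIdQuery="en"), progress)
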
file modified
+3
@@ -4,6 +4,9 @@ 

  

  COPY requirements.txt /src/requirements.txt

  RUN pip3 install --no-cache -r /src/requirements.txt

+ RUN pip3 install --upgrade https://github.com/WeblateOrg/language-data/archive/master.zip

+ RUN pip3 install charamel

+ RUN pip3 install git+https://github.com/WeblateOrg/translation-finder.git

  

  # Fix missing metalink for f30

  COPY docker/fedora-updates-modular.repo /etc/yum.repos.d/fedora-updates-modular.repo

file modified
+1
@@ -1,3 +1,4 @@ 

  pandas

  polib

  weblate-language-data

+ langtable

file modified
+6 -2
@@ -4,10 +4,13 @@ 

  

  # this file is useful for end-to-end tests on a short corpus

  rm -rf ./results/f32/

+ rm -rf ./website/static/*

+ rm -rf ./website/content/*

  

  # walk through all the rpm files of a release and extract every translation file

  # ~ 3 h (without downloading time)

  # time podman run -it --rm -v ./:/src:z -v ./srpms:/srpms:z --tmpfs /tmp:size=4G fedlocstats:32 /src/build.py --keep-srpms gco.*

+ # time podman run -it --rm -v ./:/src:z -v ./srpms:/srpms:z --tmpfs /tmp:size=4G fedlocstats:32 /src/build.py --keep-srpms col.*

  time podman run -it --rm -v ./:/src:z -v ./srpms:/srpms:z --tmpfs /tmp:size=4G fedlocstats:32 /src/build.py --keep-srpms

  

  # deduce the list of all languages
@@ -19,15 +22,16 @@ 

  time ./build_language_list.py --release 32 --analyzealllang

  

  # generate a compendium, a translation memory and a terminology file per language

- # ~ 3 h

+ # ~ 3 h 20

  time ./build_tm.py --release 32 --compress

  

  # compute progress percentages per package and per language

  # ~ 

  time ./build_stats.py --release 32

  

+ rm -rf ~/.translate_toolkit/

  # generate the static website

- # 

+ # ~ 7 m

  time ./build_website.py --release 32

  

  (

file modified
+14 -5
@@ -1,14 +1,23 @@ 

  ---

- title: "{{ lang_code }}"

- date: 2020-11-18T18:20:46+01:00

+ title: "{{ lang_name_en }} ({{ lang_name_local }})"

+ date: {{ now }}

  ---

- Global progress for {{ lang_code }} in Fedora {{ release }} is {{ progress }}%.

+ 

+ Global progress for {{ lang_name_en }} ({{ lang_code }}) in Fedora {{ release }} is {{ progress }}%.

+ 

+ Possible scripts are: {% for script in scripts -%}{{ script }} {% endfor %}

  

  | Source words to translate  | Translated words |

  |---------------------------:|-----------------:|

- | {{ TotalSourceWordsSum }}  | {{ TranslatedSourceWordsSum }} |

+ | {{ totalsourcewordssum }}  | {{ translatedsourcewordssum }} |

+ 

+ Download:

+ 

+ * {{ "{{%" }} link "/f32/{{ lang_code }}.po.gz" {{ "%}}" }}{{ lang_code }} compendium{{ "{{%" }} /link {{ "%}}" }} (aggregation of all strings found in po files)

+ * {{ "{{%" }} link "/f32/{{ lang_code }}.terminology.po.gz" {{ "%}}" }}{{ lang_code }} terminology{{ "{{%" }} /link {{ "%}}" }} see [poterminology](https://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/poterminology.html)

+ * {{ "{{%" }} link "/f32/{{ lang_code }}.tmx.gz" {{ "%}}" }}{{ lang_code }} translation memory{{ "{{%" }} /link {{ "%}}" }} see [tmx](https://en.wikipedia.org/wiki/Translation_Memory_eXchange)

  

  Packages:

- {% for package in packages %}

+ {% for package in packages -%}

  * [{{ package.name }}]({{ '{{' }}< ref "/f{{ release }}/package/{{ package.name }}.md" >{{ '}}' }}) ({{ package.progress }})

  {% endfor %}
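
The {{ "{{%" }} ... {{ "%}}" }} constructs are needed because Jinja2 and Hugo share the same delimiters: Jinja2 must emit literal {{% link ... %}} shortcodes for Hugo to expand in a second pass. A minimal sketch of that double rendering, using an illustrative template string rather than the project's file:

    # Sketch only: demonstrates the two-stage templating (Jinja2 first, Hugo second).
    import jinja2

    tpl = jinja2.Template(
        '{{ "{{%" }} link "/f32/{{ lang_code }}.po.gz" {{ "%}}" }}'
        'compendium{{ "{{%" }} /link {{ "%}}" }}'
    )
    print(tpl.render(lang_code="fr"))
    # {{% link "/f32/fr.po.gz" %}}compendium{{% /link %}}  <- left for Hugo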

file modified
+20 -3
@@ -1,26 +1,43 @@ 

  ---

  title: "{{ package }}"

- date: 2020-11-18T18:20:46+01:00

+ date: {{ now }}

  ---

  The package {{ package }} is translated into {{ count_languages }} languages in Fedora {{ release }}.

  

- ## Languages with ≥80% words translated 

+ ## Languages with ≥80% words translated

+ {% if equalsormorethan80percent %} 

  {% for stat in equalsormorethan80percent -%}

  [{{ stat.lang_code }}]({{ '{{' }}< ref "/f{{ release }}/language/{{ stat.lang_code }}.md" >{{ '}}' }}) ({{ stat.progress }})

  {% endfor %}

+ {% else %}

+ None

+ {% endif %}

  

  ## Languages with >50% and <80% words translated

+ {% if between50and80percent %} 

  {% for stat in between50and80percent -%}

  [{{ stat.lang_code }}]({{ '{{' }}< ref "/f{{ release }}/language/{{ stat.lang_code }}.md" >{{ '}}' }}) ({{ stat.progress }})

  {% endfor %}

+ {% else %}

+ None

+ {% endif %}

+ 

  

  ## Languages with ≤50% words translated

+ {% if lessorequalto50percent %} 

  {% for stat in lessorequalto50percent -%}

  [{{ stat.lang_code }}]({{ '{{' }}< ref "/f{{ release }}/language/{{ stat.lang_code }}.md" >{{ '}}' }}) ({{ stat.progress }})

  {% endfor %}

+ {% else %}

+ None

+ {% endif %}

  

- 

+ ## Errors

+ {% if no_languages %}

  List of files for which language detection was impossible:

  {% for missing in no_languages -%}

  * {{ missing }}

  {% endfor %}

+ {% else %}

+ None

+ {% endif %}
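
Each section is now wrapped in an {% if %} ... {% else %}None{% endif %} guard, so a package with no language in a given bracket renders a plain "None" instead of an empty heading. A minimal sketch of the pattern, using an illustrative template string:

    # Sketch only: demonstrates the empty-section guard used in the template above.
    import jinja2

    tpl = jinja2.Template(
        "{% if stats %}{% for s in stats %}{{ s }} {% endfor %}{% else %}None{% endif %}"
    )
    print(tpl.render(stats=[]))        # None
    print(tpl.render(stats=[80, 92]))  # 80 92 (with a trailing space)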

file modified
+3 -6
@@ -1,14 +1,13 @@ 

  # global

  

  support for json files

- stats computation in CSV isn't so useful, maybe a direct storage in JSON would make more sense.

- results should be stored by discovered translation files, so that progress computation per language makes sens both at package level and file level.

+ default behavior: continue computing; --refresh forces a re-compute

  

  # optimization

  

  direct call to:

- - pocount: https://github.com/translate/translate/blob/master/translate/tools/pocount.py

- - 

+ * po2tmx

+ * poterminology

  

  # build_tm.py

  
@@ -27,5 +26,3 @@ 

  # global

  

  

- 

- we may detect anomalies

file modified
+4
@@ -2,3 +2,7 @@ 

  languageCode = "en-us"

  title = "Temporary demo"

  theme = "ananke"

+ staticDir = "static"

+ 

+ [markup.goldmark.renderer]

+ unsafe = true

@@ -0,0 +1,1 @@ 

+ <a href="{{ .Site.BaseURL }}{{ .Get 0 }}"/>{{ .Inner }}</a>

no initial comment

1 new commit added

  • use direct call to pocount and store result in json
3 years ago

2 new commits added

  • use filemask to store stats
  • simplify packages stats storage
3 years ago

1 new commit added

  • add language files in websites
3 years ago

1 new commit added

  • add language name in generated pages and generation date
3 years ago

1 new commit added

  • add pip dependencies for Fedora 30
3 years ago

Pull-Request has been merged by jibecfed

3 years ago