#44 store language results as csv and display file size
Merged 9 months ago by jibecfed. Opened 9 months ago by jibecfed.

file modified
+61 -80
@@ -3,6 +3,7 @@ 

  

  import argparse

  import datetime

+ from collections import defaultdict

  

  import jinja2

  import json
@@ -92,17 +93,17 @@ 

      for package in sorted(packages):

          count += 1

          log.debug("Preparing package {c}/{t} - {p}".format(c=count, t=total, p=package))

-         name = package[: -len(".json")]

-         dest_file = os.path.join(data_pkgs_folder, name + ".json")

-         file_stats = os.path.join(packages_stats, name + ".json")

+         package_name = package[: -len(".json")]

+         package_statistics_file = os.path.join(data_pkgs_folder, package_name + ".json")

+         file_stats = os.path.join(packages_stats, package_name + ".json")

  

-         results = consolidate_package_stats(file_stats, log_files, os.path.join(results_folder, "package", name))

-         store_json_file(results, dest_file)

+         results = consolidate_package_stats(file_stats, log_files, os.path.join(results_folder, "package", package_name))

+         store_json_file(results, package_statistics_file)

          for lang in results.get("stats", []):

              val = packages_langs_results.get(lang["lang_code"], [])

              val.append(

                  {

-                     "name": name,

+                     "name": package_name,

                      "progress": lang["progress"],

                      "translated": lang["translated"],

                      "team": lang["team"],
@@ -119,14 +120,14 @@ 

      for lang in sorted(languages):

          if lang.endswith(".json"):

              code = lang[: -len(".json")]

-             dest_file = os.path.join(data_langs_folder, code + ".json")

+             package_statistics_file = os.path.join(data_langs_folder, code + ".json")

  

-             if os.path.isfile(dest_file):

+             if os.path.isfile(package_statistics_file):

                  continue

  

              results = consolidate_language_stats(os.path.join(langs_stats, lang), distribution_stats)

              results["packages"] = packages_langs_results.get(code, dict())

-             store_json_file(results, dest_file)

+             store_json_file(results, package_statistics_file)

  

      log.info("Load CLDR data")

      with open("CLDR-raw/languageData.json", "r") as read_file:
@@ -149,14 +150,16 @@ 

      ]

      for lang in sorted(languages):

          code = lang[: -len(".json")]

-         dest_file = os.path.join(static_langs_folder, code + ".adoc")

+         package_statistics_file = os.path.join(static_langs_folder, code + ".adoc")

  

-         if os.path.isfile(dest_file):

+         if os.path.isfile(package_statistics_file):

              continue

  

          with open(os.path.join(data_langs_folder, lang), "r") as read_file:

              content = json.load(read_file)

  

+         pd.DataFrame(content["packages"]).to_csv(os.path.join(static_tm_folder, f"{code}.csv"), index=False)

+ 

          cldr_code = code.split("_", 1)[0]  # ro_MD or zh_Hant_HK

          cldr_code = cldr_code.split("@", 1)[0]  # ca@valencia

  
@@ -170,7 +173,7 @@ 

  

          if len(territories) == 0:

              log.warning("The language {l} does not exist in territories data from CLDR".format(l=code))

-         generate_static_pages_langs(args.results, code, content, dest_file, territories)

+         generate_static_pages_langs(args.results, code, content, package_statistics_file, territories, tm_folder, static_tm_folder)

  

      log.info("Generate static content for packages")

      packages = [
@@ -180,31 +183,31 @@ 

      ]

      for package in sorted(packages):

          code = package[: -len(".json")]

-         dest_file = os.path.join(static_pkgs_folder, code + ".adoc")

+         package_statistics_file = os.path.join(static_pkgs_folder, code + ".adoc")

  

-         if os.path.isfile(dest_file):

+         if os.path.isfile(package_statistics_file):

              continue

  

          with open(os.path.join(data_pkgs_folder, package), "r") as read_file:

              content = json.load(read_file)

  

-         generate_static_pages_packages(args.results, code, content, dest_file)

+         generate_static_pages_packages(args.results, code, content, package_statistics_file)

  

      log.info("Generating indexes")

-     dest_file = os.path.join(static_folder, "_index.adoc")

-     generate_release_index(args.results, dest_file, distribution_stats)

+     package_statistics_file = os.path.join(static_folder, "_index.adoc")

+     generate_release_index(args.results, package_statistics_file, distribution_stats)

  

-     dest_file = os.path.join(static_langs_folder, "_index.adoc")

-     generate_language_index(args.results, dest_file)

+     package_statistics_file = os.path.join(static_langs_folder, "_index.adoc")

+     generate_language_index(args.results, package_statistics_file)

  

-     dest_file = os.path.join(static_pkgs_folder, "_index.adoc")

-     generate_package_index(args.results, dest_file)

+     package_statistics_file = os.path.join(static_pkgs_folder, "_index.adoc")

+     generate_package_index(args.results, package_statistics_file)

  

      for code in cldr_territories.keys():

          # prevent containers and alternative names from being included

          if code in cldr_territories_info.keys():

-             dest_file = os.path.join(static_territories_folder, code, "_index.adoc")

-             generate_territory_index(dest_file, cldr_territories[code], code, cldr_territories_info.get(code, {}))

+             package_statistics_file = os.path.join(static_territories_folder, code, "_index.adoc")

+             generate_territory_index(package_statistics_file, cldr_territories[code], code, cldr_territories_info.get(code, {}))

  

      log.info("Copy translation memories")

      languages = [
@@ -384,7 +387,7 @@ 

      return results

  

  

- def generate_static_pages_langs(results, code, content, dest_file, territories):

+ def generate_static_pages_langs(results: str, code: str, content: dict, destination_file: str, territories: list[str], tm_folder: str, static_tm_folder: str) -> None:

      log = logging.getLogger("buildWebsite.generate_static_pages_langs")

      data = content

      data["lang_name_en"] = langtable.language_name(
@@ -395,103 +398,81 @@ 

      data["results"] = results

      data["lang_code"] = code

      data["now"] = datetime.datetime.utcnow()

+     data["files"] = defaultdict(dict)

+     data["files"]["compendium"]["url"] = f"/{results}/{code}.po.gz"

+     data["files"]["compendium"]["size"] = os.path.getsize(os.path.join(tm_folder, f"{code}.po.gz"))

+     data["files"]["terminology"]["url"] = f"/{results}/{code}.terminology.po.gz"

+     data["files"]["terminology"]["size"] = os.path.getsize(os.path.join(tm_folder, f"{code}.terminology.po.gz"))

+     data["files"]["tmx"]["url"] = f"/{results}/{code}.tmx.gz"

+     data["files"]["tmx"]["size"] = os.path.getsize(os.path.join(tm_folder, f"{code}.tmx.gz"))

+     data["files"]["csv"]["url"] = f"/{results}/{code}.csv"

+     data["files"]["csv"]["size"] = os.path.getsize(os.path.join(static_tm_folder, f"{code}.csv"))

      if len(territories) > 0:

          data["territories"] = territories

  

-     templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

-     TEMPLATE_FILE = "language.adoc"

-     template = templateEnv.get_template(TEMPLATE_FILE)

-     outputText = template.render(data)

- 

-     with open(dest_file, "w") as write_out:

-         write_out.write(outputText)

+     apply_jinja_template(data, destination_file, "language.adoc")

  

  

- def generate_static_pages_packages(results, code, content, dest_file):

+ def generate_static_pages_packages(results, code, content, destination_file):

      log = logging.getLogger("buildWebsite.generate_static_pages_packages")

      data = content

      data["results"] = results

      data["package"] = code

      data["now"] = datetime.datetime.utcnow()

  

-     templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

-     TEMPLATE_FILE = "package.adoc"

-     template = templateEnv.get_template(TEMPLATE_FILE)

-     outputText = template.render(data)

- 

-     with open(dest_file, "w") as write_out:

-         write_out.write(outputText)

+     apply_jinja_template(data, destination_file, "package.adoc")

  

  

- def generate_release_index(release, dest_file, data):

+ def generate_release_index(release, destination_file, data):

      log = logging.getLogger("buildWebsite.generate_release_index")

      data["release"] = release

      data["now"] = datetime.datetime.utcnow()

  

-     templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

-     TEMPLATE_FILE = "_index.release.adoc"

-     template = templateEnv.get_template(TEMPLATE_FILE)

-     outputText = template.render(data)

+     apply_jinja_template(data, destination_file, "_index.release.adoc")

  

-     with open(dest_file, "w") as write_out:

-         write_out.write(outputText)

  

- 

- def generate_language_index(release, dest_file):

+ def generate_language_index(release, destination_file):

      log = logging.getLogger("buildWebsite.generate_language_index")

      data = dict()

      data["release"] = release

      data["now"] = datetime.datetime.utcnow()

  

-     templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

-     TEMPLATE_FILE = "_index.language.adoc"

-     template = templateEnv.get_template(TEMPLATE_FILE)

-     outputText = template.render(data)

- 

-     with open(dest_file, "w") as write_out:

-         write_out.write(outputText)

+     apply_jinja_template(data, destination_file, "_index.language.adoc")

  

  

- def generate_package_index(distribution, dest_file):

+ def generate_package_index(distribution, destination_file):

      log = logging.getLogger("buildWebsite.generate_package_index")

      data = dict()

      data["distribution"] = distribution

      data["now"] = datetime.datetime.utcnow()

  

-     templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

-     TEMPLATE_FILE = "_index.package.adoc"

-     template = templateEnv.get_template(TEMPLATE_FILE)

-     outputText = template.render(data)

- 

-     with open(dest_file, "w") as write_out:

-         write_out.write(outputText)

+     apply_jinja_template(data, destination_file, "_index.package.adoc")

  

  

- def generate_territory_index(dest_file: str, name: str, code: str, data: list):

+ def generate_territory_index(destination_file: str, name: list[str], code: str, data: dict):

      log = logging.getLogger("buildWebsite.generate_package_index")

      data["name"] = name

      data["code"] = code

  

-     templateLoader = jinja2.FileSystemLoader(searchpath="./templates/")

-     templateEnv = jinja2.Environment(loader=templateLoader, undefined=jinja2.Undefined)

-     TEMPLATE_FILE = "_index.territory.adoc"

-     template = templateEnv.get_template(TEMPLATE_FILE)

-     outputText = template.render(data)

+     apply_jinja_template(data, destination_file, "_index.territory.adoc")

  

-     os.makedirs(os.path.dirname(os.path.abspath(dest_file)), exist_ok=True)

-     with open(dest_file, "w") as write_out:

-         write_out.write(outputText)

  

- 

- def store_json_file(content, dest_file):

-     with open(dest_file, "w") as f:

+ def store_json_file(content, destination_file):

+     with open(destination_file, "w") as f:

          f.write(json.dumps(content, indent=2))

  

  

+ def apply_jinja_template(data: dict, destination_file: str, template_file: str):

+     os.makedirs(os.path.dirname(os.path.abspath(destination_file)), exist_ok=True)

+ 

+     template_loader = jinja2.FileSystemLoader(searchpath="./templates/")

+     template_env = jinja2.Environment(loader=template_loader, undefined=jinja2.Undefined)

+     template = template_env.get_template(template_file)

+     output_text = template.render(data)

+ 

+     with open(destination_file, "w") as write_out:

+         write_out.write(output_text)

+ 

+ 

  if __name__ == "__main__":

      main()

file modified
+4 -3
@@ -27,9 +27,10 @@ 

  

  Download:

  

- * link:{{ "{{% resource url=" }}"/{{ results }}/{{ lang_code }}.po.gz" {{ "%}}" }}[{{ lang_code }} compendium] (aggregation of all strings found in po files)

- * link:{{ "{{% resource url=" }}"/{{ results }}/{{ lang_code }}.terminology.po.gz" {{ "%}}" }}[{{ lang_code }} terminology] see https://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/poterminology.html[poterminology]

- * link:{{ "{{% resource url=" }}"/{{ results }}/{{ lang_code }}.tmx.gz" {{ "%}}" }}[{{ lang_code }} translation memory] see https://en.wikipedia.org/wiki/Translation_Memory_eXchange[tmx]

+ * link:{{ "{{% resource url=" }}"{{ files["compendium"]["url"] }}" {{ "%}}" }}[{{ lang_code }} compendium ({{ files["compendium"]["size"]|filesizeformat() }})] (aggregation of all strings found in po files)

+ * link:{{ "{{% resource url=" }}"{{ files["terminology"]["url"] }}" {{ "%}}" }}[{{ lang_code }} terminology ({{ files["terminology"]["size"]|filesizeformat() }})] see https://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/poterminology.html[poterminology]

+ * link:{{ "{{% resource url=" }}"{{ files["tmx"]["url"] }}" {{ "%}}" }}[{{ lang_code }} translation memory ({{ files["tmx"]["size"]|filesizeformat() }})] see https://en.wikipedia.org/wiki/Translation_Memory_eXchange[tmx]

+ * link:{{ "{{% resource url=" }}"{{ files["csv"]["url"] }}" {{ "%}}" }}[{{ lang_code }} generated stats ({{ files["csv"]["size"]|filesizeformat() }})]

  

  Packages:

  

Pull-Request has been merged by jibecfed

9 months ago