#17 Measure language progress compared to the whole distribution
Merged 3 years ago by jibecfed. Opened 3 years ago by jibecfed.

file modified
+42
@@ -17,6 +17,8 @@ 

          description="Computes stats for each srpm detected")

      parser.add_argument("--results", required=True,

                          help="Set the results folder to use")

+     parser.add_argument("--refresh", action="store_true",

+                         help="Clear results before computing")

  

      args = parser.parse_args()

  
@@ -24,10 +26,16 @@ 

      packages_stats_folder = "./results/{v}/packages-stats/".format(v=args.results)

      languages_folder = "./results/{v}/languages/".format(v=args.results)

      languages_stats_folder = "./results/{v}/languages-stats/".format(v=args.results)

+     distribution_stats_folder = "./results/{v}/distribution-stats/".format(v=args.results)

+ 

+     for folder in [packages_stats_folder, languages_stats_folder, distribution_stats_folder]:

+         if args.refresh and os.path.isdir(folder):

+             shutil.rmtree(folder)

  

      print("Computing packages stats")

      packages = [f for f in os.listdir(packages_folder) if os.path.isdir(os.path.join(packages_folder, f))]

      count = 0

+     distribution_stats = dict()

  

      if not os.path.exists(packages_stats_folder):

          os.makedirs(packages_stats_folder)
@@ -52,9 +60,20 @@ 

                  results[discover["filemask"]] = get_po_translation_level(files, stats_file)

  

          if len(results) > 0:

+             distribution_stats = extract_release_stats(distribution_stats, results)

+ 

+         if len(results) > 0:

              with open(stats_file, "w") as f:

                  json.dump(results, f, indent=2)

  

+     print("Storing distribution stats")

+     if not os.path.exists(distribution_stats_folder):

+         os.makedirs(distribution_stats_folder)

+ 

+     stats_file = os.path.join(distribution_stats_folder, "release.json")

+     with open(stats_file, "w") as f:

+         json.dump(distribution_stats, f, indent=2)

+ 

  

      print("Computing language stats")

      languages = [f for f in os.listdir(languages_folder)]
@@ -112,5 +131,28 @@ 

      return stats

  

  

def extract_release_stats(results, files_stats):
    """Fold one package's per-file statistics into the release-wide totals.

    ``results`` carries the running totals under the keys ``nb_packages``,
    ``nb_files`` and ``totalsourcewords``; a missing key counts as 0 and the
    mapping itself is left untouched.  ``files_stats`` maps each template to
    a mapping of file -> word counts, every entry providing
    ``translatedsourcewords`` and ``untranslatedsourcewords``.

    Returns a new dict with the same three keys: one more package, one more
    file per entry seen, and, for every template, the largest
    translated + untranslated word count found among its files added to the
    source-word total.
    """
    package_count = results.get("nb_packages", 0) + 1
    file_count = results.get("nb_files", 0)
    word_count = results.get("totalsourcewords", 0)

    for template in files_stats:
        per_file = files_stats[template]
        # Seed with 0 so a template without any file contributes nothing.
        sizes = [0]
        for name in per_file:
            counts = per_file[name]
            # NOTE(review): only translated and untranslated words are summed;
            # if fuzzy words are tracked separately they are not included
            # here -- confirm this is intended.
            sizes.append(counts["translatedsourcewords"]
                         + counts["untranslatedsourcewords"])

        file_count += len(sizes) - 1
        # Only the biggest file of a template is counted towards the total.
        word_count += max(sizes)

    return {"nb_packages": package_count,
            "nb_files": file_count,
            "totalsourcewords": word_count}

+ 

+ 

  if __name__ == "__main__":

      main()

file modified
+9 -2
@@ -29,6 +29,7 @@ 

      langs_log = os.path.join(results_folder, "build_language_list.log")

      langs_stats = os.path.join(results_folder, "languages-stats")

      packages_stats = os.path.join(results_folder, "packages-stats")

+     distribution_folder = os.path.join(results_folder, "distribution-stats")

  

      data_langs_folder = os.path.join(results_folder, "languages-website")

      data_pkgs_folder = os.path.join(results_folder, "packages-website")
@@ -47,6 +48,9 @@ 

  

          os.makedirs(folder, exist_ok=True)

  

+     print("Get distribution stats")

+     distribution_stats = json.load(open(os.path.join(distribution_folder, "release.json")))

+ 

      print("Prepare json files for packages")

      packages = [d for d in os.listdir(packages_stats) if os.path.isfile(os.path.join(packages_stats, d))]

      log_files = pd.read_csv(langs_log, header=None, skipinitialspace=True)
@@ -79,7 +83,7 @@ 

              if os.path.isfile(dest_file):

                  continue

  

-             results = consolidate_language_stats(os.path.join(langs_stats, lang))

+             results = consolidate_language_stats(os.path.join(langs_stats, lang), distribution_stats)

              results["packages"] = packages_langs_results.get(code, dict())

              store_json_file(results, dest_file)

  
@@ -119,9 +123,10 @@ 

              shutil.copyfile(os.path.join(tm_folder, lang), os.path.join(static_tm_folder, lang))

  

  

- def consolidate_language_stats(stats_file):

+ def consolidate_language_stats(stats_file, distribution_stats):

      """ From a CSV file, return key indicators """

      results = dict()

+     total_words_distrib = distribution_stats.get("totalsourcewords", 0)

  

      fieldnames = {

          "filename": "str",
@@ -145,6 +150,8 @@ 

  

      results["packages"] = stats_df["package"].unique().tolist()

      results["progress"] = round(stats_df["translatedsourcewords"].sum() / stats_df["totalsourcewords"].sum() * 100, 1)

+     results["progress_d"] = round(stats_df["translatedsourcewords"].sum() / total_words_distrib * 100, 1)

+     results["totalsourcewords_d"] = total_words_distrib

  

      for kpi in ["totalsourcewords", "translatedsourcewords"]:

          results[kpi + "sum"] = int(stats_df[kpi].sum())

file modified
+7 -4
@@ -3,13 +3,16 @@ 

  date: {{ now }}

  ---

  

- Global progress for {{ lang_name_en }} ({{ lang_code }}) in Fedora {{ results }} is {{ progress }}%.

+ Language progress for {{ lang_name_en }} ({{ lang_code }}) in Fedora {{ results }} is:

+ 

+ * {{ progress }}% when we only look at started packages for this language.

+ * {{ progress_d }}% when we compare to every single translatable string in Fedora {{ results }}.

  

  Possible scripts are: {% for script in scripts -%}{{ script }} {%- endfor %}

  

- | Source words to translate  | Translated words |

- |---------------------------:|-----------------:|

- | {{ totalsourcewordssum }}  | {{ translatedsourcewordssum }} |

+ * Total translatable words in Fedora {{ results }}: {{ totalsourcewords_d }}

+ * Source words to translate in started packages: {{ totalsourcewordssum }}

+ * Translated words: {{ translatedsourcewordssum }}

  

  Download:

  

A language can be 100% translated on a limited set of packages while still having a lot to do when compared to the whole distribution.

Pull-Request has been merged by jibecfed

3 years ago