| |
@@ -2,8 +2,10 @@
|
| |
"""Consolidate each po files into compendium"""
|
| |
|
| |
import argparse
|
| |
+ import datetime
|
| |
import jinja2
|
| |
import json
|
| |
+ import langtable
|
| |
import os
|
| |
import pandas as pd
|
| |
import shutil
|
| |
@@ -19,6 +21,9 @@
|
| |
choices=[30, 31, 32],
|
| |
help="Provide the Fedora release to analyze")
|
| |
|
| |
+ parser.add_argument("--refresh", action="store_true",
|
| |
+ help="Force refresh of files")
|
| |
+
|
| |
args = parser.parse_args()
|
| |
|
| |
release_folder = "./results/f{v}/".format(v=args.release)
|
| |
@@ -30,164 +35,196 @@
|
| |
data_langs_folder = os.path.join(data_folder, "languages")
|
| |
data_pkgs_folder = os.path.join(data_folder, "packages")
|
| |
|
| |
+ tm_folder = os.path.join(release_folder, "languages-tm")
|
| |
+
|
| |
static_folder = "./website/content/f{v}/".format(v=args.release)
|
| |
static_langs_folder = os.path.join(static_folder, "language")
|
| |
static_pkgs_folder = os.path.join(static_folder, "package")
|
| |
+ static_tm_folder = "./website/static/f{v}/".format(v=args.release)
|
| |
|
| |
# clean destination folders
|
| |
- for folder in [data_langs_folder, data_pkgs_folder, static_langs_folder, static_pkgs_folder]:
|
| |
- if os.path.isdir(folder):
|
| |
+ for folder in [data_langs_folder, data_pkgs_folder, static_langs_folder, static_pkgs_folder, static_tm_folder]:
|
| |
+ if args.refresh and os.path.isdir(folder):
|
| |
shutil.rmtree(folder)
|
| |
|
| |
- os.makedirs(folder)
|
| |
+ os.makedirs(folder, exist_ok=True)
|
| |
|
| |
- # prepare json files for packages
|
| |
- print("prepare json files for packages")
|
| |
- packages = [d for d in os.listdir(packages_stats) if os.path.isdir(os.path.join(packages_stats, d))]
|
| |
+ print("Prepare json files for packages")
|
| |
+ packages = [d for d in os.listdir(packages_stats) if os.path.isfile(os.path.join(packages_stats, d))]
|
| |
log_files = pd.read_csv(langs_log, header=None, skipinitialspace=True)
|
| |
log_files = log_files.iloc[:, [0, 4]]
|
| |
- log_files.columns = ["Filename", "lang_code"]
|
| |
+ log_files.columns = ["filename", "lang_code"]
|
| |
|
| |
packages_langs_results = dict()
|
| |
for package in sorted(packages):
|
| |
- file_stats = os.path.join(packages_stats, package, "stats.csv")
|
| |
- if not os.path.isfile(file_stats):
|
| |
- print(" Package: {p} missing stats file {f}".format(p=package, f=file_stats))
|
| |
- continue
|
| |
+ name = package[:-len(".json")]
|
| |
+ dest_file = os.path.join(data_pkgs_folder, name + ".json")
|
| |
+ file_stats = os.path.join(packages_stats, name + ".json")
|
| |
|
| |
results = consolidate_package_stats(file_stats, log_files)
|
| |
- store_json_file(package, results, data_pkgs_folder)
|
| |
+ store_json_file(results, dest_file)
|
| |
|
| |
langs_results = results.get("equalsormorethan80percent", []) + results.get("between50and80percent", []) + results.get("lessorequalto50percent", [])
|
| |
|
| |
for langs in langs_results:
|
| |
val = packages_langs_results.get(langs["lang_code"], [])
|
| |
- val.append({"name": package, "progress": langs["progress"]})
|
| |
+ val.append({"name": name, "progress": langs["progress"]})
|
| |
packages_langs_results[langs["lang_code"]] = val
|
| |
|
| |
- # prepare json files for languages
|
| |
- print("prepare json files for languages")
|
| |
+ print("Prepare json files for languages")
|
| |
langs = [f for f in os.listdir(langs_stats) if os.path.isfile(os.path.join(langs_stats, f))]
|
| |
for lang in sorted(langs):
|
| |
- if lang.endswith(".stats.csv"):
|
| |
- code = lang[:-len(".stats.csv")]
|
| |
+ if lang.endswith(".json"):
|
| |
+ code = lang[:-len(".json")]
|
| |
+ dest_file = os.path.join(data_langs_folder, code + ".json")
|
| |
+
|
| |
+ if os.path.isfile(dest_file):
|
| |
+ continue
|
| |
+
|
| |
results = consolidate_language_stats(os.path.join(langs_stats, lang))
|
| |
results["packages"] = packages_langs_results.get(code, dict())
|
| |
- store_json_file(code, results, data_langs_folder)
|
| |
+ store_json_file(results, dest_file)
|
| |
|
| |
# generate static content for languages
|
| |
- print("generate static content for languages")
|
| |
+ print("Generate static content for languages")
|
| |
langs = [f for f in os.listdir(data_langs_folder) if os.path.isfile(os.path.join(data_langs_folder, f))]
|
| |
for lang in sorted(langs):
|
| |
code = lang[:-len(".json")]
|
| |
- dest = os.path.join(static_langs_folder, code + ".md")
|
| |
+ dest_file = os.path.join(static_langs_folder, code + ".md")
|
| |
+
|
| |
+ if os.path.isfile(dest_file):
|
| |
+ continue
|
| |
+
|
| |
with open(os.path.join(data_langs_folder, lang), "r") as read_file:
|
| |
content = json.load(read_file)
|
| |
|
| |
- generate_static_pages_langs(args.release, code, content, dest)
|
| |
+ generate_static_pages_langs(args.release, code, content, dest_file)
|
| |
|
| |
- print("generate static content for packages")
|
| |
- # generate static content for packages
|
| |
+ print("Generate static content for packages")
|
| |
packages = [f for f in os.listdir(data_pkgs_folder) if os.path.isfile(os.path.join(data_pkgs_folder, f))]
|
| |
for package in sorted(packages):
|
| |
code = package[:-len(".json")]
|
| |
- dest = os.path.join(static_pkgs_folder, code + ".md")
|
| |
+ dest_file = os.path.join(static_pkgs_folder, code + ".md")
|
| |
+
|
| |
+ if os.path.isfile(dest_file):
|
| |
+ continue
|
| |
+
|
| |
with open(os.path.join(data_pkgs_folder, package), "r") as read_file:
|
| |
content = json.load(read_file)
|
| |
|
| |
- generate_static_pages_packages(args.release, code, content, dest)
|
| |
+ generate_static_pages_packages(args.release, code, content, dest_file)
|
| |
+
|
| |
+ print("Copy translation memories")
|
| |
+ langs = [f for f in os.listdir(tm_folder) if os.path.isfile(os.path.join(tm_folder, f))]
|
| |
+ for lang in langs:
|
| |
+ if lang.endswith(".gz"):
|
| |
+ shutil.copyfile(os.path.join(tm_folder, lang), os.path.join(static_tm_folder, lang))
|
| |
|
| |
|
| |
def consolidate_language_stats(stats_file):
    """From a per-language JSON stats file, return key indicators.

    Args:
        stats_file: path to a JSON file (orient="index"): one entry per
            translated po file, keyed by the file path, with word/message
            counts as values.

    Returns:
        dict with the list of packages, the overall progress percentage
        and the summed word-count KPIs.
    """
    results = dict()

    # Target column names, in positional order; only the keys are used
    # (the values document the expected dtype for the reader).
    fieldnames = {
        "filename": "str",
        "translatedsourcewords": "int",
        "fuzzysourcewords": "int",
        "untranslatedsourcewords": "int",
        "translated": "int",
        "fuzzy": "int",
        "untranslated": "int",
        "translatedtargetwords": "int",
        "totalsourcewords": "int"
    }

    stats_df = pd.read_json(stats_file, orient="index")
    stats_df.fillna(0, inplace=True)
    # Promote the po-file-path index to a regular first column.
    stats_df.reset_index(level=0, inplace=True)
    stats_df["totalsourcewords"] = stats_df["untranslatedsourcewords"] + stats_df["translatedsourcewords"]
    # Positional rename: assumes the JSON columns arrive in the same order
    # as fieldnames (minus "filename" and "totalsourcewords") — TODO confirm
    # against the stats producer.
    stats_df.columns = fieldnames.keys()

    # The package name is the 5th "/"-separated component of the file path.
    stats_df["package"] = stats_df["filename"].str.split("/", expand=True)[4]

    results["packages"] = stats_df["package"].unique().tolist()
    results["progress"] = round(stats_df["translatedsourcewords"].sum() / stats_df["totalsourcewords"].sum() * 100, 1)

    for kpi in ["totalsourcewords", "translatedsourcewords"]:
        results[kpi + "sum"] = int(stats_df[kpi].sum())

    return results
|
| |
|
| |
|
| |
- def consolidate_package_stats(csv_file, log_files):
|
| |
+ def consolidate_package_stats(stats_file, log_files):
|
| |
""" From a CSV file, return key indicators """
|
| |
results = dict()
|
| |
|
| |
- fieldnames = {"Filename": "str",
|
| |
- "TranslatedMessages": "int",
|
| |
- "TranslatedSourceWords": "int",
|
| |
- "TranslatedTargetWords": "int",
|
| |
- "FuzzyMessages": "int",
|
| |
- "FuzzySourceWords": "int",
|
| |
- "UntranslatedMessages": "int",
|
| |
- "UntranslatedSource Words": "int",
|
| |
- "TotalMessage": "int",
|
| |
- "TotalSourceWords": "int",
|
| |
- "ReviewMessages": "int",
|
| |
- "ReviewSourceWords": "int"}
|
| |
-
|
| |
- try:
|
| |
- stats_df = pd.read_csv(csv_file, header=0, skipinitialspace=True)
|
| |
- except pd.errors.EmptyDataError as e:
|
| |
- print(" File {f} raised {e}".format(f=csv_file, e=e))
|
| |
- return results
|
| |
+ fieldnames = {
|
| |
+ "filename": "str",
|
| |
+ "translatedsourcewords": "int",
|
| |
+ "fuzzysourcewords": "int",
|
| |
+ "untranslatedsourcewords": "int",
|
| |
+ "translated": "int",
|
| |
+ "fuzzy": "int",
|
| |
+ "untranslated": "int",
|
| |
+ "translatedtargetwords": "int",
|
| |
+ "totalsourcewords": "int"
|
| |
+ }
|
| |
+
|
| |
+ _json = json.load(open(stats_file))
|
| |
+ dfs = []
|
| |
+ total_source_words = 0
|
| |
+
|
| |
+ for template in _json.keys():
|
| |
+ tmp_df = pd.DataFrame.from_dict(_json.get(template), orient="index")
|
| |
+ tmp_df.fillna(0, inplace=True)
|
| |
+ tmp_df.reset_index(level=0, inplace=True)
|
| |
+
|
| |
+ # sometimes, no file were found, which means no stats can be used
|
| |
+ if len(tmp_df) == 0:
|
| |
+ print(" The template {t} for {f} is empty".format(t=template, f=stats_file))
|
| |
+ continue
|
| |
|
| |
- stats_df.fillna(0, inplace=True)
|
| |
- stats_df.columns = fieldnames.keys()
|
| |
+ tmp_df["totalsourcewords"] = tmp_df["untranslatedsourcewords"] + tmp_df["translatedsourcewords"]
|
| |
+ tmp_df.columns = fieldnames.keys()
|
| |
|
| |
- stats_df_w_lang = pd.merge(stats_df, log_files, how="inner", on="Filename")
|
| |
- stats_df_no_lang = pd.merge(stats_df, log_files, how="outer", indicator=True).loc[lambda x: x["_merge"] == "left_only"]
|
| |
+ total_source_words += max(tmp_df["totalsourcewords"])
|
| |
+
|
| |
+ dfs.append(tmp_df)
|
| |
|
| |
- try:
|
| |
- total_source_words = int(max(stats_df_w_lang["TotalSourceWords"]))
|
| |
- except ValueError as e:
|
| |
- print(" File {f} raised ValueError {e}".format(f=csv_file, e=e))
|
| |
+ if len(dfs) > 1:
|
| |
+ stats_df = pd.concat(dfs)
|
| |
+ elif len(dfs) == 0:
|
| |
+ print("There is no stats for {f}".format(f=stats_file))
|
| |
return results
|
| |
+ else:
|
| |
+ stats_df = dfs[0]
|
| |
+
|
| |
+ stats_df_w_lang = pd.merge(stats_df, log_files, how="inner", on="filename")
|
| |
+ stats_df_no_lang = pd.merge(stats_df, log_files, how="outer", indicator=True).loc[lambda x: x["_merge"] == "left_only"]
|
| |
|
| |
- temp = stats_df_w_lang.groupby(["lang_code"]).agg({"TranslatedSourceWords": ["sum"], }).reset_index().droplevel(1, axis=1).to_dict(orient="records")
|
| |
+ temp = stats_df_w_lang.groupby(["lang_code"]).agg({"translatedsourcewords": ["sum"], }).reset_index().droplevel(1, axis=1).to_dict(orient="records")
|
| |
for line in temp:
|
| |
line["progress"] = 0
|
| |
p = 0
|
| |
if total_source_words == 0:
|
| |
- print(" File {f} has TranslatedSourceWords = 0".format(f=csv_file))
|
| |
+ print(" File {f} for file has translatedsourcewords = 0 in line {l}".format(f=stats_file, l=line))
|
| |
line["progress"] = p
|
| |
continue
|
| |
try:
|
| |
- p = round((int(line["TranslatedSourceWords"]) / total_source_words)*100)
|
| |
+ p = round((int(line["translatedsourcewords"]) / total_source_words)*100)
|
| |
except OverflowError:
|
| |
print(" File {f} has Translated={t} and Source={tot}".format(
|
| |
- f=csv_file,
|
| |
- t=line["TranslatedSourceWords"],
|
| |
+ f=stats_file,
|
| |
+ t=line["translatedsourcewords"],
|
| |
tot=total_source_words))
|
| |
|
| |
line["progress"] = p
|
| |
|
| |
- results["TotalSourceWords"] = total_source_words
|
| |
+ results["totalsourcewords"] = total_source_words
|
| |
results["count_languages"] = len(pd.unique(stats_df_w_lang["lang_code"]))
|
| |
|
| |
for line in sorted(temp, key=lambda k: k["progress"], reverse=True):
|
| |
- del line["TranslatedSourceWords"]
|
| |
+ del line["translatedsourcewords"]
|
| |
if line["progress"] <= 50:
|
| |
hop = results.get("lessorequalto50percent", [])
|
| |
hop.append(line)
|
| |
@@ -201,43 +238,48 @@
|
| |
hop.append(line)
|
| |
results["equalsormorethan80percent"] = hop
|
| |
|
| |
- results["no_languages"] = stats_df_no_lang["Filename"].tolist()
|
| |
+ results["no_languages"] = stats_df_no_lang["filename"].tolist()
|
| |
|
| |
return results
|
| |
|
| |
|
| |
def generate_static_pages_langs(release, code, content, dest_file):
    """Render the per-language markdown page from the language.md template.

    The consolidated stats in ``content`` are enriched in place with the
    language names (English and native), its scripts, the release number
    and a generation timestamp, then rendered and written to ``dest_file``.
    """
    data = content
    data["lang_name_en"] = langtable.language_name(languageId=code, languageIdQuery="en")
    data["lang_name_local"] = langtable.language_name(languageId=code)
    data["scripts"] = langtable.list_scripts(languageId=code)
    data["release"] = release
    data["lang_code"] = code
    # NOTE(review): utcnow() is naive (no tzinfo) and deprecated since 3.12;
    # switching to datetime.now(timezone.utc) would change the rendered text.
    data["now"] = datetime.datetime.utcnow()

    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath="./templates/"),
        undefined=jinja2.Undefined,
    )
    rendered = environment.get_template("language.md").render(data)

    with open(dest_file, "w") as output:
        output.write(rendered)
|
| |
|
| |
|
| |
def generate_static_pages_packages(release, code, content, dest_file):
    """Render the per-package markdown page from the package.md template.

    The consolidated stats in ``content`` are enriched in place with the
    release number, the package name and a generation timestamp, then
    rendered and written to ``dest_file``.
    """
    data = content
    data["release"] = release
    data["package"] = code
    # NOTE(review): utcnow() is naive (no tzinfo) and deprecated since 3.12;
    # switching to datetime.now(timezone.utc) would change the rendered text.
    data["now"] = datetime.datetime.utcnow()

    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath="./templates/"),
        undefined=jinja2.Undefined,
    )
    rendered = environment.get_template("package.md").render(data)

    with open(dest_file, "w") as output:
        output.write(rendered)
|
| |
|
| |
|
| |
def store_json_file(content, dest_file):
    """Serialize ``content`` to ``dest_file`` as pretty-printed JSON."""
    serialized = json.dumps(content, indent=2)
    with open(dest_file, "w") as out:
        out.write(serialized)
|
| |
|
| |
|
| |