| |
@@ -7,8 +7,10 @@
|
| |
import time
|
| |
import polib
|
| |
|
| |
+ from pprint import pprint
|
| |
+
|
| |
from shutil import rmtree
|
| |
- from weblate_language_data import aliases, languages, language_codes
|
| |
+ from weblate_language_data import aliases, languages, language_codes, countries
|
| |
|
| |
def main():
|
| |
"""Handle params"""
|
| |
@@ -23,26 +25,127 @@
|
| |
parser.add_argument("--refresh", action="store_true",
|
| |
help="Force refresh")
|
| |
|
| |
+ parser.add_argument("--describe", action="store_true",
|
| |
+ help="Describe the current list of languages")
|
| |
+
|
| |
+ parser.add_argument("--analyzelang", type=str,
|
| |
+ help="Produce an analyze file for a language")
|
| |
+
|
| |
+ parser.add_argument("--analyzealllangs", action="store_true",
|
| |
+ help="Produce an analyze file for all languages")
|
| |
+
|
| |
args = parser.parse_args()
|
| |
|
| |
release_folder = "./results/f{v}/".format(v=args.release)
|
| |
lang_path = os.path.join(release_folder, "languages/")
|
| |
packages_path = os.path.join(release_folder, "packages/")
|
| |
+ lang_analyze_folder = os.path.join(release_folder, "languages-analyses/")
|
| |
+
|
| |
+ if args.describe:
|
| |
+ print("Describing detecting languages")
|
| |
+ describe(lang_path)
|
| |
+
|
| |
+ elif args.analyzealllangs:
|
| |
+ rmtree(lang_analyze_folder, ignore_errors=True)
|
| |
+ os.mkdir(lang_analyze_folder)
|
| |
+
|
| |
+ langs = [f for f in os.listdir(lang_path) if os.path.isfile(os.path.join(lang_path, f))]
|
| |
+ for lang in sorted(langs):
|
| |
+ analyze = analyze_lang(lang_path, lang[:-len('.json')])
|
| |
+
|
| |
+ with open(os.path.join(lang_analyze_folder, lang), 'w') as f:
|
| |
+ f.write(json.dumps(analyze, indent=2))
|
| |
+
|
| |
+ elif args.analyzelang:
|
| |
+ print("Provide more data to analyze errors")
|
| |
+ analyze = analyze_lang(lang_path, args.analyzelang)
|
| |
+
|
| |
+ with open(os.path.join(lang_analyze_folder, args.analyzelang + '.json'), 'w') as f:
|
| |
+ f.write(json.dumps(analyze, indent=2))
|
| |
+
|
| |
+ elif args.refresh:
|
| |
+ print("Refreshing the list of languages")
|
| |
+ rmtree(lang_path, ignore_errors=True)
|
| |
+ os.mkdir(lang_path)
|
| |
+
|
| |
+ start_time_search = time.time()
|
| |
+
|
| |
+ po_langs = detect_languages(packages_path)
|
| |
+
|
| |
+ for lang in po_langs.keys():
|
| |
+ with open(os.path.join(lang_path, str(lang) + '.json'), 'w') as f:
|
| |
+ f.write(json.dumps(po_langs[lang], indent=2))
|
| |
+
|
| |
+ search_duration = round(time.time() - start_time_search, 1)
|
| |
+ print(" Done in {d} seconds".format(d=search_duration))
|
| |
+
|
| |
+
|
| |
def analyze_lang(lang_path, analized_lang):
    """Aggregate .po metadata for one detected language.

    Reads ``<lang_path>/<analized_lang>.json`` (a JSON list of .po file
    paths), parses each file's header metadata with polib, and groups the
    files by their declared ``Language`` header value.

    NOTE(review): "analized" is a typo for "analyzed"; the parameter name
    is kept unchanged so existing callers are unaffected.

    Parameters:
        lang_path: directory containing the per-language ``*.json`` lists.
        analized_lang: language code to analyze (file stem, without ``.json``).

    Returns:
        dict keyed by the ``Language`` header value (sorted by key); each
        value holds ``Count``, ``Files``, and the distinct ``Plural-Forms``
        and ``Language-Team`` values seen.
    """
    results = dict()
    with open(os.path.join(lang_path, analized_lang + ".json"), "r") as read_file:
        files = json.load(read_file)

    print("    Analysing language {l}, with {c} files".format(l=analized_lang, c=len(files)))

    # Sort key tolerant of None entries (metadata.get may return None).
    def _dedup_sorted(values):
        # Deterministic de-duplication so the emitted JSON is stable
        # across runs (a plain list(set(...)) has arbitrary order).
        return sorted(set(values), key=lambda v: (v is None, v))

    for file in files:
        metadata = dict()
        error = ""
        try:
            metadata = polib.pofile(file).metadata
        except UnicodeDecodeError:
            # encoding error, to investigate before using it in TM
            metadata["Language"] = "error-unicode"
        except OSError:
            # maybe a polib bug? to investigate before using it in TM
            metadata["Language"] = "error-os"

        # Normalize missing/empty headers so they group under explicit keys.
        if "Language" not in metadata:
            metadata["Language"] = "zzz_null"
        elif metadata["Language"] == "":
            metadata["Language"] = "zzz_empty"

        if analized_lang != "error":
            # NOTE(review): the original computed choose_lang() here but
            # never stored the result; the call is kept for parity.
            # TODO: record the chosen code in the aggregate if it is needed.
            choose_lang("", metadata, error)

        language = results.get(metadata.get("Language"), dict())

        language["Count"] = language.get("Count", 0) + 1

        lang_files = language.get("Files", [])
        lang_files.append(file)
        language["Files"] = lang_files

        plurals = language.get("Plural-Forms", [])
        plurals.append(metadata.get("Plural-Forms"))
        language["Plural-Forms"] = _dedup_sorted(plurals)

        teams = language.get("Language-Team", [])
        teams.append(metadata.get("Language-Team"))
        language["Language-Team"] = _dedup_sorted(teams)

        results[metadata.get("Language")] = language

    # Stable output order: sort by the normalized Language key.
    return dict(sorted(results.items()))
|
| |
+
|
| |
+
|
| |
def describe(lang_path):
    """ Provide the number of files per language """
    # Keep only regular files; directories in lang_path are ignored.
    entries = os.listdir(lang_path)
    json_lists = sorted(
        name for name in entries if os.path.isfile(os.path.join(lang_path, name))
    )

    suffix_len = len('.json')
    for name in json_lists:
        with open(os.path.join(lang_path, name), "r") as read_file:
            file_list = json.load(read_file)

        # Print "<code>:<count>", stripping the .json suffix from the name.
        print("    {l}:{c}".format(l=name[:-suffix_len], c=len(file_list)))
|
| |
|
| |
|
| |
def detect_languages(tm_folder):
|
| |
@@ -82,35 +185,37 @@
|
| |
""" Returns: a language code """
|
| |
|
| |
lang = ""
|
| |
- file_name = filename.lower()
|
| |
- meta_language = ""
|
| |
- meta_team = ""
|
| |
- try:
|
| |
- meta_language = metadata.get("Language").lower()
|
| |
- except AttributeError:
|
| |
- pass
|
| |
-
|
| |
- try:
|
| |
- meta_team = metadata.get("Language-Team").lower()
|
| |
- except AttributeError:
|
| |
- pass
|
| |
+ file_name = filename.lower().replace("-", "_")
|
| |
+
|
| |
+ meta_language = metadata.get("Language","").lower().replace("-", "_")
|
| |
+
|
| |
+ meta_team = metadata.get("Language-Team","").lower().replace("-", "_")
|
| |
|
| |
if meta_language in language_codes.LANGUAGES:
|
| |
lang = meta_language
|
| |
|
| |
elif file_name in language_codes.LANGUAGES:
|
| |
lang = file_name
|
| |
+
|
| |
else:
|
| |
lang = "noresult"
|
| |
|
| |
# try languages (some codes here are exclused from languages_codes)
|
| |
if lang == "noresult":
|
| |
- loc = [ lang[0] for lang in languages.LANGUAGES ]
|
| |
+ codes = dict()
|
| |
+ for language in languages.LANGUAGES:
|
| |
+ # 0 is language code
|
| |
+ # 1 is language name
|
| |
+ codes[language[1].lower()] = language[0].lower()
|
| |
|
| |
- if meta_language in loc:
|
| |
- lang = meta_language.lower()
|
| |
- elif file_name in loc:
|
| |
- lang = file_name.lower()
|
| |
+ if meta_language in codes.values():
|
| |
+ lang = meta_language
|
| |
+
|
| |
+ elif file_name in codes.values():
|
| |
+ lang = file_name
|
| |
+
|
| |
+ elif meta_language in codes.keys():
|
| |
+ lang = codes.get(meta_language)
|
| |
|
| |
# try ALIASES
|
| |
if lang == "noresult":
|
| |
@@ -118,6 +223,12 @@
|
| |
lang = aliases.ALIASES[meta_language].lower()
|
| |
elif file_name in aliases.ALIASES.keys():
|
| |
lang = aliases.ALIASES[file_name].lower()
|
| |
+
|
| |
+ if lang == "noresult":
|
| |
+ if meta_language in countries.DEFAULT_LANGS:
|
| |
+ lang = meta_language.split("_", 1)[0]
|
| |
+ elif file_name in countries.DEFAULT_LANGS:
|
| |
+ lang = file_name.split("_", 1)[0]
|
| |
else:
|
| |
lang = "error"
|
| |
|
| |