| |
@@ -10,11 +10,6 @@
|
| |
import tempfile
|
| |
import logging
|
| |
|
| |
- from io import BytesIO
|
| |
- from translate.convert import po2tmx
|
| |
- from translate.storage import factory, po
|
| |
- from translate.tools import poterminology
|
| |
-
|
| |
|
| |
def main():
|
| |
"""Handle params"""
|
| |
@@ -57,7 +52,7 @@
|
| |
if args.refresh and os.path.isdir(tm_folder):
|
| |
shutil.rmtree(tm_folder)
|
| |
|
| |
- if not os.path.exists(tm_folder):
|
| |
+ if os.path.exists(tm_folder) is False:
|
| |
os.makedirs(tm_folder)
|
| |
|
| |
log.info("Building the translation memory for every languages")
|
| |
@@ -78,11 +73,19 @@
|
| |
compendium_file = os.path.join(
|
| |
os.path.dirname(os.path.abspath(__file__)), compendium_file
|
| |
)
|
| |
- if not os.path.isfile(compendium_file):
|
| |
+ compendium_archive = compendium_file + ".gz"
|
| |
+ if os.path.isfile(compendium_file) is False and os.path.isfile(compendium_archive) is False:
|
| |
+ log.info("Compendium generation")
|
| |
process_compendium(files, compendium_file, debug_folder)
|
| |
+ # remove non standard comments
|
| |
+ # taken from: https://github.com/translate/translate/blob/master/tools/pocommentclean
|
| |
+ command = ["sed", "-i", "/^#$/d;/^#[^\:\~,\.]/d", compendium_file]
|
| |
+ subprocess.run(command, check=True, capture_output=True)
|
| |
|
| |
tmx_file = os.path.join(tm_folder, lang_code + ".tmx")
|
| |
- if not os.path.isfile(tmx_file):
|
| |
+ tmx_archive = tmx_file + ".gz"
|
| |
+ if os.path.isfile(tmx_file) is False and os.path.isfile(tmx_archive) is False:
|
| |
+ log.info("TMX generation")
|
| |
try:
|
| |
process_tmx(lang_code, compendium_file, tmx_file)
|
| |
except Exception as e:
|
| |
@@ -93,7 +96,9 @@
|
| |
)
|
| |
|
| |
terminology_file = os.path.join(tm_folder, lang_code + ".terminology.po")
|
| |
- if not os.path.isfile(terminology_file):
|
| |
+ terminology_archive = terminology_file + ".gz"
|
| |
+ if os.path.isfile(terminology_file) is False and os.path.isfile(terminology_archive) is False:
|
| |
+ log.info("Terminology generation")
|
| |
try:
|
| |
process_terminology(compendium_file, terminology_file)
|
| |
except Exception as e:
|
| |
@@ -103,15 +108,21 @@
|
| |
)
|
| |
)
|
| |
|
| |
+ if args.compress:
|
| |
+ if os.path.isfile(compendium_file):
|
| |
+ compress(compendium_file, compendium_archive)
|
| |
+
|
| |
+ if os.path.isfile(tmx_file):
|
| |
+ compress(tmx_file, tmx_archive)
|
| |
+
|
| |
+ if os.path.isfile(terminology_file):
|
| |
+ compress(terminology_file, terminology_archive)
|
| |
+
|
| |
log.info("All languages are processed")
|
| |
|
| |
log.info("Detecting missing files")
|
| |
for lang in sorted(langs):
|
| |
- check_lang(lang[: -len(".json")], tm_folder)
|
| |
-
|
| |
- if args.compress:
|
| |
- log.info("Compressing files")
|
| |
- compress(tm_folder)
|
| |
+ check_lang(lang[: -len(".json")], tm_folder, args.compress)
|
| |
|
| |
|
| |
def process_compendium(langfiles, dest, debug_folder):
|
| |
@@ -149,7 +160,9 @@
|
| |
subprocess.run(command, check=True, cwd=tmp, capture_output=True)
|
| |
except subprocess.CalledProcessError as e:
|
| |
debug_filename = "tm-msguniq-{lang}-{name}".format(lang=dest.split("/")[-1], name=count.__str__())
|
| |
- log.error(" msguniq error with {i} a copy of this file is into {d} as {n}".format(i=i, e=e.output, d=debug_folder, n=debug_filename))
|
| |
+ log.error(" msguniq error with {i} a copy of this file is into {d} as {n}".format(i=i, e=e.output,
|
| |
+ d=debug_folder,
|
| |
+ n=debug_filename))
|
| |
shutil.copyfile(i, os.path.join(debug_folder, debug_filename))
|
| |
|
| |
count += 1
|
| |
@@ -219,7 +232,7 @@
|
| |
else:
|
| |
if doubt is not False:
|
| |
log.debug("This file raised a msgcat bug: {f}".format(f=doubt))
|
| |
- store_debug_file(path, "tm-msgcat-"+destination.split("/")[-1], doubt, debug_folder)
|
| |
+ store_debug_file(path, "tm-msgcat-" + destination.split("/")[-1], doubt, debug_folder)
|
| |
ongoing = []
|
| |
|
| |
half = int(len(ongoing) / 2)
|
| |
@@ -241,63 +254,20 @@
|
| |
def process_tmx(lang, source, dest):
    """Convert the PO file *source* into a TMX translation memory at *dest*.

    Delegates the conversion to the ``po2tmx`` command-line tool from the
    translate-toolkit; raises ``subprocess.CalledProcessError`` if it fails.
    """
    subprocess.run(
        [
            "po2tmx",
            "--language=" + lang,
            "--progress=none",
            source,
            "--output=" + dest,
        ],
        check=True,
        capture_output=True,
    )
|
| |
|
| |
+
|
| |
def process_terminology(source, dest):
    """Generate a terminology PO file at *dest* from the PO file *source*.

    Delegates extraction to the ``poterminology`` command-line tool from the
    translate-toolkit; raises ``subprocess.CalledProcessError`` if it fails.
    """
    # --inputs-needed 1: keep terms even if they appear in a single input file
    command = ["poterminology", "--ignore-case", "--fold-titlecase",
               "--inputs-needed", "1",
               "--progress=none", source, "--output=" + dest]
    subprocess.run(command, check=True, capture_output=True)
|
| |
|
| |
|
| |
- def check_lang(lang, tm_folder):
|
| |
+ def check_lang(lang, tm_folder, compress):
|
| |
""" Check if expected files were generated """
|
| |
log = logging.getLogger("buildTm.check_lang")
|
| |
|
| |
@@ -305,33 +275,31 @@
|
| |
tmx_file = os.path.join(tm_folder, lang + ".tmx")
|
| |
terminology_file = os.path.join(tm_folder, lang + ".terminology.po")
|
| |
|
| |
- if not os.path.isfile(compendium_file):
|
| |
+ if compress is True:
|
| |
+ compendium_file += ".gz"
|
| |
+ tmx_file += ".gz"
|
| |
+ terminology_file += ".gz"
|
| |
+
|
| |
+ if os.path.isfile(compendium_file) is False:
|
| |
log.warning("{l}-compendium is missing".format(l=lang))
|
| |
|
| |
- if not os.path.isfile(tmx_file):
|
| |
+ if os.path.isfile(tmx_file) is False:
|
| |
log.warning("{l}-tmx is missing".format(l=lang))
|
| |
|
| |
- if not os.path.isfile(terminology_file):
|
| |
+ if os.path.isfile(terminology_file) is False:
|
| |
log.warning("{l}-terminology is missing".format(l=lang))
|
| |
|
| |
|
| |
def compress(source, archive):
    """Gzip-compress *source* into *archive*, then delete the original file.

    :param source: path of the plain file to compress
    :param archive: path of the ``.gz`` file to create
    """
    log = logging.getLogger("buildTm.compress")

    log.info("Compressing")
    # shutil.copyfileobj streams in fixed-size chunks; iterating a binary
    # file object by "lines" (writelines) can buffer arbitrarily large
    # chunks when the data contains few newline bytes
    with open(source, "rb") as file_in, gzip.open(archive, "wb") as file_out:
        shutil.copyfileobj(file_in, file_out)

    # the archive replaces the original on disk
    os.remove(source)
|
| |
|
| |
|
| |
if __name__ == "__main__":
|
| |