From c6eb59eb42fedb8fa5755b3e47831d39d5525a3e Mon Sep 17 00:00:00 2001 From: Florent Rougon Date: Mon, 26 Jun 2017 10:45:57 +0200 Subject: [PATCH] Initial version of the Python scripts to manage l10n using the XLIFF format Add the following files: python3-flightgear/README-l10n.txt python3-flightgear/fg-convert-translation-files python3-flightgear/fg-new-translations python3-flightgear/fg-update-translation-files python3-flightgear/flightgear/__init__.py python3-flightgear/flightgear/meta/__init__.py python3-flightgear/flightgear/meta/exceptions.py python3-flightgear/flightgear/meta/i18n.py python3-flightgear/flightgear/meta/logging.py python3-flightgear/flightgear/meta/misc.py They should work on Python 3.4 and later (tested with 3.5.3). The folder structure is chosen so that other FG support modules can insert themselves here, and possibly be used together. I put all of these inside 'flightgear.meta', because I don't expect them to be needed at FG runtime (neither now nor in the future), probably not even by the CMake build system. To declare that a string has plural forms, simply set the attribute 'with-plural' to 'true' on the corresponding element of the default translation (and as in Qt, use %n as a placeholder for the number that determines which singular or plural form to use). 
--- python3-flightgear/README-l10n.txt | 83 + .../fg-convert-translation-files | 186 ++ python3-flightgear/fg-new-translations | 125 ++ .../fg-update-translation-files | 184 ++ python3-flightgear/flightgear/__init__.py | 0 .../flightgear/meta/__init__.py | 0 .../flightgear/meta/exceptions.py | 58 + python3-flightgear/flightgear/meta/i18n.py | 1822 +++++++++++++++++ python3-flightgear/flightgear/meta/logging.py | 95 + python3-flightgear/flightgear/meta/misc.py | 81 + 10 files changed, 2634 insertions(+) create mode 100644 python3-flightgear/README-l10n.txt create mode 100755 python3-flightgear/fg-convert-translation-files create mode 100755 python3-flightgear/fg-new-translations create mode 100755 python3-flightgear/fg-update-translation-files create mode 100644 python3-flightgear/flightgear/__init__.py create mode 100644 python3-flightgear/flightgear/meta/__init__.py create mode 100644 python3-flightgear/flightgear/meta/exceptions.py create mode 100644 python3-flightgear/flightgear/meta/i18n.py create mode 100644 python3-flightgear/flightgear/meta/logging.py create mode 100644 python3-flightgear/flightgear/meta/misc.py diff --git a/python3-flightgear/README-l10n.txt b/python3-flightgear/README-l10n.txt new file mode 100644 index 0000000..f2887b7 --- /dev/null +++ b/python3-flightgear/README-l10n.txt @@ -0,0 +1,83 @@ +Quick start for the localization (l10n) scripts +=============================================== + +The following assumes that all of these are present in +$FG_ROOT/Translations: + - the default translation (default/*.xml); + - the legacy FlightGear XML localization files (<language_code>/*.xml); + - except for 'fg-convert-translation-files' which creates them, existing + XLIFF 1.2 files (<language_code>/FlightGear-nonQt.xlf). + +Note: the legacy FlightGear XML localization files are only needed by + 'fg-convert-translation-files' when migrating to the XLIFF format. 
The + other scripts only need the default translation and obviously, for + 'fg-update-translation-files', the current XLIFF files. + +To get the initial XLIFF files (generated from the default translation in +$FG_ROOT/Translations/default as well as the legacy FlightGear XML +localization files in $FG_ROOT/Translations/<language_code>): + + languages="de en_US es fr it nl pl pt zh_CN" + + # Your shell must expand $languages as several words. POSIX shell does that, + # but not zsh for instance. Otherwise, don't use a shell variable. + fg-convert-translation-files --transl-dir="$FG_ROOT/Translations" $languages + + # Add strings found in the default translation but missing in the legacy FG + # XML l10n files + fg-update-translation-files --transl-dir="$FG_ROOT/Translations" \ + merge-new-master $languages + +When master strings[1] have changed (in a broad sense, i.e.: strings added, +modified or removed, or categories added or removed[2]): + + fg-update-translation-files --transl-dir="$FG_ROOT/Translations" \ + merge-new-master $languages + +To remove unused translated strings (not to be done too often in my opinion): + + fg-update-translation-files --transl-dir="$FG_ROOT/Translations" \ + remove-unused $languages + +(you may replace 'remove-unused' with 'mark-unused' to just mark the strings +as not-to-be-translated, however 'merge-new-master' presented above already +does that) + +To create skeleton translations for new languages (e.g., for fr_BE, en_AU and +ca): + + 1) Check (add if necessary) that flightgear/meta/i18n.py knows the plural + forms used in the new languages. This is done by editing PLURAL_FORMS + towards the top of this i18n.py file (very easy). If the existing entry + for, e.g., "zh" is sufficient for zh_TW or zh_HK, just let "zh" handle + them: it will be tried as fallback if there is no perfect match on + language and territory. 
+ + 2) Run a command such as: + + fg-new-translations --transl-dir="$FG_ROOT/Translations" fr_BE en_AU ca + + (if you do this for only one language at a time, you can use the -o + option to precisely control where the output goes, otherwise + fg-new-translations chooses an appropriate place based on the value + specified for --transl-dir) + +fg-convert-translation-files, fg-update-translation-files and +fg-new-translations all support the --help option for more detailed +information. + + +Footnotes +--------- + + [1] Strings in the default translation. + + [2] Only empty categories are removed by this command. An obsolete category + can be made empty by manual editing (easy, just locate the right + ) or this way: + + fg-update-translation-files --transl-dir=... mark-unused + fg-update-translation-files --transl-dir=... remove-unused + + (note that this will remove *all* strings marked as unused in the first + step, not only those in some particular category!) diff --git a/python3-flightgear/fg-convert-translation-files b/python3-flightgear/fg-convert-translation-files new file mode 100755 index 0000000..95d77be --- /dev/null +++ b/python3-flightgear/fg-convert-translation-files @@ -0,0 +1,186 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- + +# fg-convert-translation-files --- Convert FlightGear's translation files +# Copyright (C) 2017 Florent Rougon +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import argparse +import collections +import locale +import os +import sys + +try: + import xml.etree.ElementTree as et +except ImportError: + import elementtree.ElementTree as et + +import flightgear.meta.logging +import flightgear.meta.i18n as fg_i18n + + +PROGNAME = os.path.basename(sys.argv[0]) + +# Only messages with severity >= info will be printed to the terminal (it's +# possible to also log all messages to a file regardless of their level, see +# the Logger class). Of course, there is also the standard logging module... +logger = flightgear.meta.logging.Logger( + progname=PROGNAME, + logLevel=flightgear.meta.logging.LogLevel.info, + defaultOutputStream=sys.stderr) + +debug = logger.debug +info = logger.info +notice = logger.notice +warning = logger.warning +error = logger.error +critical = logger.critical + + +# We could use Translation.__str__(): not as readable (for now) but more +# accurate on metadata +def printPlainText(l10nResPoolMgr, translations): + """Print output suitable for a quick review (by the programmer).""" + firstLang = True + + for langCode, (transl, nbWhitespacePbs) in translations.items(): + # 'transl' is a Translation instance + if firstLang: + firstLang = False + else: + print() + + print("-" * 78 + "\n" + langCode + "\n" + "-" * 78) + print("\nNumber of leading and/or trailing whitespace problems: {}" + .format(nbWhitespacePbs)) + + for cat in transl: + print("\nCategory: {cat}\n{underline}".format( + cat=cat, underline="~"*(len("Category: ") + len(cat)))) + t = transl[cat] + + for tid, translUnit in sorted(t.items()): + # - Using '{master!r}' and '{transl!r}' prints stuff such as + # \xa0 for nobreak spaces, which can lead to the erroneous + # conclusion that there was an encoding problem. 
+ # - Only printing the first target text here (no plural forms) + print("\n{id}\n '{sourceText}'\n '{targetText}'" + .format(id=tid.id(), sourceText=translUnit.sourceText, + targetText=translUnit.targetTexts[0])) + + +def writeXliff(l10nResPoolMgr, translations): + formatHandler = fg_i18n.XliffFormatHandler() + + for langCode, translData in translations.items(): + translation = translData.transl # Translation instance + + if params.output_dir is None: + # Use default locations for the written xliff files + l10nResPoolMgr.writeTranslation(formatHandler, translation) + else: + basename = "{}-{}.{}".format( + formatHandler.defaultFileStem(langCode), + langCode, + formatHandler.standardExtension) + filePath = os.path.join(params.output_dir, basename) + formatHandler.writeTranslation(translation, filePath) + + +def processCommandLine(): + params = argparse.Namespace() + + parser = argparse.ArgumentParser( + usage="""\ +%(prog)s [OPTION ...] LANGUAGE_CODE... +Convert FlightGear's old XML translation files into other formats.""", + description="""\ +Most notably, XLIFF format can be chosen for output. The script performs +a few automated checks on the input files too.""", + formatter_class=argparse.RawDescriptionHelpFormatter, + # I want --help but not -h (it might be useful for something else) + add_help=False) + + parser.add_argument("-t", "--transl-dir", + help="""\ + directory containing all translation subdirs (such as + {default!r}, 'en_GB', 'fr_FR', 'de', 'it'...). 
This + "option" MUST be specified.""".format( + default=fg_i18n.DEFAULT_LANG_DIR)) + parser.add_argument("lang_code", metavar="LANGUAGE_CODE", nargs="+", + help="""\ + codes of languages to read translations for (don't + specify {default!r} this way, it is special and not a + language code)""" + .format(default=fg_i18n.DEFAULT_LANG_DIR)) + parser.add_argument("-o", "--output-dir", + help="""\ + output directory for written XLIFF files + (default: for each output file, use a suitable location + under TRANSL_DIR)""") + parser.add_argument("-f", "--output-format", default="xliff", + choices=("xliff", "text"), help="""\ + format to use for the output files""") + parser.add_argument("--help", action="help", + help="display this message and exit") + + params = parser.parse_args(namespace=params) + + if params.transl_dir is None: + error("--transl-dir must be given, aborting") + sys.exit(1) + + return params + + +def main(): + global params + + locale.setlocale(locale.LC_ALL, '') + params = processCommandLine() + + l10nResPoolMgr = fg_i18n.L10NResourcePoolManager(params.transl_dir, logger) + # English version of all translatable strings + masterTransl, nbWhitespaceProblemsInMaster = \ + l10nResPoolMgr.readFgMasterTranslation() + translations = collections.OrderedDict() + + # Sort elements of 'translations' according to language code (= the keys) + for langCode in sorted(params.lang_code): + translationData = l10nResPoolMgr.readFgTranslation(masterTransl, + langCode) + translations[translationData.transl.targetLanguage] = translationData + + if params.output_format == "xliff": + writeFunc = writeXliff # write to files + elif params.output_format == "text": + writeFunc = printPlainText # print to stdout + else: + assert False, \ + "Unexpected output format: '{}'".format(params.output_format) + + writeFunc(l10nResPoolMgr, translations) + + nbWhitespaceProblemsInTransl = sum( + (translData.nbWhitespacePbs for translData in translations.values() )) + info("total number of 
leading and/or trailing whitespace problems: {}" + .format(nbWhitespaceProblemsInMaster + nbWhitespaceProblemsInTransl)) + + sys.exit(0) + + +if __name__ == "__main__": main() diff --git a/python3-flightgear/fg-new-translations b/python3-flightgear/fg-new-translations new file mode 100755 index 0000000..0da1453 --- /dev/null +++ b/python3-flightgear/fg-new-translations @@ -0,0 +1,125 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- + +# fg-new-translations --- Create new translations for FlightGear +# Copyright (C) 2017 Florent Rougon +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import argparse +import collections +import locale +import os +import sys + +try: + import xml.etree.ElementTree as et +except ImportError: + import elementtree.ElementTree as et + +import flightgear.meta.logging +import flightgear.meta.i18n as fg_i18n + + +PROGNAME = os.path.basename(sys.argv[0]) + +# Only messages with severity >= info will be printed to the terminal (it's +# possible to also log all messages to a file regardless of their level, see +# the Logger class). Of course, there is also the standard logging module... 
+logger = flightgear.meta.logging.Logger( + progname=PROGNAME, + logLevel=flightgear.meta.logging.LogLevel.info, + defaultOutputStream=sys.stderr) + + +def processCommandLine(): + params = argparse.Namespace() + + parser = argparse.ArgumentParser( + usage="""\ +%(prog)s [OPTION ...] LANGUAGE_CODE... +Write the skeleton of XLIFF translation files.""", + description="""\ +This program writes XLIFF translation files with the strings to translate +for the specified languages (target strings are empty). This is what you need +to start a translation for a new language.""", + formatter_class=argparse.RawDescriptionHelpFormatter, + # I want --help but not -h (it might be useful for something else) + add_help=False) + + parser.add_argument("-t", "--transl-dir", + help="""\ + directory containing all translation subdirs (such as + {default!r}, 'en_GB', 'fr_FR', 'de', 'it'...). This + "option" MUST be specified.""".format( + default=fg_i18n.DEFAULT_LANG_DIR)) + parser.add_argument("lang_code", metavar="LANGUAGE_CODE", nargs="+", + help="""\ + codes of languages to create translations for (e.g., fr, + fr_BE, en_GB, it, es_ES...)""") + parser.add_argument("-o", "--output-file", + help="""\ + where to write the output to (use '-' for standard + output); if not specified, a suitable file under + TRANSL_DIR will be chosen for each LANGUAGE_CODE. 
+ Note: this option can only be given when exactly one + LANGUAGE_CODE has been specified on the command + line (it doesn't make sense otherwise).""") + parser.add_argument("--output-format", default="xliff", + choices=fg_i18n.FORMAT_HANDLERS_NAMES, + help="format to use for the output files") + parser.add_argument("--help", action="help", + help="display this message and exit") + + params = parser.parse_args(namespace=params) + + if params.transl_dir is None: + logger.error("--transl-dir must be given, aborting") + sys.exit(1) + + if params.output_file is not None and len(params.lang_code) > 1: + logger.error("--output-file can only be given when exactly one " + "LANGUAGE_CODE has been specified on the command line " + "(it doesn't make sense otherwise)") + sys.exit(1) + + return params + + +def main(): + global params + + locale.setlocale(locale.LC_ALL, '') + params = processCommandLine() + + l10nResPoolMgr = fg_i18n.L10NResourcePoolManager(params.transl_dir, logger) + xliffFormatHandler = fg_i18n.FORMAT_HANDLERS_MAP[params.output_format]() + + if params.output_file is not None: + assert len(params.lang_code) == 1, params.lang_code + # Output to one file or to stdout + l10nResPoolMgr.writeSkeletonTranslation( + xliffFormatHandler, params.lang_code[0], + filePath=params.output_file) + else: + # Output to several files + for langCode in params.lang_code: + l10nResPoolMgr.writeSkeletonTranslation(xliffFormatHandler, + langCode) + + sys.exit(0) + + +if __name__ == "__main__": main() diff --git a/python3-flightgear/fg-update-translation-files b/python3-flightgear/fg-update-translation-files new file mode 100755 index 0000000..e3c4c10 --- /dev/null +++ b/python3-flightgear/fg-update-translation-files @@ -0,0 +1,184 @@ +#! 
/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# fg-update-translation-files --- Merge new default translation, +# remove obsolete strings from a translation +# Copyright (C) 2017 Florent Rougon +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import argparse +import enum +import locale +import os +import sys + +try: + import xml.etree.ElementTree as et +except ImportError: + import elementtree.ElementTree as et + +import flightgear.meta.logging +import flightgear.meta.i18n as fg_i18n + + +PROGNAME = os.path.basename(sys.argv[0]) + +# Only messages with severity >= info will be printed to the terminal (it's +# possible to also log all messages to a file regardless of their level, see +# the Logger class). Of course, there is also the standard logging module... +logger = flightgear.meta.logging.Logger( + progname=PROGNAME, + logLevel=flightgear.meta.logging.LogLevel.info, + defaultOutputStream=sys.stderr) + + +def processCommandLine(): + params = argparse.Namespace() + + parser = argparse.ArgumentParser( + usage="""\ +%(prog)s [OPTION ...] ACTION LANGUAGE_CODE... 
+Update FlightGear XLIFF localization files.""", + description="""\ +This program performs the following operations (actions) on FlightGear XLIFF +translation files (*.xlf): + + - [merge-new-master] + Read the default translation[1], add new translated strings it contains to + the XLIFF localization files corresponding to the specified language(s), + mark the translated strings in said files that need review (modified in + the default translation) as well as those that are not used anymore + (disappeared in the default translation, or marked in a way that says they + don't need to be translated); + + - [mark-unused] + Read the default translation and mark translated strings (in the XLIFF + localization files corresponding to the specified language(s)) that are + not used anymore; + + - [remove-unused] + In the XLIFF localization files corresponding to the specified + language(s), remove all translated strings that are marked as unused. + +A translated string that is marked as unused is still present in the XLIFF +localization file; it is just presented in a way that tells translators they +don't need to worry about it. On the other hand, when a translated string is +removed, translators don't see it anymore and the translation is lost, except +if rescued by external means such as backups or version control systems (Git, +Subversion, etc.) + +Note that the 'remove-unused' action does *not* imply 'mark-unused'. It only +removes translation units that are already marked as unused (i.e., with +translate="no"). Thus, it makes sense to do 'mark-unused' followed by +'remove-unused' if you really want to get rid of old translations (you need to +invoke the program twice, or make a small change for this). Leaving unused +translated strings marked as such in XLIFF files shouldn't harm much in +general on the short or mid-term: they only take some space. 
+ +[1] FlightGear XML files in $FG_ROOT/Translations/default containing strings + used for the default locale (English).""", + formatter_class=argparse.RawDescriptionHelpFormatter, + # I want --help but not -h (it might be useful for something else) + add_help=False) + + parser.add_argument("-t", "--transl-dir", + help="""\ + directory containing all translation subdirs (such as + {default!r}, 'en_GB', 'fr_FR', 'de', 'it'...). This + "option" MUST be specified.""".format( + default=fg_i18n.DEFAULT_LANG_DIR)) + parser.add_argument("action", metavar="ACTION", + choices=("merge-new-master", + "mark-unused", + "remove-unused"), + help="""\ + what to do: merge a new default (= master) + translation, or mark unused translation units, or + remove those already marked as unused from the XLIFF + files corresponding to each given LANGUAGE_CODE (i.e., + those that are not in the default translation)""") + parser.add_argument("lang_code", metavar="LANGUAGE_CODE", nargs="+", + help="""\ + codes of languages to operate on (e.g., fr, en_GB, it, + es_ES...)""") + parser.add_argument("--help", action="help", + help="display this message and exit") + + params = parser.parse_args(namespace=params) + + if params.transl_dir is None: + logger.error("--transl-dir must be given, aborting") + sys.exit(1) + + return params + + +class MarkOrRemoveUnusedAction(enum.Enum): + mark, remove = range(2) + + +def markOrRemoveUnused(l10nResPoolMgr, action): + formatHandler = fg_i18n.XliffFormatHandler() + masterTransl = l10nResPoolMgr.readFgMasterTranslation().transl + + for langCode in params.lang_code: + xliffPath = formatHandler.defaultFilePath(params.transl_dir, langCode) + transl = formatHandler.readTranslation(xliffPath) + + if action == MarkOrRemoveUnusedAction.mark: + transl.markObsoleteOrVanished(masterTransl, logger=logger) + elif action == MarkOrRemoveUnusedAction.remove: + transl.removeObsoleteOrVanished(logger=logger) + else: + assert False, "unexpected action: {!r}".format(action) + + 
l10nResPoolMgr.writeTranslation(formatHandler, transl, + filePath=xliffPath) + + +def mergeNewMaster(l10nResPoolMgr): + formatHandler = fg_i18n.XliffFormatHandler() + masterTransl = l10nResPoolMgr.readFgMasterTranslation().transl + + for langCode in params.lang_code: + xliffPath = formatHandler.defaultFilePath(params.transl_dir, langCode) + transl = formatHandler.readTranslation(xliffPath) + transl.mergeMasterTranslation(masterTransl, logger=logger) + l10nResPoolMgr.writeTranslation(formatHandler, transl, + filePath=xliffPath) + + +def main(): + global params + + locale.setlocale(locale.LC_ALL, '') + params = processCommandLine() + + l10nResPoolMgr = fg_i18n.L10NResourcePoolManager(params.transl_dir, logger) + + if params.action == "mark-unused": + markOrRemoveUnused(l10nResPoolMgr, MarkOrRemoveUnusedAction.mark) + elif params.action == "remove-unused": + markOrRemoveUnused(l10nResPoolMgr, MarkOrRemoveUnusedAction.remove) + elif params.action == "merge-new-master": + mergeNewMaster(l10nResPoolMgr) + else: + assert False, "Bug: unexpected action: {!r}".format(params.action) + + sys.exit(0) + + +if __name__ == "__main__": main() diff --git a/python3-flightgear/flightgear/__init__.py b/python3-flightgear/flightgear/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python3-flightgear/flightgear/meta/__init__.py b/python3-flightgear/flightgear/meta/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python3-flightgear/flightgear/meta/exceptions.py b/python3-flightgear/flightgear/meta/exceptions.py new file mode 100644 index 0000000..79b698f --- /dev/null +++ b/python3-flightgear/flightgear/meta/exceptions.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- + +# exceptions.py --- Simple, general-purpose subclass of Exception +# +# Copyright (C) 2015, 2017 Florent Rougon +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software 
Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +"""Simple, general-purpose Exception subclass.""" + + +class FGPyException(Exception): + def __init__(self, message=None, *, mayCapitalizeMsg=True): + """Initialize an FGPyException instance. + + Except in cases where 'message' starts with a proper noun or + something like that, its first character should be given in + lower case. Automated treatments of this exception may print the + message with its first character changed to upper case, unless + 'mayCapitalizeMsg' is False. In other words, if the case of the + first character of 'message' must not be changed under any + circumstances, set 'mayCapitalizeMsg' to False. 
+ + """ + self.message = message + self.mayCapitalizeMsg = mayCapitalizeMsg + + def __str__(self): + return self.completeMessage() + + def __repr__(self): + return "{}.{}({!r})".format(__name__, type(self).__name__, self.message) + + # Typically overridden by subclasses with a custom constructor + def detail(self): + return self.message + + def completeMessage(self): + if self.message: + return "{shortDesc}: {detail}".format( + shortDesc=self.ExceptionShortDescription, + detail=self.detail()) + else: + return self.ExceptionShortDescription + + ExceptionShortDescription = "FlightGear Python generic exception" diff --git a/python3-flightgear/flightgear/meta/i18n.py b/python3-flightgear/flightgear/meta/i18n.py new file mode 100644 index 0000000..6eba1e8 --- /dev/null +++ b/python3-flightgear/flightgear/meta/i18n.py @@ -0,0 +1,1822 @@ +# -*- coding: utf-8 -*- + +# i18n.py --- Utility functions and classes for FlightGear's +# internationalization +# Copyright (C) 2017 Florent Rougon +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# ***************************************************************************** +# Terminology: +# +# category: this corresponds to a “resource” in FlightGear's C++ code +# (e.g., flightgear/src/Main/locale.cxx). +# Examples: menu, options, sys, tips. 
+# +# master string: +# a translatable string before it is translated. In FlightGear, +# this is in English---I believe U.S. English (en_US) to be +# accurate. In Qt Linguist's source code (C++), this is called +# “source text” (cf. TranslatorMessage::sourceText() in +# qt5.git/qttools/src/linguist/shared/translatormessage.h). +# +# master translation: +# also called the “default translation”. It is made of the English +# strings (in $FG_ROOT/Translations/default) that are to be +# translated into other languages. +# +# tid: variable name I use for an instance of a subclass of +# AbstractTranslationUnitId +# +# ***************************************************************************** + +import abc +import collections +import enum +import functools +import os +import pprint +import re +import sys + +try: + import xml.etree.ElementTree as et +except ImportError: + import elementtree.ElementTree as et + +from textwrap import indent, dedent + +from . import misc +from .logging import DummyLogger +from .exceptions import FGPyException + +dummyLogger = DummyLogger() + +# Not including "atc", because it has no translation. Please keep this sorted. +CATEGORIES = ("menu", "options", "sys", "tips") +# Directory name for the default (master) translation +DEFAULT_LANG_DIR = "default" +# Root of the base name for the default output files (XLIFF...) +L10N_FILENAME_BASE = "FlightGear-nonQt" + +# Every subclass of AbstractFormatHandler should register itself here +# using registerFormatHandler(). This allows automatic selection of the +# proper format handler based on user input (e.g., a command-line option +# such as --format=xliff). +FORMAT_HANDLERS_MAP = {} +FORMAT_HANDLERS_NAMES = [] + +# The plural forms for each language should be listed in the same order +# as in Qt Linguist (either look in the Linguist GUI or in +# qttools/src/linguist/shared/numerus.cpp). 
PLURAL_FORMS = {
    None: [""],                       # for the default (= master) translation
    "de": ["singular", "plural"],
    "en": ["singular", "plural"],
    "es": ["singular", "plural"],
    "fr": ["singular", "plural"],
    "it": ["singular", "plural"],
    "nl": ["singular", "plural"],
    "pl": ["singular", "paucal", "plural"],
    "pt": ["singular", "plural"],
    "zh": ["universal"],              # universal form
}

# Regexps for parsing language codes
#
# NOTE(review): the named-group syntax had lost its "<name>" parts in this
# copy of the file, which made both patterns invalid. 'language' is the name
# pluralFormsForLanguage() looks up below; 'territory' is reconstructed from
# the wording used in the documentation -- confirm against upstream.
FGLocale_cre = re.compile(
    r"(?P<language>[a-zA-Z]+)(_(?P<territory>[a-zA-Z0-9]+))?")
# This is a simplified version compared to what the RFC allows
RFC4646Locale_cre = re.compile(
    r"(?P<language>[a-zA-Z]+)(-(?P<territory>[a-zA-Z0-9]+))?")


def pluralFormsForLanguage(langCode):
    """Return the list of plural form names for 'langCode'.

    'langCode' is first looked up as is in PLURAL_FORMS (this is how
    None, the key for the default translation, and exact entries such
    as "fr" are resolved). If there is no such entry, the language part
    of 'langCode' (e.g., "zh" for "zh_TW") is tried as a fallback.

    Raise MissingLocaleMetadata if neither lookup succeeds, including
    when 'langCode' does not start with a language part at all.

    """
    try:
        return PLURAL_FORMS[langCode]
    except KeyError:
        pass                    # no exact entry: try the bare language below

    mo = FGLocale_cre.match(langCode)
    # An explicit test rather than an assertion: assertions are stripped
    # when Python runs with -O, and a malformed code is a data problem, not
    # a programming error.
    if mo is not None:
        try:
            return PLURAL_FORMS[mo.group("language")]
        except KeyError:
            pass

    # Raised outside of any 'except' block so that the report is not
    # cluttered with the (uninteresting) KeyError as exception context.
    raise MissingLocaleMetadata(
        "PLURAL_FORMS data is missing for locale {!r}".format(langCode))


# Trivial, but this is what we'll need most of the times here.
+def nbPluralFormsForLanguage(langCode): + return len(pluralFormsForLanguage(langCode)) + + +def registerFormatHandler(fmtName, fmtHandlerClass): + global FORMAT_HANDLERS_NAMES + + FORMAT_HANDLERS_MAP[fmtName] = fmtHandlerClass + FORMAT_HANDLERS_NAMES = sorted(FORMAT_HANDLERS_MAP.keys()) + + +# ***************************************************************************** +# * Custom exceptions * +# ***************************************************************************** + +class error(FGPyException): + """Base class for exceptions raised in this module.""" + ExceptionShortDescription = "Generic exception" + +class BadAPIUse(error): + """Exception raised when this module's API is used incorrectly.""" + ExceptionShortDescription = "Bad API use" + +class TranslationFileParseError(error): + """Exception raised when parsing a translation file fails.""" + ExceptionShortDescription = "Error parsing a translation file" + +class XliffParseError(TranslationFileParseError): + """Exception raised when parsing an XLIFF file fails.""" + ExceptionShortDescription = "Error parsing an XLIFF file" + +class XliffLogicalWriteError(error): + """ + Exception raised when writing an XLIFF file fails for some logical reason.""" + ExceptionShortDescription = "Error writing an XLIFF file" + +class MissingLocaleMetadata(error): + """ + Exception raised when locale-specific metadata is needed but unavailable.""" + ExceptionShortDescription = "Missing locale metadata" + + +# ***************************************************************************** +# * TranslationUnit & friends * +# ***************************************************************************** + +# Abstract base class +class AbstractTranslationUnitId(metaclass=abc.ABCMeta): + """Abstract base class for the ID of a TranslationUnit (“tid“). + + This key is used to access a given TranslationUnit from a + Translation instance. 
If 't' is a Translation instance and 'cat' a + category, then t[cat] is a mapping whose keys are instances of a + subclass of AbstractTranslationUnitId, and values are + TranslationUnit instances: t[cat][tid] is a TranslationUnit instance + for appropriate tid objects. + + Each subclass must define (as instance or class member) a 'cat' + attribute that must contain a non-empty string. + + """ + @abc.abstractmethod + def id(self): + raise NotImplementedError + + @abc.abstractmethod + def __str__(self): + raise NotImplementedError + + @abc.abstractmethod + def __eq__(self, other): + raise NotImplementedError + + @abc.abstractmethod + def __lt__(self, other): + raise NotImplementedError + + @abc.abstractmethod + def __hash__(self): + raise NotImplementedError + + +@functools.total_ordering +class BasicTranslationUnitId(AbstractTranslationUnitId): + + # Helper regexp for parsing the result of str() applied to an instance of + # this class. + regexp = re.compile(r"""^(?P [^/:]+) / + (?P [^/:]+) : + (?P \d+)$""", + re.VERBOSE) + + # Same as above with one more field (group), 'pluralIndex'. It is used for + # in XLIFF files generated by Qt Linguist. In + # this library, we pack all plural forms belonging together into *one* + # TranslationUnit instance, which only has one associated id that we call + # “tid”, not containing any pluralIndex. So, what this regexp parses is a + # “tid” followed by an optional plural form index inside brackets. + xliffRegexp = re.compile(r"""^(?P [^/:]+) / + (?P [^/:]+) : + (?P \d+) + (\[ (?P\d+) \])?$""", + re.VERBOSE) + + def __init__(self, cat, basicId, index): + self.cat = cat # category ("menu", "tips", options"...) 
+ self.basicId = basicId # string (an XML tag name) + self.index = index # integer (a PropertyList node index) + + def id(self): + return "{}:{}".format(self.basicId, self.index) + + def __str__(self): + return "{}/{}".format(self.cat, self.id()) + + def __repr__(self): + return "{}.{}({!r}, {!r}, {!r})".format( + __name__, type(self).__name__, self.cat, self.basicId, self.index) + + def _key(self): + return (self.cat, (self.basicId, self.index)) + + # The other comparisons are deduced from these by the + # functools.total_ordering decorator. + def __lt__(self, other): + if type(self) is type(other): + return self._key() < other._key() + else: + return NotImplemented + + def __eq__(self, other): + return type(self) is type(other) and self._key() == other._key() + + def __hash__(self): + return hash((type(self), self._key())) + + +class ContextDevComment: + """Class representing a context developer comment. + + Such a comment is crafted from the XLIFF output of Qt Linguist, it + may have subcomments. + + """ + def __init__(self, mainComment, *, translatorComments=None, + developerComments=None): + """Initialize a ContextDevComment instance.""" + self.mainComment = mainComment + self.translatorComments = ( + list(translatorComments) if translatorComments is not None else []) + self.developerComments = ( + list(developerComments) if developerComments is not None else []) + + def customRepr(self, className): + """Represent an instance of this class. + + Multiline representation with indentation before all args but + the first. The 'className' parameter simply lets the caller + decide whether he wants a qualified or unqualified name, because + the qualified one is likely to shift the second and subsequent + lines a lot to the right. 
+ + """ + joint = ",\n" + (" "*(len(className) + 1)) + args = [repr(self.mainComment), + "translatorComments={!r}".format(self.translatorComments), + "developerComments={!r}".format(self.developerComments)] + + return "{}({})".format(className, joint.join(args)) + + def __repr__(self): + # Qualified class name + return self.customRepr("{}.{}".format(__name__, type(self).__name__)) + + def __str__(self): + # Just the class name: much shorter than what we use in __repr__() + return self.customRepr(type(self).__name__) + + def copy(self): + """Return a new TranslationUnit instance that is a copy of 'self'.""" + return type(self)(self.mainComment, + translatorComments=self.translatorComments, + developerComments=self.developerComments) + + def strings(self): + return dedent("""\ + mainComment = {self.mainComment!r} + translatorComments = {self.translatorComments!r} + developerComments = {self.developerComments!r}""").format(self=self) + + +@functools.total_ordering +class TranslationUnit: + + """Class containing a source string and its translations for a given locale. + + Roughly corresponds to XLIFF's element or Qt Linguist's + TranslatorMessage class. + + """ + def __init__(self, targetLanguage, sourceText, targetTexts, *, + approved=False, translate=True, translatorComments=None, + developerComments=None, isPlural=False): + """Initialize a TranslationUnit instance. + + The default values for 'approved' and 'translate' correspond + to the defaults in the XLIFF 1.2 specification when the + identically-named attributes aren't specified. + + In Qt Linguist's TranslatorMessage class, the combination of + 'approved' and 'translate' corresponds to an enum value: + enum Type { Unfinished, Finished, Vanished, Obsolete }: + + \ | + \ translate | True False + approved \ | + ---------------------+------------------------------------ + True | Finished Vanished + | + False | Unfinished Obsolete + + 'targetLanguage' is the target language code (e.g., 'de' or + 'fr_BE'). 
It is used to determine the number of plural forms, + and thus the number of elements 'targetTexts' must evaluate to + (see below). 'targetLanguage' should be None for all + TranslationUnit instances of the default translation. + + 'targetTexts' must be an iterable of strings with at least one + element. If it has several, they denote plural forms. + + """ + self.sourceText = sourceText + self.targetLanguage = targetLanguage + for attr in ("approved", "translate", "isPlural"): + setattr(self, attr, bool(locals()[attr])) + + self.setTargetTexts(targetTexts) # *after* setting isPlural + + # Note: Linguist 5.7.1 only keeps the last comment of each type when + # reading an XLIFF file containing several consecutive elements. + self.translatorComments = ( + list(translatorComments) if translatorComments is not None else []) + self.developerComments = ( + list(developerComments) if developerComments is not None else []) + + def setTargetTexts(self, targetTexts): + if isinstance(targetTexts, str): # prevent an easy error + raise TypeError( + "'targetTexts' should not be a string: {!r}" + .format(targetTexts)) + + l = list(targetTexts) # enforce the type and copy + nbPluralForms = nbPluralFormsForLanguage(self.targetLanguage) + + if self.isPlural and len(l) != nbPluralForms: + raise BadAPIUse( + "trying to set the targetTexts list for a plural " + "TranslationUnit, however len(targetTexts) doesn't match the " + "number of plural forms for the target language:\n" + " targetTexts = {targetTexts!r}\n" + " nb plural forms = {nbPluralForms}".format( + targetTexts=l, nbPluralForms=nbPluralForms)) + elif not self.isPlural and len(l) != 1: + raise BadAPIUse( + "a non-plural TranslationUnit instance must have " + "len(targetTexts) == 1, however we have targetTexts = {!r}" + .format(l)) + + # This check is most likely redundant with the previous ones, but + # doesn't hurt. 
+ if not l: + raise BadAPIUse("the 'targetTexts' iterable should not be empty") + + self.targetTexts = l + + def customRepr(self, className): + """Represent an instance of this class. + + Multiline representation with indentation before all args but + the first. The 'className' parameter simply lets the caller + decide whether he wants a qualified or unqualified name, because + the qualified one is likely to shift the second and subsequent + lines a lot to the right. + + """ + joint = ",\n" + (" "*(len(className) + 1)) + args = [repr(self.targetLanguage), repr(self.sourceText), + repr(self.targetTexts), + "approved={!r}".format(self.approved), + "translate={!r}".format(self.translate), + "translatorComments={!r}".format(self.translatorComments), + "developerComments={!r}".format(self.developerComments), + "isPlural={!r}".format(self.isPlural)] + + return "{}({})".format(className, joint.join(args)) + + def __repr__(self): + # Qualified class name + return self.customRepr("{}.{}".format(__name__, type(self).__name__)) + + def __str__(self): + # Just the class name: much shorter than what we use in __repr__() + return self.customRepr(type(self).__name__) + + def copy(self): + """Return a new TranslationUnit instance that is a copy of 'self'.""" + return type(self)(self.targetLanguage, + self.sourceText, self.targetTexts, + approved=self.approved, + translate=self.translate, + translatorComments=self.translatorComments, + developerComments=self.developerComments, + isPlural=self.isPlural) + + def _key(self): + """Key used to compare two TranslationUnit instances.""" + return (self.targetLanguage, self.sourceText, self.targetTexts, + self.isPlural, self.developerComments, self.translatorComments, + self.approved, self.translate) + + # The other comparisons are deduced from these by the + # functools.total_ordering decorator. 
+ def __lt__(self, other): + if type(self) is type(other): + return self._key() < other._key() + else: + return NotImplemented + + def __eq__(self, other): + return type(self) is type(other) and self._key() == other._key() + + def __hash__(self): + return hash(self._key()) + + def _stringsKey(self): + """Key used to compare the strings of two TranslationUnit instances.""" + return (self.self.sourceText, self.targetTexts, self.developerComments, + self.translatorComments) + + def sameStrings(self, other): + return self._stringsKey() == other._stringsKey() + + def strings(self): + # Note that this omits the 'translate' and 'approved' attributes (which + # are not strings). + return dedent("""\ + sourceText = {self.sourceText!r} + targetTexts = {self.targetTexts!r} + translatorComments = {self.translatorComments!r} + developerComments = {self.developerComments!r}""").format(self=self) + + def mayNeedReview(self, other): + return ((self.sourceText, self.isPlural, self.developerComments) + != + (other.sourceText, other.isPlural, other.developerComments)) + + def fixSizeOfTargetTexts(self): + if self.isPlural: + nbPluralForms = nbPluralFormsForLanguage(self.targetLanguage) + else: + nbPluralForms = 1 + + if len(self.targetTexts) > nbPluralForms: + # Too long -> trim self.targetTexts + del self.targetTexts[nbPluralForms:] + elif len(self.targetTexts) < nbPluralForms: + # Too short -> add empty translations + self.targetTexts.extend( + [""] * (nbPluralForms - len(self.targetTexts))) + + def mergeMasterTranslationUnit(self, masterTu, *, approved=False): + """Merge a master translation unit into self. + + self.targetLanguage and self.translatorComments are not touched; + self.targetTexts is only trimmed or extended as needed if + isPlural is changed; self.approved is set according to the + corresponding argument, other attributes are copied. 
+ + """ + self.sourceText = masterTu.sourceText + self.developerComments = list(masterTu.developerComments) + self.approved = approved + self.translate = masterTu.translate + self.isPlural = masterTu.isPlural + + self.fixSizeOfTargetTexts() # needed because of the change to 'isPlural' + + +class Translation: + def __init__(self, sourceLanguage, targetLanguage): + """Initialize a Translation instance. + + 'sourceLanguage' and' targetLanguage' must be of the form ll or + ll_TT (e.g., en, en_GB, fr, fr_FR, fr_CA...), except for the + default translation (see below). + + The default translation (master) is characterized by the fact + that its 'targetLanguage' attribute is None. For each + TranslationUnit instance it contains (cf. the 'translations' + attribute and __iter__()), the 'sourceText' is an en_US string + and the 'targetTexts' is a list containing one element: the + empty string. + + """ + for attr in ("sourceLanguage", "targetLanguage"): + setattr(self, attr, locals()[attr]) + + # Allows straightforward iteration over sorted categories + self.translations = collections.OrderedDict() + # Qt Linguist uses empty-source-text comments as “context comments”, + # which are developer comments about a context. Each of these is + # written as a in XLIFF. Two such comments compare equal + # in Linguist as soon as they are in the same + # x-trolltech-linguist-context. cf. bool + # operator==(TranslatorMessageContentPtr tmp1, + # TranslatorMessageContentPtr tmp2) in + # qt5.git/qttools/src/linguist/shared/translator.cpp. 
+ self.contextDevComments = collections.OrderedDict() + + for cat in CATEGORIES: + # Keys: instances of a subclass of AbstractTranslationUnitId + # (“tid”) + # Values: TranslationUnit instances + self.translations[cat] = {} + # List of ContextDevComment instances + self.contextDevComments[cat] = [] + + def __str__(self): + l = [dedent("""\ + Translation: + sourceLanguage = {!r} + targetLanguage = {!r}""").format(self.sourceLanguage, + self.targetLanguage)] + + for cat, d in self.translations.items(): + if self.contextDevComments[cat]: + s = "\n\n".join(( indent(c.strings(), " ") + for c in self.contextDevComments[cat] )) + ctxDevComments = "Context developer comments:\n\n{}".format(s) + else: + ctxDevComments = "Context developer comments: none" + + tUnits = ["{}\n{}".format(tid, tu) for tid, tu in sorted(d.items())] + translUnits = "Translation units:\n\n{}".format( + "\n\n".join(tUnits)) + + categoryHeading = "Category: {cat!r}".format(cat=cat) + l.append("\n\n{categoryHeading}\n{underline}\n\n" + "{ctxDevComments}\n\n" + "{translUnits}".format( + categoryHeading=categoryHeading, + underline="-"*len(categoryHeading), + ctxDevComments=ctxDevComments, + translUnits=translUnits)) + + return ''.join(l) + + def __getitem__(self, cat): + return self.translations[cat] + + def __setitem__(self, cat, translUnit): + self.translations[cat] = translUnit + + def __iter__(self): + return iter(self.translations) + + def __contains__(self, cat): + return (cat in self.translations) + + def resetCategory(self, cat): + self.translations[cat] = {} + + # tid: an instance of a subclass of AbstractTranslationUnitId. 
+ def addMasterString(self, tid, sourceText, isPlural=False): + # - target language -> None + # - only the master string (source text) + # - one empty target text + # - carry the plural status + self.translations[tid.cat][tid] = TranslationUnit( + None, sourceText, [""], isPlural=isPlural) + + def addTranslation(self, masterTransl, tid, sourceText, targetTexts, *, + translatorComments=None, developerComments=None, + isPlural=False, logger=dummyLogger): + """Add a TranslationUnit to a Translation instance, with some checks. + + sourceText: string + targetTexts: iterable of strings + + """ + category = tid.cat + + if tid not in masterTransl[category]: + # Is it the “best” behavior? + logger.warning( + "{lang}/{cat}: translated string not in master file: {id!r}" + .format(lang=self.targetLanguage, cat=category, id=tid.id())) + return + + t = TranslationUnit(self.targetLanguage, sourceText, targetTexts, + isPlural=isPlural, + translatorComments=translatorComments, + developerComments=developerComments) + thisCatTranslations = self.translations[category] + + if tid in thisCatTranslations: + if thisCatTranslations[tid].sameStrings(t): + if thisCatTranslations[tid].isPlural != t.isPlural: + complement = " one has plural forms, the other not" + else: + complement = " identical strings" + else: + complement = "\nold:\n{old}\n\nnew:\n{new}".format( + old=indent(thisCatTranslations[tid].strings(), " "), + new=indent(t.strings(), " ")) + + logger.warning("{lang}/{cat}: duplicate translated string: {id!r}:" + "{complement}" + .format(lang=self.targetLanguage, cat=category, + id=tid.id(), complement=complement)) + + thisCatTranslations[tid] = t + + def markObsoleteOrVanishedInCategory(self, masterTransl, cat, + logger=dummyLogger): + thisCatTranslations = self.translations[cat] + masterIdsList = frozenset( + ( str(tid) for tid in masterTransl[cat].keys() )) + + for tid, translUnit in thisCatTranslations.items(): + if (str(tid) not in masterIdsList and + 
thisCatTranslations[tid].translate): + # Obsolete or vanished (depending on whether it is approved) + logger.info( + "{lang}: translatable string '{id}' not found in the " + "default translation -> setting translate='no'" + .format(lang=self.targetLanguage, id=tid)) + thisCatTranslations[tid].translate = False + + def markObsoleteOrVanished(self, masterTransl, *, logger=dummyLogger): + for cat in self.translations: + self.markObsoleteOrVanishedInCategory(masterTransl, cat, + logger=logger) + + def removeObsoleteOrVanishedInCategory(self, cat, *, logger=dummyLogger): + thisCatTranslations = self.translations[cat] + # Find all tid's from self.translations[cat] whose corresponding + # translation unit 'tu' has tu.translate == False. + tidsToRemove = [ tid for tid, translUnit in thisCatTranslations.items() + if not translUnit.translate ] + + # Remove the corresponding elements from self.translations[cat] + for tid in tidsToRemove: + translUnit = thisCatTranslations[tid] + qualifier = "vanished" if translUnit.approved else "obsolete" + logger.info( + "{lang}: removing {qualifier} translated string '{id}'" + .format(lang=self.targetLanguage, qualifier=qualifier, id=tid)) + del thisCatTranslations[tid] + + def removeObsoleteOrVanished(self, *, logger=dummyLogger): + for cat in self.translations: + self.removeObsoleteOrVanishedInCategory(cat, logger=logger) + + def mergeMasterForCategory(self, masterTransl, cat, logger=dummyLogger): + if cat not in masterTransl: + raise BadAPIUse("Bad API use: category {!r} not in " \ + "'masterTransl'".format(cat)) + elif cat not in self: + # Category appeared in 'masterTransl' that wasn't in 'self' + self.resetCategory(cat) + + self.contextDevComments[cat] = \ + [ comment.copy() + for comment in masterTransl.contextDevComments[cat] ] + thisCatTranslations = self.translations[cat] + idsSet = { str(tid) for tid in thisCatTranslations.keys() } + + for masterTid, masterTu in masterTransl.translations[cat].items(): + if str(masterTid) not in 
idsSet: + logger.info( + "{lang}: adding new translatable string '{id}'" + .format(lang=self.targetLanguage, id=masterTid)) + self.addTranslation( + masterTransl, masterTid, masterTu.sourceText, [""], + developerComments=masterTu.developerComments, + isPlural=masterTu.isPlural, logger=logger) + idsSet.add(masterTid) + elif thisCatTranslations[masterTid].mayNeedReview(masterTu): + thisCatTranslations[masterTid].mergeMasterTranslationUnit( + masterTu, approved=False) + logger.info( + "{lang}: '{id}': source text, developer comments or " + "plural/non plural status changed -> needs translator " + "review".format(lang=self.targetLanguage, id=masterTid)) + + # At this point, thisCatTranslations has a translation unit with id + # masterTid. At the time of this writing, all translation units in + # the default translation have translate=True, but just in case, + # let's copy this attribute from the master translation unit if + # they are different. + current = thisCatTranslations[masterTid].translate + new = masterTu.translate + if current != new: + logger.info( + "{lang}: setting translate='{translateVal}' for " + "translatable string '{id}'" + .format(lang=self.targetLanguage, + id=masterTid, translateVal="yes" if new else "no")) + thisCatTranslations[masterTid].translate = new + + self.markObsoleteOrVanishedInCategory(masterTransl, cat, logger=logger) + + def mergeMasterTranslation(self, masterTransl, logger=dummyLogger): + """Update all categories in 'self' based on 'masterTransl'.""" + for cat in masterTransl: + self.mergeMasterForCategory(masterTransl, cat, logger=logger) + + # Find all empty categories in 'self' that are not in 'masterTransl' + categoriesToRemove = [ cat for cat in self + if not self[cat] and cat not in masterTransl ] + + # Now, remove them from 'self' + for cat in categoriesToRemove: + logger.info( + "{lang}: removing empty category '{cat}' not found in master" + .format(lang=self.targetLanguage, cat=cat)) + del self[cat] + + def 
nbPluralForms(self): + return nbPluralFormsForLanguage(self.targetLanguage) + + +def langCodeForXliff(langCode): + """Convert a string from ll_TT format to ll-TT (RFC 4646). + + It's okay if only the 'll' part is given, with no underscore. + + """ + mo = FGLocale_cre.match(langCode) + + if not mo: + assert False, "Unexpected FG locale: '{}'".format(langCode) + + lang, territory = mo.group("language", "territory") + + assert lang, repr(lang) # neither None nor the empty string + if territory is None: + return lang.lower() + else: + # Complies with RFC 4646, as specified in the XLIFF 1.2 spec. + return "{}-{}".format(lang.lower(), territory.upper()) + +def langCodeInll_TTformat(langCode): + """Convert a string from ll-TT format (RFC 4646) to ll_TT. + + It's okay if only the 'll' part is given, with no hyphen. + + """ + mo = RFC4646Locale_cre.match(langCode) + + if not mo: + assert False, "Unexpected RFC 4646-style locale: '{}'".format(langCode) + + lang, territory = mo.group("language", "territory") + + assert lang, repr(lang) # neither None nor the empty string + if territory is None: + return lang.lower() + else: + return "{}_{}".format(lang.lower(), territory.upper()) + + +class XliffVariables(enum.Enum): + QtContext, gettextContext, gettextPreviousContext, translate, \ + lineNumber, sourceFile = range(6) + + +class NestedScopes: + """Simple implementation of nested scopes for XLIFF “variables”.""" + + def __init__(self): + self.scopes = collections.deque() + + def enterScope(self): + self.scopes.append({}) + + def exitScope(self): + self.scopes.pop() + + def __setitem__(self, variable, value): + """Set a variable at the innermost scope.""" + self.scopes[-1][variable] = value + + def __getitem__(self, variable): + """Get a variable value. 
Traverse scopes as needed.""" + for scope in reversed(self.scopes): + if variable in scope: + return scope[variable] + + raise KeyError(variable) + + def __iter__(self): + return iter(frozenset(( var for scope in self.scopes + for var in scope.keys() ))) + + def __contains__(self, variable): + try: + self[variable] + except KeyError: + return False + + return True + + def hasAtInnerMostScope(self, variable): + """Tell if a variable is set in the innermost scope.""" + return variable in self.scopes[-1] + + +def insideScope(method): + """Decorator: create a scope upon method entry and leave it upon exit.""" + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + self.scopedVars.enterScope() + + try: + res = method(self, *args, **kwargs) + finally: + self.scopedVars.exitScope() + + return res + + return wrapper + + +# Abstract base class +class AbstractFormatHandler(metaclass=abc.ABCMeta): + """Abstract base class for format handlers such as XLIFF.""" + + # Subclasses should generally override this (file extension, with no dot) + standardExtension = None + + @classmethod + def defaultFileStem(cls, targetLanguage): + """Expected file stem (for FlightGear) for a given language code.""" + # Currently: no use of the language code here, because the directories + # we put these files in are named after the language code. 
+ return L10N_FILENAME_BASE + + @classmethod + def defaultFileBaseName(cls, targetLanguage): + """Expected file basename (for FlightGear) for a given language code.""" + return "{}.{}".format(cls.defaultFileStem(targetLanguage), + cls.standardExtension) + + @classmethod + def defaultFilePath(cls, translationsDir, targetLanguage): + """ + Expected file path for a given translations directory and language.""" + baseName = cls.defaultFileBaseName(targetLanguage) + return os.path.join(translationsDir, targetLanguage, baseName) + + @abc.abstractmethod + def writeTranslation(self, transl, filePath): + """Write a Translation instance to a file.""" + raise NotImplementedError + + +class XliffFormatReader: + """Read from XLIFF files.""" + + xliffNamespaceURI = "urn:oasis:names:tc:xliff:document:1.2" + # URI reserved for the 'xml' prefix, cf. + # + xmlNamespaceURI = "http://www.w3.org/XML/1998/namespace" + # Mapping from each prefix to the associated namespace + nsMap = {"xliff": xliffNamespaceURI, + "xml": xmlNamespaceURI} + + def __init__(self, file_): + self.file = file_ + # Used to implement “XLIFF variables” such as the current 'translate' + # value: they have scoping properties that generally match elements + # nesting in the XML markup, except, e.g., for contexts defined in a + # itself inside a , which affect *subsequent* + # elements inside the : + # + # “All , , , and + # non-XLIFF elements pertain to the subsequent elements in the tree but + # can be overridden within a child element.” + # + # () + self.scopedVars = NestedScopes() + # Filling this object is the main purpose of this class + self.transl = Translation(None, None) + self.insidePluralGroup = False + # List of (tid, pluralIndex, transl) tuples where each 'transl' is a + # temporary TranslationUnit instance. They will be merged into one when + # the relevant ends (several plural forms of the same string). 
+ self.pluralGroupContents = [] + + def _readXliffBool(self, string_): + if string_ not in ("yes", "no"): + raise XliffParseError( + "{file}: not a valid XLIFF boolean: {val!r}" + .format(file=self.file, val=string_)) + + return (string_ == "yes") + + @classmethod + def qualTagName(cls, unqualified): + """Return a tag name in the XLIFF namespace (using XPath syntax).""" + return "{" + cls.xliffNamespaceURI + "}" + unqualified + + @classmethod + def xmlQualName(cls, unqualified): + """Return a qualified tag or attribute name for the 'xml' prefix. + + This prefix is special and reserved + (): + + The prefix xml is by definition bound to the namespace name + http://www.w3.org/XML/1998/namespace. + + """ + return "{" + cls.xmlNamespaceURI + "}" + unqualified + + def parse(self): + tree = et.parse(self.file) + rootNode = tree.getroot() + + if (rootNode.tag != self.qualTagName("xliff") or + rootNode.get("version") != "1.2"): + raise XliffParseError( + "{file}: this parser only supports (parts of) the XLIFF 1.2 " + "standard, and the root node doesn't seem to conform to this " + "(tag name = {tag!r}, 'version' attribute = {version!r})" + .format(file=self.file, tag=rootNode.tag, + version=rootNode.get("version"))) + + self.scopedVars.enterScope() # so that we can define scoped variables + + try: + # Set default value according to the XLIFF specification + self.scopedVars[XliffVariables.translate] = True + + for fileNode in rootNode.iterfind("./xliff:file", self.nsMap): + self._handleFileNode(fileNode) + finally: + self.scopedVars.exitScope() + + return self.transl + + @insideScope + def _handleFileNode(self, fileNode): + if "source-language" in fileNode.attrib: + self.transl.sourceLanguage = langCodeInll_TTformat( + fileNode.get("source-language")) + + if "target-language" in fileNode.attrib: + self.transl.targetLanguage = langCodeInll_TTformat( + fileNode.get("target-language")) + + headerSeen = False + bodySeen = False + for node in fileNode: + if node.tag == 
self.qualTagName("header"): + if bodySeen: + raise XliffParseError( + "{file}: 'header' element found after a 'body' element " + "inside a 'file' element".format(file=self.file)) + elif headerSeen: + raise XliffParseError( + "{file}: found more than one 'header' element inside a " + "'file' element, this doesn't conform to the XLIFF 1.2 " + "specification".format(file=self.file)) + else: + headerSeen = True + elif node.tag == self.qualTagName("body"): + if bodySeen: + raise XliffParseError( + "{file}: found more than one 'body' element inside a " + "'file' element, this doesn't conform to the XLIFF 1.2 " + "specification".format(file=self.file)) + else: + bodySeen = True + self._handleBodyNode(node) + + @insideScope + def _handleBodyNode(self, bodyNode): + for node in bodyNode: + if node.tag == self.qualTagName("group"): + self._handleGroupNode(node) + elif node.tag == self.qualTagName("trans-unit"): + self._handleTransUnitNode(node) + elif node.tag == self.qualTagName("bin-unit"): + pass # not implemented + else: + raise XliffParseError( + "{file}: illegal element inside a 'body' element: {tag!r}" + .format(file=self.file, tag=node.tag)) + + def _handlePluralGroup(self, notesDict): + """Handle a group containing related plural forms.""" + sourceTexts = set() + tids = set() + pluralIdxMap = {} # to put the plural indices back in order + # May only be set in and elements + approved = True + # May come from an enclosing + translate = self.scopedVars[XliffVariables.translate] + tmpTargetTexts = [] + + if len(self.pluralGroupContents) != self.transl.nbPluralForms(): + raise XliffParseError( + "{file}: found a plural group with {found} 'transl-unit' " + "elements, however the expected number of plural forms for " + "language {lang!r} is {expected}. 
Plural group contents: " + "{pluralGroup!r}".format( + file=self.file, lang=self.transl.targetLanguage, + found=len(self.pluralGroupContents), + expected=self.transl.nbPluralForms(), + pluralGroup=self.pluralGroupContents)) + + for i, (tid, pluralIndex, transl) in \ + enumerate(self.pluralGroupContents): + assert isinstance(pluralIndex, int), pluralIndex + + pluralIdxMap[pluralIndex] = i + sourceTexts.add(transl.sourceText) + tids.add(tid) + + approved = approved and transl.approved + translate = translate or transl.translate + # 'transl' has exactly one target text (temporary, non-plural + # TranslationUnit) + tmpTargetTexts.append(transl.targetTexts[0]) + + obtainedIndices = frozenset(pluralIdxMap.keys()) + + if (frozenset(range(len(self.pluralGroupContents))) != obtainedIndices): + raise XliffParseError( + '{file}: incorrect set of indices for plural forms ' + 'inside a group: ' + "{indices!r}".format(file=self.file, + indices=sorted(obtainedIndices))) + elif len(tids) > 1: + raise XliffParseError( + "{file}: all plural forms for the same master string " + "should have the same tid. 'tid's found: {tids!r}" + .format(file=self.file, tids=sorted(tids))) + elif len(sourceTexts) > 1: + raise XliffParseError( + "{file}: all plural forms inside a given " + ' group ' + "should have the same sourceText. 'sourceText's found: " + "{sourceTexts!r}" + .format(file=self.file, sourceTexts=sorted(sourceTexts))) + elif not tids: + pass # empty plural group... + else: + assert len(sourceTexts) == 1, sourceTexts + assert len(tids) == 1, tids + tid = tids.pop() # get the only value + # Reorder the target texts (= plural forms) in proper order in + # case they weren't (which would be surprising...) 
+ targetTexts = [ tmpTargetTexts[pluralIdxMap[i]] + for i in range(len(self.pluralGroupContents)) ] + + translUnit = TranslationUnit( + self.transl.targetLanguage, sourceTexts.pop(), targetTexts, + translatorComments=notesDict["translator"], + developerComments=notesDict["developer"], + approved=approved, translate=translate, isPlural=True) + # Add the TranslationUnit containing all related plural forms + self.transl[tid.cat][tid] = translUnit + + self.pluralGroupContents.clear() + + @insideScope + def _handleGroupNode(self, node): + pluralGroup = False + + if node.get("restype") == "x-trolltech-linguist-context": + QtContext = node.get("resname") + + if QtContext is None: + raise XliffParseError( + "{file}: 'restype' attribute in a group without any " + "corresponding 'resname'".format(file=self.file)) + else: + self.scopedVars[XliffVariables.QtContext] = QtContext + elif node.get("restype") == "x-gettext-plurals": # Qt Linguist's way + pluralGroup = self.insidePluralGroup = True + + translate = node.get("translate") + if translate is not None: + self.scopedVars[XliffVariables.translate] = \ + self._readXliffBool(translate) + notesDict = {"developer": [], + "translator": []} + + for subnode in node: + if subnode.tag == self.qualTagName("group"): + self._handleGroupNode(subnode) + elif subnode.tag == self.qualTagName("context-group"): + self._handleContextGroupNode(subnode) + elif subnode.tag == self.qualTagName("note"): + self._handleNoteNode(subnode, notesDict) + elif subnode.tag == self.qualTagName("trans-unit"): + self._handleTransUnitNode(subnode) + + if pluralGroup: + self.insidePluralGroup = False # for other methods of this class + self._handlePluralGroup(notesDict) + + # Intentionally no @insideScope here! This way, the innermost scope is the + # one created by the parent element of the . 
# NOTE(review): the line breaks of this patch were collapsed in SOURCE. The
# three functions below take 'self' and are reader methods (presumably of
# XliffFormatReader -- confirm); re-indent them to class level when splicing
# them back.

def _handleContextGroupNode(self, node):
    """Process a 'context-group' element.

    Every child must be a 'context' element; each one is passed to
    _handleContextNode().

    Raise XliffParseError if any other child element is found.

    """
    for subnode in node:
        if subnode.tag == self.qualTagName("context"):
            self._handleContextNode(subnode)
        else:
            raise XliffParseError(
                "{file}: illegal element inside a 'context-group' "
                "element: {tag!r}".format(file=self.file, tag=subnode.tag))


# Intentionally no @insideScope here!
def _handleContextNode(self, node):
    """Record the data carried by a 'context' element in self.scopedVars.

    The 'context-type' attribute selects which variable is set:

      linenumber                  -> XliffVariables.lineNumber (an int)
      sourcefile                  -> XliffVariables.sourceFile
      x-gettext-msgctxt           -> XliffVariables.gettextContext
      x-gettext-previous-msgctxt  -> XliffVariables.gettextPreviousContext

    Other context types are silently ignored. For the three string-valued
    types, a missing element text is stored as the empty string.

    Raise XliffParseError if the 'context-type' attribute is missing, or
    if a "linenumber" context doesn't contain an integer.

    """
    # The optional 'context-name' attribute is not used so far here.
    ctxType = node.get("context-type")
    if ctxType is None:
        raise XliffParseError(
            "{file}: invalid 'context' element found with no "
            "'context-type' attribute".format(file=self.file))

    # See the XLIFF 1.2 specification (values of the 'context-type'
    # attribute) for other context types.
    if ctxType == "linenumber":
        # int() raises TypeError for a missing text (None) and ValueError
        # for a non-numeric one: report both as malformed XLIFF input,
        # like every other parse problem.
        try:
            lineNumber = int(node.text)
        except (TypeError, ValueError) as exc:
            raise XliffParseError(
                "{file}: invalid line number in a 'context' element with "
                "context-type 'linenumber': {val!r}".format(
                    file=self.file, val=node.text)) from exc

        self.scopedVars[XliffVariables.lineNumber] = lineNumber
    elif ctxType == "sourcefile":
        self.scopedVars[XliffVariables.sourceFile] = node.text or ""
    elif ctxType == "x-gettext-msgctxt": # Trolltech invention
        self.scopedVars[XliffVariables.gettextContext] = node.text or ""
    elif ctxType == "x-gettext-previous-msgctxt": # Trolltech invention
        self.scopedVars[XliffVariables.gettextPreviousContext] = \
            node.text or ""


# Intentionally no @insideScope here!
def _handleNoteNode(self, node, notesDict):
    """Add a translator or developer note to 'notesDict'.

    'notesDict' maps "developer" and "translator" to lists of strings.
    The text of 'node' (or the empty string if it has none) is appended
    to the list selected by the 'from' attribute of the element. A note
    without any 'from' attribute is ignored.

    Raise XliffParseError if the 'from' attribute has another value.

    """
    origin = node.get("from")
    if origin in ("developer", "translator"):
        notesDict[origin].append(node.text or "")
    elif origin is not None:
        # Maybe a bit harsh to raise for this...
        raise XliffParseError(
            "{file}: unknown 'origin' value for a 'note' element: "
            "'{origin}'".format(file=self.file, origin=origin))

    # There can also be annotates="source" (output by Qt Linguist for
    # developer comments in addition to the 'origin' attribute), we don't
    # use this attribute.
# NOTE(review): the line breaks of this patch were collapsed in SOURCE. The
# two functions below take 'self' and are reader methods (presumably of
# XliffFormatReader -- confirm); re-indent them to class level when splicing
# them back.

# NOTE(review): @insideScope presumably opens a new scope for self.scopedVars
# for the duration of the call, so that a 'translate' value set here doesn't
# leak to sibling elements -- confirm against the decorator's definition.
@insideScope
def _handleTransUnitNode(self, node):
    """Process a 'trans-unit' element.

    The 'id' attribute is parsed with BasicTranslationUnitId.xliffRegexp
    to build the translation unit id (tid) and, if present, the plural
    index. The 'source', 'target', 'note' and 'context-group' children
    are then read, and a TranslationUnit with exactly one target text is
    built. What happens to it depends on the context:

      - inside a plural group, it is appended to self.pluralGroupContents
        as (tid, pluralIndex, translUnit);
      - with an empty source text and a gettext context in scope, it is
        not stored: a ContextDevComment is recorded instead in
        self.transl.contextDevComments;
      - otherwise, it is stored in self.transl under its category, along
        with the notes found in the element.

    Raise XliffParseError if the 'id' attribute is missing or malformed,
    if there are several 'source' or several 'target' children, if there
    is no 'source' child, if a member of a plural group has no plural
    index, if the category is unknown or if the tid was already seen.

    """
    tuId = node.get("id")
    if tuId is None:
        raise XliffParseError(
            "{file}: the 'id' attribute is required for 'trans-unit' "
            "elements".format(file=self.file))

    approved = self._readXliffBool(node.get("approved", "no"))

    # This one is trickier, because it may be set either in an enclosing
    # group or here.
    translate = node.get("translate")
    if translate is not None:
        # This overrides any value from higher levels in the XLIFF input
        self.scopedVars[XliffVariables.translate] = \
            self._readXliffBool(translate)

    mo = BasicTranslationUnitId.xliffRegexp.match(tuId)
    if mo is None:
        raise XliffParseError(
            "{file}: this 'id' attribute found on a 'trans-unit' element "
            "doesn't have the expected format: '{val}'".format(
                file=self.file, val=tuId))

    tid = BasicTranslationUnitId(mo.group("cat"), mo.group("basicId"),
                                 int(mo.group("index")))
    pluralIndex = mo.group("pluralIndex")
    if pluralIndex is not None:
        pluralIndex = int(pluralIndex)

    sourceText = targetText = None
    notesDict = {"developer": [],
                 "translator": []}

    for subnode in node:
        if subnode.tag == self.qualTagName("source"):
            if sourceText is not None:
                raise XliffParseError(
                    "{file}: several 'source' elements inside the same "
                    "'trans-unit' element".format(file=self.file))

            sourceText = self._handleSourceOrTargetNode(subnode, node.tag)
        elif subnode.tag == self.qualTagName("target"):
            if targetText is not None:
                raise XliffParseError(
                    "{file}: several 'target' elements inside the same "
                    "'trans-unit' element".format(file=self.file))

            targetText = self._handleSourceOrTargetNode(subnode, node.tag)
        elif subnode.tag == self.qualTagName("note"):
            self._handleNoteNode(subnode, notesDict)
        elif subnode.tag == self.qualTagName("context-group"):
            # This holds context dev comments, for one, and sets
            # XliffVariables.gettextContext in our scope
            self._handleContextGroupNode(subnode)

    if sourceText is None:
        raise XliffParseError(
            "{file}: invalid 'trans-unit' element: doesn't contain any "
            "'source' element".format(file=self.file))

    # Two cases lead to an empty translation: no 'target' element at all
    # (targetText is None), or an empty one (targetText == "").
    targetTexts = [targetText or ""]
    translUnit = TranslationUnit(
        self.transl.targetLanguage,
        sourceText, targetTexts, approved=approved,
        translate=self.scopedVars[XliffVariables.translate])

    if self.insidePluralGroup:
        if pluralIndex is None:
            raise XliffParseError(
                "{file}: invalid plural group: the id attribute value for "
                "each form must end with the form's plural index inside "
                "brackets (an integer)".format(file=self.file))
        # Related plural forms will be merged into one TranslationUnit when
        # the containing plural group ends.
        self.pluralGroupContents.append((tid, pluralIndex, translUnit))
    elif tid.cat not in self.transl:
        raise XliffParseError(
            "{file}: unknown category: '{cat}'"
            .format(file=self.file, cat=tid.cat))
    # Source text empty + inside an x-gettext-msgctxt -> context dev comment
    # (this is how Qt Linguist works)
    elif (not sourceText and
          XliffVariables.gettextContext in self.scopedVars):
        comment = ContextDevComment(
            self.scopedVars[XliffVariables.gettextContext],
            translatorComments=notesDict["translator"],
            developerComments=notesDict["developer"])
        self.transl.contextDevComments[tid.cat].append(comment)
    elif tid in self.transl[tid.cat]:
        raise XliffParseError(
            "{file}: the same TranslationUnit id (tid) appeared several "
            "times, this is fishy: '{tid}'".format(file=self.file,
                                                   tid=tid))
    else:
        translUnit.translatorComments = notesDict["translator"]
        translUnit.developerComments = notesDict["developer"]
        # Add a simple TranslationUnit (no plural forms)
        self.transl[tid.cat][tid] = translUnit


def _handleSourceOrTargetNode(self, node, containingTag):
    """Return the text of a 'source' or 'target' element.

    node:          the 'source' or 'target' element
    containingTag: tag of the element containing 'node'

    When 'node' is directly inside a 'trans-unit' element and has an
    'xml:lang' attribute, this attribute must agree with the source or
    target language of the translation being read (self.transl), as
    converted by langCodeForXliff(); otherwise, XliffParseError is
    raised.

    The return value is the empty string if the element has no text.

    """
    xmlLang = node.get(self.xmlQualName("lang"))

    # 'localName' is only used for the error message: node.tag is the tag as
    # matched against qualTagName(), which is not necessarily the bare
    # element name; the attributes of the 'file' element are 'source-language'
    # and 'target-language'.
    if node.tag == self.qualTagName("source"):
        localName = "source"
        outerLanguage = langCodeForXliff(self.transl.sourceLanguage)
    else:
        assert node.tag == self.qualTagName("target"), node.tag
        localName = "target"
        outerLanguage = langCodeForXliff(self.transl.targetLanguage)

    # Only an error when the containing element is a 'trans-unit'
    if (containingTag == self.qualTagName("trans-unit") and
        xmlLang is not None and xmlLang != outerLanguage):
        raise XliffParseError(
            "{file}: the 'xml:lang' attribute of a '{thisTag}' element "
            "inside a 'trans-unit' element ({xmlLang}) disagrees with the "
            "'{thisTag}-language' attribute found on the enclosing 'file' "
            "element ({outerLang})".format(
                file=self.file, thisTag=localName, xmlLang=xmlLang,
                outerLang=outerLanguage))

    return node.text or ""


# <group restype="x-trolltech-linguist-context" resname=...> is Qt
# Linguist's way of storing the _context_ allowing to distinguish between
# several translations that have the same source string. The way described
# in the XLIFF standard, using <context> elements inside a <context-group>,
# is only usable in Qt Linguist with context-type="x-gettext-msgctxt" for
# the 'context' element. It is also a Trolltech invention, and is stored as
# TranslatorMessage::m_comment instead of TranslatorMessage::m_context.
# The comparison rules in
#   bool operator==(TranslatorMessageContentPtr tmp1,
#                   TranslatorMessageContentPtr tmp2)
# (qt5.git/qttools/src/linguist/shared/translator.cpp) wouldn't suit our
# needs, because two TranslatorMessage instances with the same context()
# and an empty sourceText() (= master) are considered duplicates even if
# they have different values for the comment(). IOW, Qt Linguist's
# notion of TranslatorMessage::comment() can't be used to distinguish
# between two empty master strings that might have different
# translations in different categories.
+class XliffFormatWriter: + """Write to XLIFF files.""" + + def _insertComments(self, element, container): + """Insert translator and developer comments into 'element'.""" + for transComment in container.translatorComments: + noteElt = et.SubElement(element, "note", + attrib={"from": "translator"}) + noteElt.text = transComment + + for devComment in container.developerComments: + # Linguist doesn't seem to show developer comments unless + # annotates="source" is given. + noteElt = et.SubElement(element, "note", + attrib={"from": "developer", + "annotates": "source"}) + noteElt.text = devComment + + def _appendSimpleTranslationUnit(self, groupElement, idsUsed, tid, + translUnit): + """Append a TranslationUnit that has no plural forms.""" + # The XLIFF 1.2 standard wouldn't require the leading tid.cat here if + # we were using one per category, because the XLIFF id only has + # to be unique within each element. However: + # + # 1) Qt Linguist doesn't support multiple elements per XLIFF + # file well (they are collapsed upon export). + # + # 2) It would consider for instance elements with the + # same id 'rendering-options:0' from the 'options' and 'menu' + # categories as identical, which is undesirable (e.g., the current + # Spanish translation capitalizes them differently). + # + # Therefore, we prepend the category to make sure all XLIFF + # ids are unique within the whole XLIFF file (this is done + # by AbstractTranslationUnitId.__str__(), called here with str(tid)). + idInXliff = str(tid) + + if idInXliff in idsUsed: + raise XliffLogicalWriteError( + "{file}: id '{id}' would be used for several 'trans-unit' " + "elements. Either the input or the algorithm is buggy." 
+ .format(file=self.file, id=idInXliff)) + + # If you change things here, don't forget + # _appendTranslationUnitWithPlural() + attrs = {"id": idInXliff, + "translate": "yes" if translUnit.translate else "no", + "approved": "yes" if translUnit.approved else "no" + } + transUnitElt = et.SubElement(groupElement, "trans-unit", attrib=attrs) + sourceElt = et.SubElement(transUnitElt, "source") + sourceElt.text = translUnit.sourceText + + # This list should never be empty (i.e., one or more translations) + assert translUnit.targetTexts, translUnit.targetTexts + targetElt = et.SubElement(transUnitElt, "target") + targetElt.text = translUnit.targetTexts[0] + + self._insertComments(transUnitElt, translUnit) + + return idInXliff # value used for the 'id' attr of the elt + + def _appendTranslationUnitWithPlural(self, groupElement, idsUsed, tid, + translUnit): + subgroupElt = et.SubElement(groupElement, "group", id=str(tid), + restype="x-gettext-plurals") + self._insertComments(subgroupElt, translUnit) + idsInXliff = [] + + for i, pluralForm in enumerate(translUnit.targetTexts): + # This is the way Qt Linguist 5.7.1 handles plural forms + idInXliff = "{idStr}[{pluralFormIndex}]".format(idStr=tid, + pluralFormIndex=i) + # If you change things here, don't forget + # _appendSimpleTranslationUnit() + attrs = {"id": idInXliff, + "translate": "yes" if translUnit.translate else "no", + "approved": "yes" if translUnit.approved else "no" + } + transUnitElt = et.SubElement(subgroupElt, "trans-unit", + attrib=attrs) + sourceElt = et.SubElement(transUnitElt, "source") + sourceElt.text = translUnit.sourceText + targetElt = et.SubElement(transUnitElt, "target") + targetElt.text = pluralForm + + idsInXliff.append(idInXliff) + + idsAlreadyUsed = idsUsed.intersection(idsInXliff) + if idsAlreadyUsed: + raise XliffLogicalWriteError( + "{file}: several ids would be reused for different 'trans-unit' " + "elements (problematic ids: {ids}). Either the input or the " + "algorithm is buggy." 
+ .format(file=self.file, ids=idsAlreadyUsed)) + + # Values used for the 'id' attributes of elements + return frozenset(idsInXliff) + + def _appendContextDevCommentsTranslUnits(self, groupElement, idsUsed, cat, + comments): + idsInXliff = [] + + for i, ctxDevComment in enumerate(comments): + idInXliff = "{cat}/_contextDevComment-{num}:0".format(cat=cat, + num=i) + if idInXliff in idsUsed: + raise XliffLogicalWriteError( + "{file}: id '{id}' would be used for several 'trans-unit' " + "elements. This looks like a bug in the algorithm (or an " + "extreme coincidence!)." + .format(file=self.file, id=idInXliff)) + + transUnitElt = et.SubElement(groupElement, "trans-unit", + id=idInXliff) + sourceElt = et.SubElement(transUnitElt, "source") + sourceElt.text = "" + targetElt = et.SubElement(transUnitElt, "target") + targetElt.text = "" + + ctxGroupElt = et.SubElement(transUnitElt, "context-group") + ctxElt = et.SubElement(ctxGroupElt, "context", + attrib={"context-type": "x-gettext-msgctxt"}) + ctxElt.text = ctxDevComment.mainComment + self._insertComments(transUnitElt, ctxDevComment) + idsInXliff.append(idInXliff) + + return idsInXliff # values used for 'id' attrs of elts + + def writeTranslation(self, transl, filePath): + """Write a translation to an XLIFF file or to the standard output. + + transl: a Translation instance + filePath: path to a file, or '-' to designate the standard + output + + """ + xliffAttrs = { + "version": "1.2", + "xmlns": "urn:oasis:names:tc:xliff:document:1.2", + "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "xsi:schemaLocation": + "urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2.xsd" + } + xliffElt = et.Element("xliff", attrib=xliffAttrs) + attrs = { + # Since Qt Linguist (at least 5.7.1) will collapse all + # elements into one upon export, with this attribute empty, + # let's do the same here to minimize the size of diffs. 
+ "original": "", + "source-language": langCodeForXliff(transl.sourceLanguage), + "target-language": langCodeForXliff(transl.targetLanguage), + # If we could use according to the XLIFF 1.2 standard, + # the correct datatype would be 'xml'. Since we can't, let's + # use the same type as Linguist upon export, to minimize diff + # size again. + "datatype": "plaintext", + "xml:space": "preserve" + } + fileElt = et.SubElement(xliffElt, "file", attrib=attrs) + bodyElt = et.SubElement(fileElt, "body") + idsUsed = set() # values used for the 'id' attrs of elements + + for cat, t in transl.translations.items(): # already sorted (OrderedDict) + # See the comment above the class definition + groupElt = et.SubElement(bodyElt, "group", + restype="x-trolltech-linguist-context", + resname=cat) + + contextDevComments = transl.contextDevComments[cat] + if contextDevComments: + idsUsed.update(self._appendContextDevCommentsTranslUnits( + groupElt, idsUsed, cat, contextDevComments)) + + for tid, translUnit in sorted(t.items()): + if translUnit.isPlural: + idsUsed.update( + self._appendTranslationUnitWithPlural( + groupElt, idsUsed, tid, translUnit)) + else: + idsUsed.add(self._appendSimpleTranslationUnit( + groupElt, idsUsed, tid, translUnit)) + + misc.indentXmlTree(xliffElt) + + if filePath == "-": + enc = "unicode" # ElementTree.write() will output str objects + filePathOrObj = sys.stdout + else: + enc = "UTF-8" + filePathOrObj = filePath + + et.ElementTree(xliffElt).write(filePathOrObj, encoding=enc, + xml_declaration=True) + + +class XliffFormatHandler(AbstractFormatHandler): + """Read from, and write to XLIFF files.""" + + standardExtension = "xlf" # used by some base class methods + + def readTranslation(self, filePath): + reader = XliffFormatReader(filePath) + return reader.parse() + + def writeTranslation(self, transl, filePath): + """Write a translation to an XLIFF file or to the standard output. 
+ + transl: a Translation instance + filePath: path to a file, or '-' to designate the standard + output + + """ + writer = XliffFormatWriter() + return writer.writeTranslation(transl, filePath) + + +registerFormatHandler("xliff", XliffFormatHandler) + + +# ***************************************************************************** +# * Classes for reading FlightGear's XML localization files * +# ***************************************************************************** + +# Could also be a dict +def L10nResMgrForCat(category): + """Map from category/resource name to L10NResourceManager class.""" + if category in ("menu", "options", "tips"): + return BasicL10NResourceManager + elif category == "sys": + return SysL10NResourceManager + else: + assert False, "unexpected category: {!r}".format(category) + +# Convenience class for holding the result returned by some high-level +# methods reading FlightGear's XML localization files. +# +# transl: a Translation instance +# nbWhitespacePbs: number of whitespace “problems” encountered in this +# translation (leading or trailing whitespace in +# strings...). Note that for a non-default Translation, +# only the problems in translations (targetTexts) are +# counted: those strings contained in the particular +# non-default FlightGear XML localization file. +TranslationData = collections.namedtuple("TranslationData", + ["transl", "nbWhitespacePbs"]) + +class L10NResourcePoolManager: + + def __init__(self, translationsDir, logger=dummyLogger): + """Initialize a L10NResourcePoolManager instance. + + translationsDir should contain subdirs such as 'en_GB', 'fr_FR', + 'de', 'it'... and the value of DEFAULT_LANG_DIR. + + """ + self.translationsDir = translationsDir + self.logger = logger + self.masterTranslDir = os.path.join(translationsDir, DEFAULT_LANG_DIR) + + def readFgMasterTranslationFile(self, xmlFilePath, targetTransl, cat): + """Read the FlightGear default translation for a given category. 
+ + This is an XML PropertyList file, + $FG_ROOT/Translations/default/.xml at the time of this + writing. + + Return the number of whitespace (potential) problems found. + + """ + resMgr = L10nResMgrForCat(cat) + return resMgr._readFgResourceFile(xmlFilePath, None, targetTransl, cat, + None, logger=self.logger) + + def readFgTranslationFile(self, xmlFilePath, masterTransl, targetTransl, + cat, langCode): + """Read a FlightGear translation file for a given category. + + This is an XML PropertyList file, + $FG_ROOT/Translations//.xml directory at the time + of this writing. + + Return the number of whitespace (potential) problems found. + + """ + resMgr = L10nResMgrForCat(cat) + return resMgr._readFgResourceFile(xmlFilePath, masterTransl, + targetTransl, cat, langCode, + logger=self.logger) + + def readFgMasterTranslation(self): + """Read the FlightGear default translation. + + This is built from XML PropertyList files in directory + 'masterTranslDir' (normally $FG_ROOT/Translations/default, at + the time of this writing). + + """ + transl = Translation("en_US", None) # master translation + nbWhitespaceProblems = 0 + + for cat in CATEGORIES: + xmlFilePath = os.path.join(self.masterTranslDir, cat + ".xml") + resMgr = L10nResMgrForCat(cat) + nbWhitespaceProblems += self.readFgMasterTranslationFile( + xmlFilePath, transl, cat) + + # I don't put the number of whitespace problems in an attribute + # of the Translation, otherwise there could be expectations that + # it is updated when the Translation is modified... + return TranslationData(transl, nbWhitespaceProblems) + + def readFgTranslation(self, masterTransl, langCode): + """Read a FlightGear non-default translation. + + This is built from XML PropertyList files in directory + 'languageDir' (normally $FG_ROOT/Translations/, at the + time of this writing). 
+ + """ + languageDir = os.path.join(self.translationsDir, langCode) + self.logger.info("processing language dir {!r}".format(languageDir)) + + # I assume (and believe) the default translation in FlightGear + # corresponds to U.S. English. + translation = Translation("en_US", langCode) + nbWhitespaceProblems = 0 + + for cat in CATEGORIES: + xmlFilePath = os.path.join(languageDir, cat + ".xml") + + if os.path.isfile(xmlFilePath): + nbWhitespaceProblems += self.readFgTranslationFile( + xmlFilePath, masterTransl, translation, cat, langCode) + + # See comment in readFgMasterTranslation() + return TranslationData(translation, nbWhitespaceProblems) + + def writeTranslation(self, formatHandler, transl, filePath=None): + """Generic writing of a Translation instance. + + formatHandler: instance of a subclass of AbstractFormatHandler + transl: Translation object + + """ + if filePath is None: + filePath = formatHandler.defaultFilePath(self.translationsDir, + transl.targetLanguage) + if filePath != "-": + d = os.path.dirname(filePath) + if not os.path.exists(d): + self.logger.notice("creating directory '{}'".format(d)) + os.makedirs(os.path.dirname(filePath), exist_ok=True) + + return formatHandler.writeTranslation(transl, filePath) + + def genSkeletonTranslation(self, langCode): + """Generate a skeleton Translation instance for a particular language. + + The Translation object will have the 'targetTexts' attribute of + each TranslationUnit set to denote only one empty translation. + This method is useful when adding a translation for a new + language. 
+ + """ + # Create a new master translation + translation = self.readFgMasterTranslation().transl + # This is not a master translation anymore + translation.targetLanguage = langCode + + return translation + + def writeSkeletonTranslation(self, formatHandler, langCode, filePath=None): + transl = self.genSkeletonTranslation(langCode) + return self.writeTranslation(formatHandler, transl, filePath) + + +class L10NResourceManagerBase: + """Base class for *L10NResourceManager classes.""" + + @classmethod + def checkForLeadingOrTrailingWhitespace(cls, langCode, tid, string_, + logger=dummyLogger): + whitespacePb = None + nbWhitespaceProblems = 0 + + if string_.lstrip() != string_: + whitespacePb = "leading" + if string_.rstrip() != string_: + if whitespacePb is not None: + whitespacePb = "leading and trailing" + else: + whitespacePb = "trailing" + + if whitespacePb is not None: + nbWhitespaceProblems += 1 + + if langCode is None: + place = "default translation" + langDir = DEFAULT_LANG_DIR + else: + place = "translation" + langDir = langCode + + logger.warning("{langDir}/{cat}: {kind} whitespace in {place} for " + "string {id!r}: {string!r}" + .format(langDir=langDir, cat=tid.cat, id=tid.id(), + place=place, string=string_, + kind=whitespacePb)) + + return nbWhitespaceProblems + + +class BasicL10NResourceManager(L10NResourceManagerBase): + """Resource manager for FG XML i18n files with the simplest structure. + + This is suitable for resources (menu, options, tips) where + translations are in direct children of the element, + with no more structure. + + """ + @classmethod + def _findMainNode(cls, rootNode): + """ + Return the node directly containing the translations in an FG XML file.""" + assert rootNode.tag == "PropertyList", rootNode.tag + return rootNode + + @classmethod + def _readFgResourceFile(cls, xmlFilePath, masterTransl, targetTransl, cat, + langCode, logger=dummyLogger): + """Read a FlightGear XML localization file. 
+ + If 'masterTransl' and 'langCode' are None, read the default + (i.e., master) translation, normally en_US. The method updates + 'targetTransl', without clearing it first (it should probably be + empty when the method is called). + + This method has to know how data is laid out inside the + FlightGear XML localization file to be read ('xmlFilePath'). For + this reason, it is typically overridden in subclasses of + L10NResourceManagerBase. + + """ + if masterTransl is None: + assert langCode is None, langCode + + nbWhitespaceProblems = 0 + tree = et.parse(xmlFilePath) + rootNode = tree.getroot() + mainNode = cls._findMainNode(rootNode) + + for childNode in mainNode: + n = int(childNode.get("n", default=0)) + tid = BasicTranslationUnitId(cat, childNode.tag, n) + # childNode.text could be None for an empty translation + text = childNode.text or "" + nbWhitespaceProblems += cls.checkForLeadingOrTrailingWhitespace( + langCode, tid, text, logger) + + pluralAttr = childNode.get("with-plural", default="false") + if pluralAttr in ("true", "false"): + isPlural = (pluralAttr == "true") + else: + logger.warning( + "{file}: invalid value for the 'with-plural' attribute of " + "{tid} (expected 'true' or 'false'): {val!r}".format( + file=xmlFilePath, tid=tid, val=pluralAttr)) + continue + + if masterTransl is None: + targetTransl.addMasterString(tid, text, isPlural=isPlural) + elif tid not in masterTransl[cat]: + logger.warning( + "{file}: translated string not in the default " + "translation: {tid}".format(file=xmlFilePath, tid=tid)) + else: + targetTransl.addTranslation( + masterTransl, tid, masterTransl[cat][tid].sourceText, + [text], isPlural=isPlural, logger=logger) + + return nbWhitespaceProblems + + +class SysL10NResourceManager(BasicL10NResourceManager): + + @classmethod + def _findMainNode(cls, rootNode): + """ + Return the node directly containing the translations in sys.xml.""" + assert rootNode.tag == "PropertyList", rootNode.tag + # In sys.xml, all translations are 
inside a element + mainNode = rootNode.find("splash") + assert mainNode is not None + + return mainNode diff --git a/python3-flightgear/flightgear/meta/logging.py b/python3-flightgear/flightgear/meta/logging.py new file mode 100644 index 0000000..52d45da --- /dev/null +++ b/python3-flightgear/flightgear/meta/logging.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- + +# logging.py --- Simple logging infrastructure (mostly taken from FFGo) +# Copyright (C) 2015, 2017 Florent Rougon +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import sys + +from . 
import misc + + +class LogLevel(misc.OrderedEnum): + debug, info, notice, warning, error, critical = range(6) + +# List containing the above log levels as strings in increasing priority order +allLogLevels = [member.name for member in LogLevel] +allLogLevels.sort(key=lambda n: LogLevel[n].value) + + +def _logFuncFactory(level): + def logFunc(self, *args, **kwargs): + self.log(LogLevel[level], True, *args, **kwargs) + + def logFunc_noPrefix(self, *args, **kwargs): + self.log(LogLevel[level], False, *args, **kwargs) + + return (logFunc, logFunc_noPrefix) + + +class Logger: + def __init__(self, progname=None, logLevel=LogLevel.notice, + defaultOutputStream=sys.stdout, logFile=None): + self.progname = progname + self.logLevel = logLevel + self.defaultOutputStream = defaultOutputStream + self.logFile = logFile + + def setLogFile(self, *args, **kwargs): + self.logFile = open(*args, **kwargs) + + def log(self, level, printLogLevel, *args, **kwargs): + if printLogLevel and level >= LogLevel.warning and args: + args = [level.name.upper() + ": " + args[0]] + list(args[1:]) + + if level >= self.logLevel: + if (self.progname is not None) and args: + tArgs = [self.progname + ": " + args[0]] + list(args[1:]) + else: + tArgs = args + + kwargs["file"] = self.defaultOutputStream + print(*tArgs, **kwargs) + + if self.logFile is not None: + kwargs["file"] = self.logFile + print(*args, **kwargs) + + # Don't overload log() with too many tests or too much indirection for + # little use + def logToFile(self, *args, **kwargs): + kwargs["file"] = self.logFile + print(*args, **kwargs) + + # NP functions are “no prefix” variants which never prepend the log level + # (otherwise, it is only prepended for warning and higher levels). 
+ debug, debugNP = _logFuncFactory("debug") + info, infoNP = _logFuncFactory("info") + notice, noticeNP = _logFuncFactory("notice") + warning, warningNP = _logFuncFactory("warning") + error, errorNP = _logFuncFactory("error") + critical, criticalNP = _logFuncFactory("critical") + + +class DummyLogger(Logger): + def setLogFile(self, *args, **kwargs): + pass + + def log(self, *args, **kwargs): + pass + + def logToFile(self, *args, **kwargs): + pass diff --git a/python3-flightgear/flightgear/meta/misc.py b/python3-flightgear/flightgear/meta/misc.py new file mode 100644 index 0000000..ccf8ccb --- /dev/null +++ b/python3-flightgear/flightgear/meta/misc.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +# misc.py --- Miscellaneous classes and/or functions +# Copyright (C) 2015-2017 Florent Rougon +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import enum + +# Based on an example from the 'enum' documentation +class OrderedEnum(enum.Enum): + """Base class for enumerations whose members can be ordered. + + Contrary to enum.IntEnum, this class maintains normal enum.Enum + invariants, such as members not being comparable to members of other + enumerations (nor of any other class, actually). 
+ + """ + def __ge__(self, other): + if self.__class__ is other.__class__: + return self.value >= other.value + return NotImplemented + + def __gt__(self, other): + if self.__class__ is other.__class__: + return self.value > other.value + return NotImplemented + + def __le__(self, other): + if self.__class__ is other.__class__: + return self.value <= other.value + return NotImplemented + + def __lt__(self, other): + if self.__class__ is other.__class__: + return self.value < other.value + return NotImplemented + + def __eq__(self, other): + if self.__class__ is other.__class__: + return self.value == other.value + return NotImplemented + + def __ne__(self, other): + if self.__class__ is other.__class__: + return self.value != other.value + return NotImplemented + + +# Taken from and modified +# by Florent Rougon +def indentXmlTree(elem, level=0, basicOffset=2, lastChild=False): + def indentation(level): + return "\n" + level*basicOffset*" " + + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = indentation(level+1) + + for e in elem[:-1]: + indentXmlTree(e, level+1, basicOffset, False) + if len(elem): + indentXmlTree(elem[-1], level+1, basicOffset, True) + + if level and (not elem.tail or not elem.tail.strip()): + if lastChild: + elem.tail = indentation(level-1) + else: + elem.tail = indentation(level)