Ticket #4449: langs_table.py

File langs_table.py, 5.4 KB (added by manuq, 11 years ago)

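Script that outputs a table comparing the display names of the installed UTF-8 locales as produced by gettext (ISO 639 and ISO 639-3 catalogs), Babel and PyICU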

Line 
1import sys
2import gettext
3import subprocess
4
5from babel import Locale as BabelLocale
6from babel.core import UnknownLocaleError as BabelUnknownLocaleError
7from PyICU import Locale as PyICULocale
8
9
def _translate_iso_name(domain, language, name, code):
    """Translate *name* into *language* using the gettext catalog *domain*.

    Returns the translated string, or "Unknown" when no catalog can be
    loaded for *language* (IOError) or the lookup raises UnicodeError.
    *code* is only used in the diagnostics written to stderr.
    """
    try:
        translation = gettext.translation(domain, languages=[language])
        # Python 2 catalogs offer ugettext() for unicode results; Python 3
        # removed it because gettext() already returns text.
        translate = (getattr(translation, 'ugettext', None)
                     or translation.gettext)
        return translate(name)
    except IOError:
        # Report the domain that actually failed, not always the language
        # domain, so the message is accurate for the country lookup too.
        sys.stderr.write("ugettext raised IOError for %s using %s\n"
                         % (code, domain))
        return "Unknown"
    except UnicodeError:
        sys.stderr.write("ugettext raised UnicodeError for %s\n" % code)
        return "Unknown"


def get_locale_name_gettext(code, language_name, country_name,
                            lang_iso='iso_639'):
    """Return "<language> (<country>)" translated via gettext catalogs.

    code -- locale code such as 'en_US.utf8'; only its language part is
            used, to select the translation language.
    language_name -- language name to look up in the *lang_iso* domain.
    country_name -- country name to look up in the 'iso_3166' domain.
    lang_iso -- gettext domain used for the language name.

    Either half of the result is "Unknown" when its lookup fails; the
    failure is reported on stderr.
    """
    # Keep only the language part.  Splitting on each separator in turn
    # (instead of unpacking code.split('_') into two names) also copes with
    # codes that have no territory, e.g. 'C.utf8'.
    language = code
    for separator in ('@', '.', '_'):
        language = language.split(separator, 1)[0]

    language_translation = _translate_iso_name(lang_iso, language,
                                               language_name, code)
    country_translation = _translate_iso_name('iso_3166', language,
                                              country_name, code)
    return "%s (%s)" % (language_translation, country_translation)
37
def get_locale_name_babel(code):
    """Return Babel's display name for the locale identified by *code*.

    code -- locale code such as 'en_US.utf8'.

    Returns u"Unknown (Unknown)" (and reports on stderr) when Babel does
    not know the locale or has no display name for it.
    """
    # Drop any '@modifier' and '.encoding' parts explicitly instead of
    # blindly chopping the last five characters, then split off the
    # territory.  partition() tolerates codes without a territory
    # (e.g. 'C.utf8'), which a two-way unpack of split('_') does not.
    identifier = code.split('@', 1)[0].split('.', 1)[0]
    language, _separator, country = identifier.partition('_')
    try:
        locale = BabelLocale(language, country or None)
    except BabelUnknownLocaleError:
        sys.stderr.write("babel raised BabelUnknownLocaleError for %s\n"
                         % code)
        return u"Unknown (Unknown)"
    if locale.display_name is None:
        sys.stderr.write("babel got display_name = None for %s\n" % code)
        return u"Unknown (Unknown)"
    return locale.display_name
49
def get_locale_name_pyicu(code):
    """Return the PyICU display name for the locale built from *code*.

    The locale object is passed to its own getDisplayName() call, i.e. it
    is both the locale being named and the argument of the lookup.
    """
    icu_locale = PyICULocale(code)
    display_name = icu_locale.getDisplayName(icu_locale)
    return display_name
53
def read_all_languages():
    """Return a sorted list of (language, territory, code) tuples.

    The entries are parsed from the output of `locale -av`; only locales
    whose code ends in 'utf8' and that declare a non-empty language are
    kept.  Three hard-coded entries are always appended (see FIXME below).

    If the `locale` command cannot be started, the problem is reported on
    stderr and only the hard-coded entries are returned.
    """
    try:
        process = subprocess.Popen(['locale', '-av'], stdout=subprocess.PIPE)
        # communicate() drains the pipe and waits for the child, so no
        # zombie process or open pipe is left behind.
        output = process.communicate()[0]
    except OSError as error:
        sys.stderr.write("could not run 'locale -av': %s\n" % error)
        output = ''
    if not isinstance(output, str):
        # Python 3 returns bytes from the pipe; Python 2 already gives str.
        output = output.decode('utf-8', 'replace')

    locales = []
    code = None
    lang = ''
    for line in output.split('\n'):
        fields = line.split()
        if fields and fields[0] == 'locale:':
            # Header line of a new locale: forget the previous locale's
            # language so it cannot leak into this one.
            code = fields[1] if len(fields) > 1 else None
            lang = ''
            continue

        key, separator, value = line.partition('|')
        if not separator:
            continue
        key = key.strip()
        # Take the text after the '|' separator.  str.lstrip('language |')
        # strips a *set of characters*, which also eats leading letters of
        # the value itself (e.g. 'euskara' -> 'skara').
        if key == 'language':
            lang = value.strip()
        elif key == 'territory':
            territory = value.strip()
            if code is not None and code.endswith('utf8') and lang:
                locales.append((lang, territory, code))

    #FIXME: This is a temporary workaround for locales that are essential to
    # OLPC, but are not in Glibc yet.
    locales.append(('Kreyol', 'Haiti', 'ht_HT.utf8'))
    locales.append(('Dari', 'Afghanistan', 'fa_AF.utf8'))
    locales.append(('Pashto', 'Afghanistan', 'ps_AF.utf8'))

    locales.sort()
    return locales
77
if __name__ == '__main__':
    # Print one table row per available locale, comparing the display name
    # reported by `locale -av` with the ones produced by gettext (two ISO
    # domains), Babel and PyICU.  Diagnostics go to stderr, the table to
    # stdout encoded as UTF-8.

    def to_text(value):
        """Return *value* as unicode text, decoding UTF-8 byte strings.

        Names coming from `locale -av` are byte strings on Python 2.
        Mixing them with unicode in string formatting triggers an implicit
        ASCII decode, which raised UnicodeDecodeError for names with
        non-ASCII characters and made those rows disappear.  Decoding once
        here keeps every cell as text.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        return value

    def collect_names(get_name):
        """Map each available locale code to the text from *get_name*.

        *get_name* is called as get_name(code, language, country).
        """
        names = {}
        for language, country, code in available_locales:
            names[code] = to_text(get_name(code, language, country))
        return names

    available_locales = read_all_languages()

    # (title, {code: cell text}) per column, in output order.  The list is
    # evaluated top to bottom, so each source is queried for all locales
    # before the next one starts.
    columns = [
        (u"Code", collect_names(
            lambda code, language, country: code)),
        (u"Original", collect_names(
            lambda code, language, country:
                "%s (%s)" % (language, country))),
        (u"ISO 639", collect_names(
            lambda code, language, country:
                get_locale_name_gettext(code, language, country,
                                        lang_iso='iso_639'))),
        (u"ISO 639-3", collect_names(
            lambda code, language, country:
                get_locale_name_gettext(code, language, country,
                                        lang_iso='iso_639_3'))),
        (u"Babel", collect_names(
            lambda code, language, country: get_locale_name_babel(code))),
        (u"ICU", collect_names(
            lambda code, language, country: get_locale_name_pyicu(code))),
    ]

    # A column is as wide as its longest cell, header included, so the
    # header row always lines up with the data rows.
    widths = [max([len(title)] + [len(name) for name in names.values()])
              for title, names in columns]

    # Encoded bytes go to the binary layer on Python 3 and straight to
    # sys.stdout on Python 2.
    out = getattr(sys.stdout, 'buffer', sys.stdout)

    def write_row(cells):
        """Write one table row, padding each cell to its column width."""
        padded = [cell.ljust(width) for cell, width in zip(cells, widths)]
        out.write((u" | ".join(padded) + u" |\n").encode('utf-8'))

    write_row([title for title, _names in columns])
    for _language, _country, code in available_locales:
        write_row([names[code] for _title, names in columns])