Conversation
changes from original Babel PR python-babel#660 python-babel#660
Recursion bugs removed by improved rule parsing and routing, recursion exception now handled Context improved by moving to dataclass Compute divisor fixed by adding precision context Plural tokens now parsed Typos and improved comments
…cales, fix linting errors
Codecov Report❌ Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## master #1232 +/- ##
==========================================
+ Coverage 91.93% 92.27% +0.33%
==========================================
Files 27 28 +1
Lines 4688 4972 +284
==========================================
+ Hits 4310 4588 +278
- Misses 378 384 +6
Flags with carried forward coverage won't be shown. Click here to find out more. ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
akx
left a comment
There was a problem hiding this comment.
Thanks for reviving the number spelling challenge, and sorry it took me so long to get to reviewing this.
Beyond the comments within (mostly lack of type hints etc, but some other things too), please remove the non-human-audited (were there audited ones?) rbnf_test_cases? We don't want to add all of that to the repo here (especially if/when we don't know if they're correct). The script to generate them should naturally stay there (some comments for the script too).
I did take a quick look at the test cases for my native Finnish, and they look correct :)
| SPACE_CHARS_RE = re.compile('|'.join(SPACE_CHARS)) | ||
|
|
||
|
|
||
| def spell_number(number, locale=LC_NUMERIC, ruleset=None): |
There was a problem hiding this comment.
Could you add type annotations here, please?
| return speller.format(number, ruleset=ruleset) | ||
|
|
||
|
|
||
| def get_rbnf_rules(locale=LC_NUMERIC): |
There was a problem hiding this comment.
I'm not sure this needs to be a public API? (If it is, it also locks down the public API for RuleBasedNumberFormat.) Or... WDYT, what sort of app would consume the raw rules?
| territory_languages[territory.attrib['type']] = languages | ||
|
|
||
|
|
||
| # To help the negotiation in `babel.numbers.spell_number` |
There was a problem hiding this comment.
spell_number doesn't seem to directly read this; can you fix up the comment?
| # there will be no rbnf rules for all locales | ||
| # there could be a separate iteration for rbnf rule files |
There was a problem hiding this comment.
I'm not sure I'm totally following this comment?
|
|
||
| # Undocumented syntax (←%rule-name←←) | ||
| # Trac ticket filed for CLDR update PL rbnf | ||
| # http://unicode.org/cldr/trac/ticket/10544 |
There was a problem hiding this comment.
https://unicode-org.atlassian.net/browse/CLDR-10544 is saying this was resolved in https://unicode-org.atlassian.net/browse/CLDR-8909 and rules are flat now - can you check if these comments are still relevant?
| test_cases = None | ||
|
|
||
| def get_test_cases( | ||
| template_toml_path: str = None, | ||
| ruleset_name: str = None, | ||
| ) -> dict: | ||
|
|
||
| global test_cases | ||
|
|
||
| if test_cases is None: | ||
| if template_toml_path is None: | ||
| with open("tests/rbnf_test_cases/_template.toml", "rb") as f: | ||
| test_cases_temp = tomllib.load(f) | ||
| test_cases = test_cases_temp | ||
| else: | ||
| with open(template_toml_path, "rb") as f: | ||
| test_cases_temp = tomllib.load(f) | ||
| else: | ||
| test_cases_temp = test_cases | ||
|
|
||
| return test_cases_temp[get_mapped_ruleset_name(ruleset_name)].items() |
There was a problem hiding this comment.
Looks like this simplifies to something like
(import functools.cache and pathlib.Path)
| test_cases = None | |
| def get_test_cases( | |
| template_toml_path: str = None, | |
| ruleset_name: str = None, | |
| ) -> dict: | |
| global test_cases | |
| if test_cases is None: | |
| if template_toml_path is None: | |
| with open("tests/rbnf_test_cases/_template.toml", "rb") as f: | |
| test_cases_temp = tomllib.load(f) | |
| test_cases = test_cases_temp | |
| else: | |
| with open(template_toml_path, "rb") as f: | |
| test_cases_temp = tomllib.load(f) | |
| else: | |
| test_cases_temp = test_cases | |
| return test_cases_temp[get_mapped_ruleset_name(ruleset_name)].items() | |
| @cache | |
| def _get_test_cases_template(path: str = "tests/rbnf_test_cases/_template.toml"): | |
| return tomllib.load(Path(path).read_text()) | |
| def get_test_cases(ruleset_name: str): | |
| return _get_test_cases_template()[get_mapped_ruleset_name(ruleset_name)] |
| def get_mapped_ruleset_name(ruleset: str) -> str: | ||
| print(ruleset) | ||
| mapping = { | ||
| "spellout-numbering-year": "year", | ||
| "spellout-numbering": "numbering", | ||
| "spellout-ordinal": "ordinal", | ||
| "spellout-cardinal": "cardinal", | ||
| } | ||
| for k, v in mapping.items(): | ||
| if ruleset.startswith(k): | ||
| return v | ||
| return 'numbering' # default fallback |
There was a problem hiding this comment.
| def get_mapped_ruleset_name(ruleset: str) -> str: | |
| print(ruleset) | |
| mapping = { | |
| "spellout-numbering-year": "year", | |
| "spellout-numbering": "numbering", | |
| "spellout-ordinal": "ordinal", | |
| "spellout-cardinal": "cardinal", | |
| } | |
| for k, v in mapping.items(): | |
| if ruleset.startswith(k): | |
| return v | |
| return 'numbering' # default fallback | |
| mapping = { | |
| "spellout-numbering-year": "year", | |
| "spellout-numbering": "numbering", | |
| "spellout-ordinal": "ordinal", | |
| "spellout-cardinal": "cardinal", | |
| } | |
| def get_mapped_ruleset_name(ruleset: str) -> str: | |
| for k, v in mapping.items(): | |
| if ruleset.startswith(k): | |
| return v | |
| return 'numbering' # default fallback |
| def generate_test_for_locale( | ||
| locale: Locale, | ||
| output_toml_path: str, | ||
| test_cases: dict = None, | ||
|
|
||
|
|
||
| ) -> None: |
There was a problem hiding this comment.
| def generate_test_for_locale( | |
| locale: Locale, | |
| output_toml_path: str, | |
| test_cases: dict = None, | |
| ) -> None: | |
| def generate_test_for_locale( | |
| locale: Locale, | |
| output_toml_path: str, | |
| ) -> None: |
| try: | ||
| v2 = speller.format(k, ruleset=ruleset) | ||
| print(f" {k} : '{v2}'") | ||
| lines.append(f'{k} = "{v2}"') | ||
| except RBNFError as e: | ||
| print(k, locale, ruleset, e) | ||
| input() |
There was a problem hiding this comment.
| try: | |
| v2 = speller.format(k, ruleset=ruleset) | |
| print(f" {k} : '{v2}'") | |
| lines.append(f'{k} = "{v2}"') | |
| except RBNFError as e: | |
| print(k, locale, ruleset, e) | |
| input() | |
| v2 = speller.format(k, ruleset=ruleset) | |
| lines.append(f'{k} = "{v2}"') |
| def generate_all_tests( | ||
| test_cases_toml_path: str = "tests/rbnf_test_cases/_template.toml", | ||
| output_dir: str = "tests/rbnf_test_cases/", | ||
| ) -> None: | ||
|
|
||
| for locale in list(get_global('rbnf_locales')): | ||
| output_toml_path = os.path.join(output_dir, f"{locale}.toml") | ||
| generate_test_for_locale(locale, output_toml_path) |
There was a problem hiding this comment.
| def generate_all_tests( | |
| test_cases_toml_path: str = "tests/rbnf_test_cases/_template.toml", | |
| output_dir: str = "tests/rbnf_test_cases/", | |
| ) -> None: | |
| for locale in list(get_global('rbnf_locales')): | |
| output_toml_path = os.path.join(output_dir, f"{locale}.toml") | |
| generate_test_for_locale(locale, output_toml_path) | |
| def generate_all_tests() -> None: | |
| for locale in get_global('rbnf_locales'): | |
| generate_test_for_locale(locale, "tests/rbnf_test_cases/{locale}.toml") |
Number spelling based on the pure Python implementation of the CLDR RBNF engine.
Currently only supports whole numbers (cardinal, ordinal, year) as fractional numbers are not well-defined and not universally implemented in CLDR.
Based on the original PR #660 with useful enhancements from @akx at #682 and additional work from the original author:
Extensive test cases are added, but should be considered smoke tests until a native speaker reviews them for each language.
Based on an earlier discussion: #114 and referenced in #179
Supersedes #682