Examples
Basic usage
examples/basic_usage.py:
#!/usr/bin/env python
"""Demonstrate basic case of Cihai's python API with UNIHAN."""
import logging
import typing as t
from cihai.core import Cihai
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Configure logging at INFO level; the format emits only the message text.
logging.basicConfig(level=logging.INFO, format="%(message)s")
def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Run a character lookup and a reverse lookup against Cihai's UNIHAN dataset.

    ``unihan_options`` is handed to ``c.unihan.bootstrap`` when the dataset has
    not been bootstrapped yet; ``None`` is treated as an empty dictionary.
    Results are written through the module logger; nothing is returned.
    """
    options = {} if unihan_options is None else unihan_options

    c = Cihai()
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(options)

    # Forward lookup: take the first result for the character.
    glyph = c.unihan.lookup_char("㐭").first()
    assert glyph is not None
    log.info(f"lookup for 㐭: {glyph.kDefinition}")

    # Reverse lookup: collect the characters of every result for "granary".
    matches = ", ".join([match.char for match in c.unihan.reverse_char("granary")])
    log.info('matches for "granary": {} '.format(matches))


if __name__ == "__main__":
    run()
Character variants
examples/variants.py:
#!/usr/bin/env python
"""CJK Variant lookup example for Cihai."""
import logging
import typing as t
from cihai.core import Cihai
from cihai.data.unihan.dataset import Unihan
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Configure logging at INFO level; the format emits only the message text.
logging.basicConfig(level=logging.INFO, format="%(message)s")
def variant_list(unihan: Unihan, field: str) -> None:
    """Log each character from ``unihan.with_fields([field])`` and its variants.

    For every character, a ``Character: ...`` line is logged, followed by one
    line per item from ``char.untagged_vars(field)``. Nothing is returned.
    """
    matching_chars = unihan.with_fields([field])
    for char in matching_chars:
        log.info(f"Character: {char.char}")
        for variant in char.untagged_vars(field):
            log.info(variant)
def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Lookup variants for a CJK character. Accepts UNIHAN options dictionary.

    Wrapped in a function so it can be tested in tests/test_examples.py.

    ``unihan_options`` defaults to an empty dictionary when ``None`` and is
    placed under the ``"unihan_options"`` key of the Cihai config. Output goes
    through the module logger; nothing is returned.
    """
    if unihan_options is None:
        unihan_options = {}

    log.info("This example log.infos variant character data.")

    c = Cihai(config={"unihan_options": unihan_options})
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        # NOTE(review): options travel via the Cihai config here, whereas the
        # other examples on this page pass them to bootstrap() directly --
        # confirm bootstrap() actually picks them up from the config.
        c.unihan.bootstrap()

    c.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    # Each section logs a heading, then the variants for the matching field.
    sections = (
        ("## ZVariants", "kZVariant"),
        ("## kSemanticVariant", "kSemanticVariant"),
        ("## kSpecializedSemanticVariant", "kSpecializedSemanticVariant"),
    )
    for heading, field in sections:
        log.info(heading)
        variant_list(c.unihan, field)


if __name__ == "__main__":
    run()
examples/variant_ts_difficulties.py:
#!/usr/bin/env python
"""Cihai example for difficult cases of traditional and simplified CJK variants."""
import logging
import typing as t
from cihai.core import Cihai
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Configure logging at INFO level; the format emits only the message text.
logging.basicConfig(level=logging.INFO, format="%(message)s")
def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Log difficult traditional / simplified CJK variant cases.

    ``unihan_options`` is handed to ``c.unihan.bootstrap`` when the dataset has
    not been bootstrapped yet; ``None`` is treated as an empty dictionary.

    For every character returned by ``with_fields`` for the
    ``kTraditionalVariant`` and ``kSimplifiedVariant`` fields, the character
    is logged, classified as "Case 1" or "Case 2 (non-idempotent)", and its
    traditional and simplified variants are listed. Nothing is returned.
    """
    if unihan_options is None:
        unihan_options = {}

    c = Cihai()
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(unihan_options)

    c.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    log.info(
        "This example log.infos some tricky cases of character-by-character "
        "Traditional-Simplified mapping.",
    )
    log.info("https://www.unicode.org/reports/tr38/#N10211")
    log.info("3.7.1 bullet 4")

    for char in c.unihan.with_fields(["kTraditionalVariant", "kSimplifiedVariant"]):
        log.info(f"Character: {char.char}")
        trad = set(char.untagged_vars("kTraditionalVariant"))
        simp = set(char.untagged_vars("kSimplifiedVariant"))
        # Case 1: the character is listed among both its own traditional and
        # its own simplified variants. Compare the row's own value, not the
        # model class's column attribute, which never matches a row value.
        # Assumes untagged_vars yields character strings comparable to
        # char.char -- TODO confirm.
        if char.char in trad and char.char in simp:
            log.info("Case 1")
        else:
            log.info("Case 2 (non-idempotent)")
        for trad_var in trad:
            log.info(f"s2t: {trad_var}")
        for simp_var in simp:
            log.info(f"t2s: {simp_var}")


if __name__ == "__main__":
    run()