Examples
Basic usage
examples/basic_usage.py:
#!/usr/bin/env python
"""Demonstrate basic case of Cihai's python API with UNIHAN."""
import typing as t
from cihai.core import Cihai
def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Look up one character and one definition keyword through Cihai's UNIHAN.

    Parameters
    ----------
    unihan_options : dict, optional
        Passed to ``unihan.bootstrap()`` when the dataset is not yet
        bootstrapped. ``None`` is treated as an empty dict.
    """
    options = {} if unihan_options is None else unihan_options

    cihai = Cihai()
    if not cihai.unihan.is_bootstrapped:
        # First run: download and install Unihan to db.
        cihai.unihan.bootstrap(options)

    # Forward lookup: character -> definition.
    glyph = cihai.unihan.lookup_char("㐭").first()
    assert glyph is not None
    print("lookup for 㐭: %s" % glyph.kDefinition)

    # Reverse lookup: definition keyword -> characters.
    matches = cihai.unihan.reverse_char("granary")
    joined = ", ".join(hit.char for hit in matches)
    print('matches for "granary": %s ' % joined)


if __name__ == "__main__":
    # Run the example when executed as a script.
    run()
Character variants
examples/variants.py:
#!/usr/bin/env python
"""CJK Variant lookup example for Cihai."""
import typing as t
from cihai.core import Cihai
from cihai.data.unihan.dataset import Unihan
def variant_list(unihan: Unihan, field: str) -> None:
    """Print each character having ``field``, followed by its variants.

    Iterates over ``unihan.with_fields([field])`` and, for every entry, prints
    a ``Character: ...`` header and then one line per item yielded by
    ``entry.untagged_vars(field)``. Nothing is returned.

    Parameters
    ----------
    unihan : Unihan
        Dataset object to query.
    field : str
        Name of the variant field, e.g. ``"kZVariant"``.
    """
    wanted_fields = [field]
    for entry in unihan.with_fields(wanted_fields):
        print(f"Character: {entry.char}")
        # NOTE(review): untagged_vars is not defined in this file; presumably
        # it is supplied by the UnihanVariants plugin -- confirm.
        for variant in entry.untagged_vars(field):
            print(variant)
def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Print variant character data for three UNIHAN variant fields.

    Wrapped in a function so it can be tested in tests/test_examples.py.

    Parameters
    ----------
    unihan_options : dict, optional
        UNIHAN options dictionary, placed under the ``"unihan_options"`` key
        of the Cihai config. ``None`` is treated as an empty dict.
    """
    options = {} if unihan_options is None else unihan_options

    print("This example prints variant character data.")

    cihai = Cihai(config={"unihan_options": options})
    if not cihai.unihan.is_bootstrapped:
        # First run: download and install Unihan to db.
        # NOTE(review): options travel via the config here and bootstrap()
        # gets no arguments, unlike the other examples -- confirm intended.
        cihai.unihan.bootstrap()

    cihai.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    # (heading printed, field queried) -- kept in the original output order.
    sections = (
        ("## ZVariants", "kZVariant"),
        ("## kSemanticVariant", "kSemanticVariant"),
        ("## kSpecializedSemanticVariant", "kSpecializedSemanticVariant"),
    )
    for heading, field in sections:
        print(heading)
        variant_list(cihai.unihan, field)


if __name__ == "__main__":
    # Run the example when executed as a script.
    run()
examples/variant_ts_difficulties.py:
#!/usr/bin/env python
"""Cihai example for difficult cases of traditional and simplified CJK variants."""
import typing as t
from cihai.core import Cihai
def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Print difficult traditional / simplified CJK variants.

    For every character that has both ``kTraditionalVariant`` and
    ``kSimplifiedVariant``, report whether the character appears in both of
    its own variant sets ("Case 1") or not ("Case 2"), then list the
    mappings.

    Parameters
    ----------
    unihan_options : dict, optional
        Passed to ``unihan.bootstrap()`` when the dataset is not yet
        bootstrapped. ``None`` is treated as an empty dict.
    """
    if unihan_options is None:
        unihan_options = {}

    c = Cihai()
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(unihan_options)

    c.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    print(
        "This example prints some tricky cases of character-by-character "
        "Traditional-Simplified mapping.",
    )
    print("https://www.unicode.org/reports/tr38/#N10211")
    print("3.7.1 bullet 4")

    for char in c.unihan.with_fields(["kTraditionalVariant", "kSimplifiedVariant"]):
        print(f"Character: {char.char}")
        trad = set(char.untagged_vars("kTraditionalVariant"))
        simp = set(char.untagged_vars("kSimplifiedVariant"))

        # Test this row's own character. The previous code tested the
        # class-level ``Unihan.char`` attribute, which is never a member of
        # either set, so "Case 1" could not be reached.
        # NOTE(review): assumes untagged_vars yields character strings
        # comparable to ``char.char`` -- confirm.
        own_char = char.char
        if own_char in trad and own_char in simp:
            print("Case 1")
        else:
            print("Case 2 (non-idempotent)")

        for trad_var in trad:
            print(f"s2t: {trad_var}")
        for simp_var in simp:
            print(f"t2s: {simp_var}")


if __name__ == "__main__":
    # Run the example when executed as a script.
    run()