Examples

Basic usage

examples/basic_usage.py:

#!/usr/bin/env python
"""Demonstrate basic case of Cihai's python API with UNIHAN."""

import typing as t

from cihai.core import Cihai


def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Look up a character and reverse-search a definition through UNIHAN.

    Prints the ``kDefinition`` of 㐭, then the characters returned by
    ``reverse_char("granary")``.

    Parameters
    ----------
    unihan_options : dict, optional
        Passed to ``unihan.bootstrap()`` when UNIHAN has not been bootstrapped
        yet. An empty dict is used when omitted.
    """
    options = {} if unihan_options is None else unihan_options
    cihai = Cihai()

    # First run only: download and install Unihan to db.
    if not cihai.unihan.is_bootstrapped:
        cihai.unihan.bootstrap(options)

    glyph = cihai.unihan.lookup_char("㐭").first()
    assert glyph is not None
    print("lookup for 㐭: %s" % glyph.kDefinition)

    matches = cihai.unihan.reverse_char("granary")
    matched_chars = [match.char for match in matches]
    print('matches for "granary": %s ' % ", ".join(matched_chars))

# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    run()

Character variants

examples/variants.py:

#!/usr/bin/env python
"""CJK Variant lookup example for Cihai."""

import typing as t

from cihai.core import Cihai
from cihai.data.unihan.dataset import Unihan


def variant_list(unihan: Unihan, field: str) -> None:
    """Print the variants stored in ``field`` for each matching character.

    For every entry yielded by ``unihan.with_fields([field])`` this prints a
    ``Character: ...`` header line, then one line per value yielded by the
    entry's ``untagged_vars(field)``. Nothing is returned.
    """
    entries = unihan.with_fields([field])
    for entry in entries:
        header = f"Character: {entry.char}"
        print(header)
        variants = entry.untagged_vars(field)
        for variant in variants:
            print(variant)


def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Lookup variants for a CJK character. Accepts UNIHAN options dictionary.

    Wrapped in a function so it can be tested in tests/test_examples.py.

    Parameters
    ----------
    unihan_options : dict, optional
        Placed under the ``"unihan_options"`` key of the config given to
        ``Cihai``. An empty dict is used when omitted.
    """
    if unihan_options is None:
        unihan_options = {}

    print("This example prints variant character data.")

    # Options are handed over through the Cihai config here, so bootstrap()
    # is called without arguments.
    c = Cihai(config={"unihan_options": unihan_options})
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap()

    c.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    # (heading printed, UNIHAN field listed) for each section of the output.
    sections = (
        ("## ZVariants", "kZVariant"),
        ("## kSemanticVariant", "kSemanticVariant"),
        ("## kSpecializedSemanticVariant", "kSpecializedSemanticVariant"),
    )
    for heading, field in sections:
        print(heading)
        variant_list(c.unihan, field)


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    run()

examples/variant_ts_difficulties.py:

#!/usr/bin/env python
"""Cihai example for difficult cases of traditional and simplified CJK variants."""

import typing as t

from cihai.core import Cihai


def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
    """Print difficult traditional / simplified CJK variants.

    For every character yielded by
    ``with_fields(["kTraditionalVariant", "kSimplifiedVariant"])`` this prints
    "Case 1" when the character appears among both its own traditional and its
    own simplified variants, otherwise "Case 2 (non-idempotent)", followed by
    each variant.

    Parameters
    ----------
    unihan_options : dict, optional
        Passed to ``unihan.bootstrap()`` when UNIHAN has not been bootstrapped
        yet. An empty dict is used when omitted.
    """
    if unihan_options is None:
        unihan_options = {}

    c = Cihai()
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(unihan_options)

    c.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    print(
        "This example prints some tricky cases of character-by-character "
        "Traditional-Simplified mapping.",
    )
    print("https://www.unicode.org/reports/tr38/#N10211")
    print("3.7.1 bullet 4")

    for char in c.unihan.with_fields(["kTraditionalVariant", "kSimplifiedVariant"]):
        print(f"Character: {char.char}")
        trad = set(char.untagged_vars("kTraditionalVariant"))
        simp = set(char.untagged_vars("kSimplifiedVariant"))

        # The test must use the character of the row being iterated
        # (``char.char``); the mapped class attribute ``Unihan.char`` is not
        # tied to any row. Variants may be row objects or plain strings (not
        # visible from here), so reduce each one to its character first.
        trad_chars = {getattr(var, "char", var) for var in trad}
        simp_chars = {getattr(var, "char", var) for var in simp}
        if char.char in trad_chars and char.char in simp_chars:
            print("Case 1")
        else:
            print("Case 2 (non-idempotent)")
        for trad_var in trad:
            print(f"s2t: {trad_var}")
        for simp_var in simp:
            print(f"t2s: {simp_var}")


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    run()