Examples

Basic usage

examples/basic_usage.py:

#!/usr/bin/env python
"""Demonstrate basic case of Cihai's python API with UNIHAN."""

import logging
import typing as t

from cihai.core import Cihai

# Module-level logger for this example. basicConfig sets the root logger to
# INFO and uses a message-only format so the output contains just the text.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(message)s")


def run(unihan_options: t.Optional[dict[str, object]] = None) -> None:
    """Look up a character and a keyword through Cihai's UNIHAN dataset.

    ``unihan_options`` is passed to ``c.unihan.bootstrap`` when the dataset
    has not been bootstrapped yet; ``None`` is treated as an empty dict.
    Results are written to the module logger; nothing is returned.
    """
    options = {} if unihan_options is None else unihan_options
    c = Cihai()

    # First run only: download and install Unihan to the database.
    if not c.unihan.is_bootstrapped:
        c.unihan.bootstrap(options)

    # Character lookup: take the first result and log its kDefinition.
    glyph = c.unihan.lookup_char("㐭").first()
    assert glyph is not None
    log.info(f"lookup for 㐭: {glyph.kDefinition}")

    # Query reverse_char with the word "granary" and log each result's char.
    matches = c.unihan.reverse_char("granary")
    matched_chars = ", ".join([match.char for match in matches])
    log.info('matches for "granary": {} '.format(matched_chars))


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

Character variants

examples/variants.py:

#!/usr/bin/env python
"""CJK Variant lookup example for Cihai."""

import logging
import typing as t

from cihai.core import Cihai
from cihai.data.unihan.dataset import Unihan

# Module-level logger for this example. basicConfig sets the root logger to
# INFO and uses a message-only format so the output contains just the text.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(message)s")


def variant_list(unihan: Unihan, field: str) -> None:
    """Log every character yielded for ``field`` together with its variants.

    Iterates ``unihan.with_fields([field])``; for each entry the character is
    logged first, then each item from ``entry.untagged_vars(field)``.
    Nothing is returned -- all output goes through the module logger.
    """
    for entry in unihan.with_fields([field]):
        log.info(f"Character: {entry.char}")
        for variant in entry.untagged_vars(field):
            log.info(variant)


def run(unihan_options: t.Optional[dict[str, object]] = None) -> None:
    """Log variant data for CJK characters.

    Wrapped in a function so it can be tested in tests/test_examples.py.

    ``unihan_options`` is handed to ``Cihai`` under the ``"unihan_options"``
    config key; ``None`` is treated as an empty dict. Nothing is returned --
    all output goes through the module logger.
    """
    if unihan_options is None:
        unihan_options = {}

    log.info("This example log.infos variant character data.")

    # NOTE(review): the options travel through the Cihai config here and
    # bootstrap() is called without arguments -- confirm the config key
    # actually reaches the bootstrap step.
    c = Cihai(config={"unihan_options": unihan_options})
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap()

    c.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    # (heading, UNIHAN field) pairs, reported in this order.
    sections = (
        ("## ZVariants", "kZVariant"),
        ("## kSemanticVariant", "kSemanticVariant"),
        ("## kSpecializedSemanticVariant", "kSpecializedSemanticVariant"),
    )
    for heading, field in sections:
        log.info(heading)
        variant_list(c.unihan, field)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

examples/variant_ts_difficulties.py:

#!/usr/bin/env python
"""Cihai example for difficult cases of traditional and simplified CJK variants."""

import logging
import typing as t

from cihai.core import Cihai

# Module-level logger for this example. basicConfig sets the root logger to
# INFO and uses a message-only format so the output contains just the text.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(message)s")


def run(unihan_options: t.Optional[dict[str, object]] = None) -> None:
    """Log difficult traditional / simplified CJK variant cases.

    For every entry yielded by
    ``c.unihan.with_fields(["kTraditionalVariant", "kSimplifiedVariant"])``
    the character is logged and classified: "Case 1" when the character
    appears among both its own traditional and its own simplified variants,
    otherwise "Case 2 (non-idempotent)". Its variants are then logged.

    ``unihan_options`` is passed to ``c.unihan.bootstrap`` when the dataset
    has not been bootstrapped yet; ``None`` is treated as an empty dict.
    Nothing is returned -- all output goes through the module logger.
    """
    if unihan_options is None:
        unihan_options = {}

    c = Cihai()
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(unihan_options)

    c.unihan.add_plugin(
        "cihai.data.unihan.dataset.UnihanVariants",
        namespace="variants",
    )

    log.info(
        "This example log.infos some tricky cases of character-by-character "
        "Traditional-Simplified mapping.",
    )
    log.info("https://www.unicode.org/reports/tr38/#N10211")
    log.info("3.7.1 bullet 4")

    def _char_of(variant: object) -> object:
        """Return the variant's character, or the variant itself if it has none."""
        # NOTE(review): untagged_vars is assumed to yield either row objects
        # exposing ``.char`` or plain character strings -- confirm.
        return getattr(variant, "char", variant)

    for char in c.unihan.with_fields(["kTraditionalVariant", "kSimplifiedVariant"]):
        log.info(f"Character: {char.char}")
        trad = set(char.untagged_vars("kTraditionalVariant"))
        simp = set(char.untagged_vars("kSimplifiedVariant"))

        # Test the current row's own character. The mapped class attribute
        # (``Unihan.char``) is the same object for every row and never equals
        # a concrete character, so it cannot be used for this check.
        trad_chars = {_char_of(variant) for variant in trad}
        simp_chars = {_char_of(variant) for variant in simp}
        if char.char in trad_chars and char.char in simp_chars:
            log.info("Case 1")
        else:
            log.info("Case 2 (non-idempotent)")

        for trad_var in trad:
            log.info(f"s2t: {trad_var}")
        for simp_var in simp:
            log.info(f"t2s: {simp_var}")


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()