Examples

Basic usage

examples/basic_usage.py:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals

from cihai.core import Cihai


def run(unihan_options=None):
    """Look up a character and an English gloss in Unihan and print both.

    Bootstraps the Unihan dataset first when ``c.unihan.is_bootstrapped`` is
    false.

    :param unihan_options: options forwarded to ``c.unihan.bootstrap``;
        a fresh empty dict is used when omitted.
    :type unihan_options: dict or None
    """
    # A literal ``{}`` default would be a single dict shared by every call
    # (and handed to ``bootstrap``); build a new one per call instead.
    if unihan_options is None:
        unihan_options = {}

    c = Cihai()

    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(unihan_options)

    # Character -> definition
    query = c.unihan.lookup_char('好')
    glyph = query.first()
    print("lookup for 好: %s" % glyph.kDefinition)

    # Definition text -> characters
    query = c.unihan.reverse_char('good')
    print('matches for "good": %s ' % ', '.join(match.char for match in query))


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    run()

Character variants

examples/variants.py:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals

from cihai.core import Cihai


def variant_list(unihan, field):
    """Print each character that has ``field``, followed by its variants.

    :param unihan: object providing ``with_fields(field)``; every item it
        yields must expose ``char`` and ``untagged_vars(field)``.
    :param field: name of the variant field to list, e.g. ``"kZVariant"``.
    """
    for entry in unihan.with_fields(field):
        header = "Character: {}".format(entry.char)
        print(header)
        # One variant per line, directly under the character header.
        for variant in entry.untagged_vars(field):
            print(variant)


def run(unihan_options=None):
    """Print variant character data for three Unihan variant fields.

    Wrapped so we can test in tests/test_examples.py

    Bootstraps the Unihan dataset first when ``c.unihan.is_bootstrapped`` is
    false, registers the ``UnihanVariants`` plugin, then lists ``kZVariant``,
    ``kSemanticVariant`` and ``kSpecializedSemanticVariant`` in that order.

    :param unihan_options: options forwarded to ``c.unihan.bootstrap``;
        a fresh empty dict is used when omitted.
    :type unihan_options: dict or None
    """
    # A literal ``{}`` default would be a single dict shared by every call
    # (and handed to ``bootstrap``); build a new one per call instead.
    if unihan_options is None:
        unihan_options = {}

    print("This example prints variant character data.")

    c = Cihai()
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(unihan_options)

    c.unihan.add_plugin(
        'cihai.data.unihan.dataset.UnihanVariants', namespace='variants'
    )

    print("## ZVariants")
    variant_list(c.unihan, "kZVariant")

    print("## kSemanticVariant")
    variant_list(c.unihan, "kSemanticVariant")

    print("## kSpecializedSemanticVariant")
    variant_list(c.unihan, "kSpecializedSemanticVariant")


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    run()

examples/variant_ts_difficulties.py:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals

from cihai.core import Cihai


def run(unihan_options=None):
    """Print tricky character-by-character Traditional/Simplified mappings.

    Bootstraps the Unihan dataset first when ``c.unihan.is_bootstrapped`` is
    false and registers the ``UnihanVariants`` plugin. For every character
    returned by ``with_fields("kTraditionalVariant", "kSimplifiedVariant")``
    it prints "Case 1" when the character appears in both of its own variant
    sets and "Case 2 (non-idempotent)" otherwise, followed by each variant.

    :param unihan_options: options forwarded to ``c.unihan.bootstrap``;
        a fresh empty dict is used when omitted.
    :type unihan_options: dict or None
    """
    # A literal ``{}`` default would be a single dict shared by every call
    # (and handed to ``bootstrap``); build a new one per call instead.
    if unihan_options is None:
        unihan_options = {}

    c = Cihai()
    if not c.unihan.is_bootstrapped:  # download and install Unihan to db
        c.unihan.bootstrap(unihan_options)

    c.unihan.add_plugin(
        'cihai.data.unihan.dataset.UnihanVariants', namespace='variants'
    )

    print(
        "This example prints some tricky cases of character-by-character "
        "Traditional-Simplified mapping."
    )
    print("https://www.unicode.org/reports/tr38/#N10211")
    print("3.7.1 bullet 4")

    for char in c.unihan.with_fields("kTraditionalVariant", "kSimplifiedVariant"):
        print("Character: {}".format(char.char))
        trad = set(char.untagged_vars("kTraditionalVariant"))
        simp = set(char.untagged_vars("kSimplifiedVariant"))
        # Membership has to be tested with this row's own character; a
        # class-level attribute such as ``Unihan.char`` is the same object on
        # every iteration and cannot distinguish one row from another.
        # NOTE(review): assumes untagged_vars() yields character strings that
        # compare equal to ``char.char`` -- confirm against UnihanVariants.
        own_char = char.char
        if own_char in trad and own_char in simp:
            print("Case 1")
        else:
            print("Case 2 (non-idempotent)")
        for trad_var in trad:
            print("s2t: {}".format(trad_var))
        for simp_var in simp:
            print("t2s: {}".format(simp_var))


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    run()