ICU で 文字列の正規化を行う

インストール

$ wget http://download.icu-project.org/files/icu4j/55.1/icu4j-55_1.jar
$ vi ~/.bash_profile
    export CLASSPATH=$CLASSPATH:/hoge/icu4j-55_1.jar
#!/usr/local/jython/bin/jython
# -*- coding: utf-8 -*-

from com.ibm.icu.text import Transliterator

# Shared ICU transliterator, built once at import time from the
# 'Fullwidth-Halfwidth' transform ID and reused by every icu_normalize() call.
transliterator = Transliterator.getInstance('Fullwidth-Halfwidth')

def icu_normalize(text):
    """Return *text* run through the module-level transliterator, lower-cased.

    The text is first passed to ``transliterator.transliterate`` (configured
    with the 'Fullwidth-Halfwidth' transform ID) and the result is then
    lower-cased with ``lower()``.
    """
    halfwidth = transliterator.transliterate(text)
    return halfwidth.lower()

if __name__ == "__main__":
    # Demo entry point: normalize one sample string and print the result.
    sample = u"BMW 320"
    print(icu_normalize(sample))
$ jython icu0.jy
bmw 320