自然言語処理環境を構築したときのメモ

Java のバージョンアップ
$ sudo yum -y install java-1.8.0-openjdk-devel
$ sudo alternatives --config java <- 1.8を選択する

Jython インストール
$ wget "http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.0/jython-installer-2.7.0.jar"
$ mv "remotecontent?filepath=org%2Fpython%2Fjython-installer%2F2.7.0%2Fjython-installer-2.7.0.jar" /tmp/jython-installer-2.7.0.jar
$ sudo su -
#  java -jar /tmp/jython-installer-2.7.0.jar -c
Welcome to Jython !
You are about to install Jython version 2.7.0
(at any time, answer c to cancel the installation)
For the installation process, the following languages are available: English, German
Please select your language [E/g] >>>
Do you want to read the license agreement now ? [y/N] >>>
Do you accept the license agreement ? [Y/n] >>>
The following installation types are available:
  1. All (everything, including sources)
  2. Standard (core, library modules, demos and examples, documentation)
  3. Minimum (core)
  9. Standalone (a single, executable .jar)
Please select the installation type [ 1 /2/3/9] >>> 2
Do you want to install additional parts ? [y/N] >>>
Do you want to exclude parts from the installation ? [y/N] >>>
Please enter the target directory >>> /usr/local/jython
Unable to find directory /usr/local/jython, create it ? [Y/n] >>>
Your java version to start Jython is: Oracle Corporation / 1.8.0_101
Your operating system version is: Linux / 4.4.11-23.53.amzn1.x86_64
Summary:
  - mod: true
  - demo: true
  - doc: true
  - src: false
  - ensurepip: true
  - JRE: /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.101-3.b13.24.amzn1.x86_64/jre
Please confirm copying of files to directory /usr/local/jython [Y/n] >>>
 10 %
 20 %
 30 %
 40 %
 50 %
 60 %
 70 %
Generating start scripts ...
Installing pip and setuptools
 90 %
Ignoring indexes: https://pypi.python.org/simple/
Downloading/unpacking setuptools
Downloading/unpacking pip
Installing collected packages: setuptools, pip
Successfully installed setuptools pip
Cleaning up...
 100 %
Do you want to show the contents of README ? [y/N] >>>
Congratulations! You successfully installed Jython 2.7.0 to directory /usr/local/jython.

$ sudo chmod -R 777 /usr/local/jython/cachedir

make環境インストール
$ sudo yum -y install gcc-c++ glibc-headers openssl-devel readline libyaml-devel readline-devel zlib zlib-devel libffi-devel libxml2 libxslt libxml2-devel libxslt-devel sqlite-devel

lrzsz (rz / sz)
$ wget http://ohse.de/uwe/releases/lrzsz-0.12.20.tar.gz
$ tar -xzvf lrzsz-0.12.20.tar.gz
$ cd lrzsz-0.12.20
$ ./configure --prefix=/usr/local
$ make
$ sudo make install
$ cd /usr/local/bin
$ sudo ln -s lrz rz
$ sudo ln -s lsz sz

elasticsearch-py
$ sudo pip install elasticsearch
$ python
>>> from elasticsearch import Elasticsearch

$ sudo /usr/local/jython/bin/pip install elasticsearch
$ jython
>>> from elasticsearch import Elasticsearch

ICU
$ wget http://download.icu-project.org/files/icu4j/55.1/icu4j-55_1.jar
$ vi ~/.bash_profile
    export CLASSPATH=$CLASSPATH:/hoge/icu4j-55_1.jar


mvn
$ sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo
$ sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo
$ sudo yum install -y apache-maven
$ mvn --version

mecab
$ wget http://mecab.googlecode.com/files/mecab-0.996.tar.gz
$ tar zvxf mecab-0.996.tar.gz
$ cd mecab-0.996
$ ./configure
$ make
$ sudo make install

mecab-python
$ sudo su -
# pip install https://mecab.googlecode.com/files/mecab-python-0.996.tar.gz
$ python
>>> import MeCab

UniDic
$ wget https://osdn.jp/projects/unidic/downloads/58338/unidic-mecab-2.1.2_src.zip
$ unzip unidic-mecab-2.1.2_src.zip
$ cd unidic-mecab-2.1.2_src
$ ./configure
$ make
$ sudo make install
$ sudo ldconfig
$ sudo vi /usr/local/etc/mecabrc 
    ; dicdir =  /usr/local/lib/mecab/dic/ipadic
    dicdir =  /usr/local/lib/mecab/dic/unidic
$ mecab
すもももももももものうち
すもも  スモモ  スモモ  李      名詞-普通名詞-一般
も      モ      モ      も      助詞-係助詞
もも    モモ    モモ    桃      名詞-普通名詞-一般
も      モ      モ      も      助詞-係助詞
もも    モモ    モモ    桃      名詞-普通名詞-一般
の      ノ      ノ      の      助詞-格助詞
うち    ウチ    ウチ    内      名詞-普通名詞-副詞可能
EOS

kuromoji 
$ sudo yum install git
$ cd ~/work
$ git clone https://github.com/atilika/kuromoji.git
$ cd kuromoji
$ mvn -pl kuromoji-unidic -am package <- メモリサイズが小さいと失敗する
$ vi ~/.bash_profile
    export CLASSPATH=$CLASSPATH:$HOME/work/kuromoji/kuromoji-unidic/target/kuromoji-unidic-1.0-SNAPSHOT.jar:$HOME/work/kuromoji/kuromoji-core/target/kuromoji-core-1.0-SNAPSHOT.jar