WSL + Docker
# Build the image, list images, then start a container with GPU access,
# bind mounts for /home/hoge and /mnt, and ports 8888 and 11434 (Ollama default) published
docker build -t python312 .
docker images
docker run -it --name python312 --gpus all -h python312 \
    -v /home/hoge:/home/hoge -v /mnt:/mnt \
    -p 8888:8888 -p 11434:11434 python312
# connect to the container (default bridge-network IP)
ssh 172.17.0.2
# Install Ollama
ollama.com
curl -fsSL https://ollama.com/install.sh | sh
ollama serve
ollama run gemma2
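A quick way to confirm the server is reachable (for example through the published 11434 port) is to hit Ollama's REST API directly. A minimal sketch, assuming the requests package is installed and the gemma2 model has already been pulled:

import requests

# POST a one-off prompt to the local Ollama server (default port 11434)
r = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "gemma2", "prompt": "Hello", "stream": False},
    timeout=60,
)
print(r.json()["response"])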
# Install llama_index
$ cat requirements.txt
llama_index
llama-index-llms-ollama
llama-index-embeddings-huggingface
llama-index-llms-llama-cpp
docx2txt

pip install -r requirements.txt
# Run the program
from llama_index.llms.ollama import Ollama

# Connect to the local Ollama server and pick a model (alternatives left commented out)
#llm = Ollama(model="phi3", request_timeout=60.0)
#llm = Ollama(model="lucas2024/llama-3-elyza-jp-8b:q5_k_m", request_timeout=60.0)
#llm = Ollama(model="llama3.1", request_timeout=60.0)
llm = Ollama(model="gemma2", request_timeout=60.0)

#response = llm.complete("What is the capital of France?")
response = llm.complete("日本で一番高い山はどこですか?")  # "What is the highest mountain in Japan?"
print(response)
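# Optional (sketch): the same call can be streamed token by token;
# stream_complete is the streaming counterpart of complete in llama_index's LLM interface.
#for chunk in llm.stream_complete("日本で一番高い山はどこですか?"):
#    print(chunk.delta, end="", flush=True)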
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Local HuggingFace embedding model; multilingual-e5-small is lightweight and handles Japanese
#embed_model = HuggingFaceEmbedding(model_name="all-MiniLM-L6-v2")
#embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-large")
embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-small")
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Load documents from ./data and build an in-memory vector index
reader = SimpleDirectoryReader("data")
data = reader.load_data()
index = VectorStoreIndex.from_documents(data, embed_model=embed_model)
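# Optional (sketch): persist the index so it is not rebuilt on every run.
# Assumes a local "storage" directory; StorageContext and load_index_from_storage
# come from llama_index.core.
#from llama_index.core import StorageContext, load_index_from_storage
#index.storage_context.persist(persist_dir="storage")
#storage_context = StorageContext.from_defaults(persist_dir="storage")
#index = load_index_from_storage(storage_context, embed_model=embed_model)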
# Query loop: retrieve the 3 most similar chunks and stream the answer
query_engine = index.as_query_engine(llm=llm, streaming=True, similarity_top_k=3)
while True:
    req_msg = input("\n\n >")
    if req_msg == "":
        continue
    res_msg = query_engine.query(req_msg)
    res_msg.print_response_stream()
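    # Optional (sketch): show which chunks were retrieved for this answer;
    # source_nodes holds the retrieved nodes and their similarity scores.
    #for node in res_msg.source_nodes:
    #    print(f"\n[score={node.score:.3f}] {node.node.get_text()[:100]}")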