参考:让 Ollama 兼容 OpenAIEmbeddings 的解决思路
解决代码:
访问 embedding 模型的代码:
# 测试以下两个引用都可以
from langchain_openai import OpenAIEmbeddings
#from langchain_community.embeddings import OpenAIEmbeddings
from typing import List, Optional
class OllamaCompatibleEmbeddings(OpenAIEmbeddings):
    """``OpenAIEmbeddings`` variant that sends raw strings to the endpoint.

    The length-safe embedding hooks are overridden so that every batch is
    posted as a plain list of texts, with no tokenization step in between.
    Intended for Ollama's OpenAI-compatible ``/v1`` API.
    """

    def _tokenize(self, texts: List[str], chunk_size: int) -> tuple:
        """Return the texts untouched in place of token chunks.

        Args:
            texts: Input strings.
            chunk_size: Number of texts per batch.

        Returns:
            A 3-tuple ``(batch_starts, texts, indices)``: the start offset of
            each ``chunk_size``-sized batch, the original texts, and the
            position of every text.
        """
        return (
            range(0, len(texts), chunk_size),
            texts,
            list(range(len(texts))),
        )

    def _request_params(self, overrides: dict) -> dict:
        """Build the keyword arguments shared by every ``create`` call.

        ``model`` is removed from the invocation parameters and re-added from
        ``self.model`` so it is always passed exactly once; a ``model`` given
        in ``overrides`` takes precedence.
        """
        params = {
            key: value
            for key, value in self._invocation_params.items()
            if key != "model"
        }
        params.update(overrides)
        params.setdefault("model", self.model)
        return params

    def _embeddings_from(self, response) -> List[List[float]]:
        """Extract the embedding vectors from one API response."""
        # Non-dict responses are converted with ``model_dump()`` first.
        if not isinstance(response, dict):
            response = response.model_dump()
        return [item["embedding"] for item in response["data"]]

    def _get_len_safe_embeddings(
        self,
        texts: List[str],
        *,
        engine: str,
        chunk_size: Optional[int] = None,
        **kwargs
    ) -> List[List[float]]:
        """Embed ``texts`` in batches, sending the raw strings as ``input``.

        Args:
            texts: Strings to embed.
            engine: Accepted for signature compatibility; not used here.
            chunk_size: Batch size; falls back to ``self.chunk_size`` when
                omitted (or zero).
            **kwargs: Extra per-call parameters forwarded to ``create``.

        Returns:
            One embedding per input text, in input order. Empty input yields
            an empty list without calling the API.
        """
        batch_size = chunk_size or self.chunk_size
        params = self._request_params(kwargs)
        vectors: List[List[float]] = []
        for start in range(0, len(texts), batch_size):
            # Key change versus tokenized input: pass the text list directly.
            response = self.client.create(
                input=texts[start:start + batch_size], **params
            )
            vectors.extend(self._embeddings_from(response))
        # No empty-text post-processing is applied to the results.
        return vectors

    async def _aget_len_safe_embeddings(
        self,
        texts: List[str],
        *,
        engine: str,
        chunk_size: Optional[int] = None,
        **kwargs
    ) -> List[List[float]]:
        """Asynchronous counterpart of ``_get_len_safe_embeddings``.

        Takes the same arguments and returns the same result, but awaits
        ``self.async_client.create`` for each batch.
        """
        batch_size = chunk_size or self.chunk_size
        params = self._request_params(kwargs)
        vectors: List[List[float]] = []
        for start in range(0, len(texts), batch_size):
            response = await self.async_client.create(
                input=texts[start:start + batch_size], **params
            )
            vectors.extend(self._embeddings_from(response))
        return vectors
# Embedding client aimed at the local Ollama server. Only the "/v1" base URL
# is given here; the OpenAI client appends "/embeddings" to it by itself.
embeddings = OllamaCompatibleEmbeddings(
    chunk_size=512,
    api_key="ollama",  # any string will do
    openai_api_base="http://localhost:11434/v1",
    model="bge-m3:latest",
)

# Smoke test: embed one short query and show the resulting vector.
output = embeddings.embed_query("你好")
print(output)
访问 LLM 模型的代码:
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage
# ChatOpenAI client pointed at the local Ollama API.
llm = ChatOpenAI(
    openai_api_base="http://localhost:11434/v1",  # Ollama API address
    api_key="ollama",  # any string; only needed to satisfy the client format
    model_name="deepseek-r1:1.5b",  # name of the model deployed in Ollama
    max_tokens=2000,  # upper bound on the generated length
    temperature=0.7,  # output randomness; 0 is deterministic, higher is more random
)
# 定义提问函数
def ask_llm(question: str) -> str:
    """Send one question to the configured chat model and return its answer.

    Args:
        question: Text of the user's question.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # The question is wrapped in a single human-role message.
    reply = llm.invoke([HumanMessage(content=question)])
    return reply.content
# Demo: ask a sample question when the file is run as a script.
if __name__ == "__main__":
    question = "请简要介绍量子计算的基本原理"
    answer = ask_llm(question)
    print(f"问题: {question}")
    # One call prints the heading and the answer on separate lines.
    print("\n回答:", answer, sep="\n")

923

被折叠的 条评论
为什么被折叠?



