```python
# Plan-example retrieval for the Qwen agent: loads few-shot (query, plan) pairs
# from JSON files under ./data and indexes them in a FAISS vector store.
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.schema import Document
import os

from qwen_agent.memory.SqlMemory import embeddings
from qwen_agent.utils.util import get_data_from_jsons

# Silence the HuggingFace tokenizers warning when the process forks.
os.environ["TOKENIZERS_PARALLELISM"] = "false"


class PlanExampleRetrieval:
    def __init__(self, query_type='bidding') -> None:
        # Alternative embedding setup kept for reference: a local HuggingFace
        # model (e.g. "text2vec" from embedding_model_dict) pinned to a GPU.
        # self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict["text2vec"])
        # self.embeddings.client = sentence_transformers.SentenceTransformer(
        #     self.embeddings.model_name, device="cuda:2")

        # Instead of a single file such as data/ifbunitplan_examples.jsonl, read
        # every *plan_examples* JSON file under ./data so new example files can
        # be added without code changes.
        few_shot_docs = []
        data = get_data_from_jsons(
            os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'),
            'plan_examples')
        for line in data:
            if line['query_type'] == query_type:
                few_shot_docs.append(
                    Document(page_content=line['query'], metadata={'plan': line['plan']}))
        self.vector_db = FAISS.from_documents(few_shot_docs, embeddings)

    def get_relevant_documents(self, query, top_k=4):
        # Return the top_k most similar example queries with their stored plans.
        results = []
        for r in self.vector_db.similarity_search(query, k=top_k):
            results.append((r.page_content, r.metadata['plan']))
        return results


if __name__ == "__main__":
    # Quick manual check of the data loader.
    print(os.path.abspath(os.path.dirname(__file__)))
    print(get_data_from_jsons(
        os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'), 'plans'))
```
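As a quick sanity check, a minimal usage sketch is shown below. It assumes the class above is in scope and that `data/` contains at least one `plan_examples` JSON/JSONL file whose entries look like `{"query_type": "bidding", "query": "...", "plan": "..."}` (the field names come from the loader above; the concrete query string is illustrative only).

```python
# Minimal usage sketch, assuming it runs alongside the PlanExampleRetrieval
# class above; the example query is made up and real results depend on the
# *plan_examples* files present under data/.
retriever = PlanExampleRetrieval(query_type='bidding')

# get_relevant_documents returns (example_query, plan) tuples ordered by
# vector similarity to the input query.
for example_query, plan in retriever.get_relevant_documents(
        "find the latest bidding announcements", top_k=2):
    print(example_query, '->', plan)
```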