"""Few-shot plan example retrieval backed by a FAISS vector store.

Loads plan examples from the JSON files under ``data/`` next to this
module, filters them by query type, embeds them, and exposes a
similarity-search interface for retrieving the most relevant
(query, plan) examples for a new query.
"""
from langchain.vectorstores.faiss import FAISS
from langchain.schema import Document
import jsonlines
import json
import os

from qwen_agent.memory.SqlMemory import embeddings
from qwen_agent.utils.util import get_data_from_jsons

# Silence the HuggingFace tokenizers fork warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Directory holding the *plan_examples* JSON files shipped with this module.
_DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')


class PlanExampleRetrieval:
    """Retrieves few-shot plan examples similar to a given query.

    On construction, all example records whose ``query_type`` matches are
    embedded into an in-memory FAISS index; ``get_relevant_documents``
    then performs nearest-neighbour search over that index.
    """

    def __init__(self, query_type='bidding') -> None:
        """Build the FAISS index from the bundled plan-example JSON files.

        Args:
            query_type: Only records whose ``query_type`` field equals this
                value are indexed (default ``'bidding'``).
        """
        # Read every *plan_examples* JSON file under data/ so that new
        # example files can be added without code changes.
        records = get_data_from_jsons(_DATA_DIR, 'plan_examples')
        few_shot_docs = [
            Document(page_content=record['query'],
                     metadata={'plan': record['plan']})
            for record in records
            if record['query_type'] == query_type
        ]
        self.vector_db = FAISS.from_documents(few_shot_docs, embeddings)

    def get_relevant_documents(self, query, top_k=4):
        """Return the ``top_k`` examples most similar to ``query``.

        Args:
            query: Natural-language query to match against indexed examples.
            top_k: Number of nearest neighbours to return (default 4).

        Returns:
            A list of ``(example_query, plan)`` tuples ordered by similarity.
        """
        return [
            (doc.page_content, doc.metadata['plan'])
            for doc in self.vector_db.similarity_search(query, k=top_k)
        ]


if __name__ == "__main__":
    print(os.path.abspath(os.path.dirname(__file__)))
    print(get_data_from_jsons(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'), 'plans'))
    # print(os.path.join(os.path.abspath(os.path.dirname(__file__)),'data/sqls.jsonl'))