
Add comments to the intelligent site-selection code

liutao 1 month ago
parent
commit
ada4a00b94

+ 4 - 11
aiAgent_gd/qwen_agent/config/db_config.py

@@ -6,17 +6,10 @@ db_list: dict[str, dict[Any, Any] | dict[str, str]] = {
 
     },
     "pg": {
-        "host": "10.249.168.231",
-        "port": "54321",
+        "host": "10.10.9.243",
+        "port": "5432",
         "database": "sde",
-        "user": "zjugis",
-        "password": "zjugis1402!",
-    },
-    "xzpg": {
-        "host": "172.27.27.16",
-        "port": "3433",
-        "database": "yzt",
-        "user": "zjgt_ww_readonly",
-        "password": "Zjgt_ww_16",
+        "user": "sde",
+        "password": "sde",
     }
 }
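For context: each db_list entry is a plain set of PostgreSQL connection keywords, and AsyncPGSearcher('pg') (see tools.py further down) presumably resolves its connection from the matching entry. A minimal sketch of consuming such an entry, assuming asyncpg as the driver (the driver choice and the connect call are assumptions, not shown in this commit):

import asyncpg

from qwen_agent.config.db_config import db_list


async def connect(name: str = "pg"):
    # Look up the named connection block; db_config stores the port as a
    # string, so convert it before handing it to asyncpg
    cfg = db_list[name]
    return await asyncpg.connect(
        host=cfg["host"],
        port=int(cfg["port"]),
        database=cfg["database"],
        user=cfg["user"],
        password=cfg["password"],
    )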

+ 49 - 38
aiAgent_gd/qwen_agent/memory/SqlMemory.py

@@ -1,9 +1,7 @@
-# from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.vectorstores.faiss import FAISS
 from langchain.schema import Document
 import sentence_transformers
-import jsonlines
 import json
 import os
 
@@ -13,14 +11,16 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 embedding_model_dict = {
     # "text2vec": "/data/m3e-base",
-    "text2vec": "E:\AI_temp\m3e-base",
-
+    "text2vec": "E:\项目临时\AI大模型\m3e-base",
+    # "text2vec": r"E:\项目临时\AI大模型\bge_large_zh_v1.5",#使用bge-large-zh-v3模型也可以进行相似度搜索
 }
 EMBEDDING_MODEL = "text2vec"  # embedding model; the key into embedding_model_dict
 DEVICE = "cpu"
 
-embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[EMBEDDING_MODEL],)
-embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name,device=DEVICE)
+embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[EMBEDDING_MODEL])
+embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name, device=DEVICE)
+
+
 # embeddings = DashScopeEmbeddings(model="text-embedding-v1",
 #                                  dashscope_api_key="sk-cb5c097eb78f4dae8daa6a833590d757")
 
@@ -29,13 +29,17 @@ class SqlRetriever():
     def __init__(self, query_type='bidding') -> None:
 
         few_shot_docs = []
-        self.data = get_data_from_jsons(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'), 'sql_examples')
+        self.data = get_data_from_jsons(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'),
+                                        'sql_examples')
         for line in self.data:
             if line['query_type'] == query_type:
                 few_shot_docs.append(Document(page_content=line['query'], metadata={'sql_code': line['sql_code']}))
-
+        # page_content='帮我在萧山区推荐几块50亩左右的工业用地,数据表是控制性详细规划' metadata={'sql_code': "select id from sde.kzxxxgh where xzqmc = '萧山区' and ydxz like '%工业%' and abs(ydmj - 50*0.0667) <= 1 and shape is not null order by ydmj nulls last limit 5"}
+        # page_content is the example query, metadata carries its SQL
         self.vector_db = FAISS.from_documents(few_shot_docs, embeddings)
 
+    # Previously similarity search was not backed by a vector database; find_most_similar_queries implemented it with plain string matching.
+    # The two methods below are now deprecated, replaced by get_relevant_documents.
     def longest_common_substring(self, str1, str2):
         m, n = len(str1), len(str2)
         dp = [[0] * (n + 1) for _ in range(m + 1)]
@@ -61,34 +65,41 @@ class SqlRetriever():
 
 if __name__ == "__main__":
 
-    def longest_common_substring(str1, str2):
-        m, n = len(str1), len(str2)
-        dp = [[0] * (n + 1) for _ in range(m + 1)]
-        max_length = 0
-        for i in range(1, m + 1):
-            for j in range(1, n + 1):
-                if str1[i - 1] == str2[j - 1]:
-                    dp[i][j] = dp[i - 1][j - 1] + 1
-                    max_length = max(max_length, dp[i][j])
-        return max_length
-
-
-    def find_most_similar_queries(data, text, top_n=3):
-        similarity_scores = [(item, longest_common_substring(item['query'], text)) for item in data]
-        similarity_scores.sort(key=lambda x: x[1], reverse=True)
-        return [item[0] for item in similarity_scores[:top_n]]
-
-
-    # data = [{"query":"example1", "sql_code": "sql1"},{"query":"example2", "sql_code": "sql2"}]
-    # text = "Some input text"
-    # print(find_most_similar_queries(data, text))
-
-    records = []
-    data = json.load(open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data/sql_examples.jsonl'), 'r'))
-    for line in data:
-        records.append(line)
-    results = []
-    for item in find_most_similar_queries(records, '浙江万维今年中了几个标?'):
-        results.append((item['query'], item['sql_code']))
-    print(results)
+    # def longest_common_substring(str1, str2):
+    #     m, n = len(str1), len(str2)
+    #     dp = [[0] * (n + 1) for _ in range(m + 1)]
+    #     max_length = 0
+    #     for i in range(1, m + 1):
+    #         for j in range(1, n + 1):
+    #             if str1[i - 1] == str2[j - 1]:
+    #                 dp[i][j] = dp[i - 1][j - 1] + 1
+    #                 max_length = max(max_length, dp[i][j])
+    #     return max_length
+    #
+    #
+    # def find_most_similar_queries(data, text, top_n=3):
+    #     similarity_scores = [(item, longest_common_substring(item['query'], text)) for item in data]
+    #     similarity_scores.sort(key=lambda x: x[1], reverse=True)
+    #     return [item[0] for item in similarity_scores[:top_n]]
+    #
+    #
+    # # data = [{"query":"example1", "sql_code": "sql1"},{"query":"example2", "sql_code": "sql2"}]
+    # # text = "Some input text"
+    # # print(find_most_similar_queries(data, text))
+    #
+    # records = []
+    # data = json.load(open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data/sql_examples.jsonl'), 'r'))
+    # for line in data:
+    #     records.append(line)
+    # results = []
+    # for item in find_most_similar_queries(records, '浙江万维今年中了几个标?'):
+    #     results.append((item['query'], item['sql_code']))
+    # print(results)
     # print(find_most_similar_queries(records, '浙江万维今年中了几个标?'))
+
+    sql_retrieval = SqlRetriever("land_site_selection")
+    results = sql_retrieval.get_relevant_documents("萧山区推荐几块工业用地", top_k=2)
+    for r in results:
+        print(r)
+        # ('帮我在萧山区推荐几块50亩左右的工业用地,数据表是公告地块', "select id from sde.ecgap_klyzy where xzqmc = '萧山区' and tdyt like '%工业%' and abs(dkmj-5) <= 1 and shape is not null and sfsj=1 order by dkmj nulls last limit 5")
+        # ('帮我在萧山区推荐几块50亩左右的工业用地,数据表是控制性详细规划', "select id from sde.kzxxxgh where xzqmc = '萧山区' and ydxz like '%工业%' and abs(ydmj - 50*0.0667) <= 1 and shape is not null order by ydmj nulls last limit 5")
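Note that the __main__ block now calls SqlRetriever.get_relevant_documents, but that method sits outside the hunks shown here. Judging from the identical method on PlanExampleRetrieval below and from the result tuples in the comments above, it presumably unpacks FAISS hits into (query, sql_code) pairs; a sketch of the method under that assumption:

    def get_relevant_documents(self, query, top_k=4):
        # similarity_search returns Documents; pair each stored example
        # query with the SQL kept in its metadata
        results = []
        for r in self.vector_db.similarity_search(query, k=top_k):
            results.append((r.page_content, r.metadata['sql_code']))
        return results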

+ 14 - 16
aiAgent_gd/qwen_agent/memory/plan_memory.py

@@ -1,43 +1,41 @@
-# from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.vectorstores.faiss import FAISS
 from langchain.schema import Document
 
-import jsonlines
-import json
 import os
 
 from qwen_agent.memory.SqlMemory import embeddings
 from qwen_agent.utils.util import get_data_from_jsons
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 class PlanExampleRetrieval():
     def __init__(self, query_type='bidding') -> None:
-        # self.EMBEDDING_MODEL = "text2vec" # embedding 模型,对应 embedding_model_dict
-        # self.DEVICE = "cuda:2"
 
-        # self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[self.EMBEDDING_MODEL],)
-        # self.embeddings.client = sentence_transformers.SentenceTransformer(self.embeddings.model_name,
-        #                                                                 device=self.DEVICE)
         few_shot_docs = []
-        # embeddings = OpenAIEmbeddings() LianqiaiAgent/qwen_agent/memory/data/ifbunitplan_examples.jsonl
+
         # Changed to read all the JSON files below automatically, making it easier to extend
         data = get_data_from_jsons(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'), 'plan_examples')
 
+        # Use every plan under memory/data/plans whose query_type matches as the knowledge base
         for line in data:
             if line['query_type'] == query_type:
-                few_shot_docs.append(Document(page_content=line['query'], metadata={'plan':line['plan']}))
+                few_shot_docs.append(Document(page_content=line['query'], metadata={'plan': line['plan']}))
 
+        # Vectorize these RAG examples and store them in the FAISS vector store
         self.vector_db = FAISS.from_documents(few_shot_docs, embeddings)
 
-    def get_relevant_documents(self,query,top_k=4):
-        results=[]
+    def get_relevant_documents(self, query, top_k=4):
+        results = []
         for r in self.vector_db.similarity_search(query, k=top_k):
             results.append((r.page_content, r.metadata['plan']))
         return results
 
 
-if __name__=="__main__":
+if __name__ == "__main__":
     print(os.path.abspath(os.path.dirname(__file__)))
-    print(get_data_from_jsons(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'), 'plans'))
-    # print(os.path.join(os.path.abspath(os.path.dirname(__file__)),'data/sqls.jsonl'))
+    # for data in get_data_from_jsons(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data'), 'plan'):
+    #     print(data)
+    plan_retrieval = PlanExampleRetrieval("land_site_selection")
+    results = plan_retrieval.get_relevant_documents("萧山区推荐几块工业用地")
+    for r in results:
+        print(r)
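Both retrievers re-embed every example and rebuild their FAISS index on each construction. If startup time becomes a concern, the index can be persisted and reloaded instead; a sketch using langchain's FAISS save_local/load_local (not part of this commit; the folder name is arbitrary, and this assumes a langchain version where load_local takes just the folder and the embeddings):

from langchain.vectorstores.faiss import FAISS
from langchain.schema import Document

from qwen_agent.memory.SqlMemory import embeddings

docs = [Document(page_content="萧山区推荐几块工业用地", metadata={'plan': '...'})]

# Build once, then persist the index and its docstore to disk
vector_db = FAISS.from_documents(docs, embeddings)
vector_db.save_local("plan_index")

# Later: reload without re-embedding every example
vector_db = FAISS.load_local("plan_index", embeddings)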

+ 2 - 2
aiAgent_gd/qwen_agent/sub_agent/sql/land_find_sql_agent.py

@@ -9,7 +9,7 @@ from tabulate import tabulate
 from qwen_agent.memory.SqlMemory import SqlRetriever
 from qwen_agent.messages.context_message import ChatResponseChoice
 from qwen_agent.sub_agent.BaseSubAgent import BaseSubAgent
-from qwen_agent.tools.tools import async_xzdb
+from qwen_agent.tools.tools import async_db
 
 
 class LandFindSqlAgent(BaseSubAgent):
@@ -150,7 +150,7 @@ class LandFindSqlAgent(BaseSubAgent):
             print(f"sql_to_execute:{sql_to_execute}")
             self.sql_code = sql_to_execute
             a = time.time()
-            res_tuples = await async_xzdb.run(sql_to_execute)
+            res_tuples = await async_db.run(sql_to_execute)
             print('SQL Time Cost:', time.time() - a)
             result, success = res_tuples
             print(f"SQL 查询结果: success:{success}, result: {result}")

+ 1 - 1
aiAgent_gd/qwen_agent/sub_agent/sql/land_site_selection_sql_agent.py

@@ -10,7 +10,7 @@ from tabulate import tabulate
 from qwen_agent.memory.SqlMemory import SqlRetriever
 from qwen_agent.messages.context_message import ChatResponseChoice
 from qwen_agent.sub_agent.BaseSubAgent import BaseSubAgent
-from qwen_agent.tools.tools import async_db,async_xzdb
+from qwen_agent.tools.tools import async_db
 
 
 class LandSiteSelectionSqlAgent(BaseSubAgent):

+ 0 - 1
aiAgent_gd/qwen_agent/tools/tools.py

@@ -311,7 +311,6 @@ class AsyncPGSearcher:
 # asyncio.run(async_db.register())
 
 async_db = AsyncPGSearcher('pg')
-async_xzdb = AsyncPGSearcher('xzpg')
 
 def modify_sql(sql_query):
     # Use a regular expression to find GROUP BY and what follows it, up to HAVING, ORDER BY, or the end of the string
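The body of modify_sql lies outside this hunk; only its leading comment is visible. A pattern consistent with that comment might look like the following (a hypothetical helper, not the repository's actual implementation):

import re


def find_group_by_clause(sql_query):
    # Match GROUP BY and what follows, stopping before HAVING, ORDER BY,
    # or at the end of the string
    pattern = re.compile(
        r"GROUP\s+BY\s+.*?(?=\s+HAVING\b|\s+ORDER\s+BY\b|$)",
        re.IGNORECASE | re.DOTALL,
    )
    match = pattern.search(sql_query)
    return match.group(0) if match else None


# e.g. returns "group by xzqmc"
print(find_group_by_clause("select xzqmc, count(*) from sde.kzxxxgh group by xzqmc order by 2 desc"))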

+ 2 - 44
aiAgent_gd/run_server_async.py

@@ -2,12 +2,7 @@ import shutil
 import tempfile
 import time
 import sys
-from typing import List
 from zipfile import ZipFile
-import json
-import fiona
-from pydantic import BaseModel
-from shapely.geometry import shape
 from sse_starlette.sse import EventSourceResponse
 from fastapi import FastAPI, UploadFile, File, Form
 from fastapi.middleware.cors import CORSMiddleware
@@ -26,8 +21,7 @@ from qwen_agent.planning.plan_continue_executor import PlanContinueExecutor
 from qwen_agent.llm.llm_client import LLMClient, LLMAsyncClient
 from agent_config import LLMDict_Qwen_72B_1211, LLMDict_GPT4_TURBO
 from agent_messages import BaseRequest
-from qwen_agent.tools.tools import async_xzdb, async_db
-from qwen_agent.tools.gis.spatial_analysis.geo_analysis import intersect_kfq, intersect_gyyd, xzfx, sqsx, ghfx
+from xuanzhi_query import router as xz_router
 
 prompt_lan = "CN"
 llm_name = "qwen-plus"
@@ -50,7 +44,7 @@ app.add_middleware(
     allow_methods=["*"],
     allow_headers=["*"],
 )
-
+app.include_router(xz_router)
 rspHeaders = {
     "Cache-Control": "no-cache",
     "Connection": "keep-alive",
@@ -176,42 +170,6 @@ async def clarificationByTurbo(request: BaseRequest):
     )
 
 
-@app.get("/kgQuery")
-async def kgQuery(id: str):
-    sql = f'select id, xzqmc, xzqdm, dymc, yddm, ydxz, ydmj, rjlsx, rjlxx, jzmdsx, jzmdxx, jzgdsx, jzgdxx, ldlsx, ldlxx, pfwh, pfsj, st_area(shape::geography) as pfmarea,st_astext(shape) as geom, st_astext(st_centroid(shape)) as center_wkt from sde.kzxxxgh where id in ({id})'
-    res_tuples = await async_db.run(sql)
-    result, success = res_tuples
-    return json.loads(result)
-
-
-@app.get("/klyzyQuery")
-async def klyzyQuery(id: str):
-    sql = f'select *, st_astext(shape) as geom, st_astext(st_centroid(shape)) as center_wkt from sde.ecgap_klyzy where id in ({id})'
-    res_tuples = await async_db.run(sql)
-    result, success = res_tuples
-    return json.loads(result)
-
-
-@app.get("/yjjbntQuery")
-async def yjjbntQuery(id: str):
-    sql = f'select *,st_astext(shape) as geom from ddd.gcs330000g2001_yjjbnt_gx_xsb where objectid in ({id})'
-    res_tuples = await async_xzdb.run(sql)
-    result, success = res_tuples
-    return json.loads(result)
-
-
-@app.get("/kfqintersect")
-async def kfqintersect(wkt: str):
-    result = await intersect_kfq(
-        wkt)
-    return result
-
-
-@app.get("/gyydintersect")
-async def gyydintersect(wkt: str):
-    result = await intersect_gyyd(wkt)
-    return result
-
 llm_client = LLMClient(model=llm_name, model_server=model_server)
 llm_client_async = LLMAsyncClient(model=llm_name, model_server=model_server)
 

+ 42 - 0
aiAgent_gd/xuanzhi_query.py

@@ -0,0 +1,42 @@
+import json
+
+from fastapi import APIRouter
+
+from qwen_agent.tools.tools import async_db
+from qwen_agent.tools.gis.spatial_analysis.geo_analysis import intersect_kfq, intersect_gyyd
+router = APIRouter()
+@router.get("/kgQuery")
+async def kgQuery(id: str):
+    sql = f'select id, xzqmc, xzqdm, dymc, yddm, ydxz, ydmj, rjlsx, rjlxx, jzmdsx, jzmdxx, jzgdsx, jzgdxx, ldlsx, ldlxx, pfwh, pfsj, st_area(shape::geography) as pfmarea,st_astext(shape) as geom, st_astext(st_centroid(shape)) as center_wkt from sde.kzxxxgh where id in ({id})'
+    res_tuples = await async_db.run(sql)
+    result, success = res_tuples
+    return json.loads(result)
+
+
+@router.get("/klyzyQuery")
+async def klyzyQuery(id: str):
+    sql = f'select *, st_astext(shape) as geom, st_astext(st_centroid(shape)) as center_wkt from sde.ecgap_klyzy where id in ({id})'
+    res_tuples = await async_db.run(sql)
+    result, success = res_tuples
+    return json.loads(result)
+
+
+@router.get("/yjjbntQuery")
+async def yjjbntQuery(id: str):
+    sql = f'select *,st_astext(shape) as geom from ddd.gcs330000g2001_yjjbnt_gx_xsb where objectid in ({id})'
+    res_tuples = await async_db.run(sql)
+    result, success = res_tuples
+    return json.loads(result)
+
+
+@router.get("/kfqintersect")
+async def kfqintersect(wkt: str):
+    result = await intersect_kfq(
+        wkt)
+    return result
+
+
+@router.get("/gyydintersect")
+async def gyydintersect(wkt: str):
+    result = await intersect_gyyd(wkt)
+    return result
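With app.include_router(xz_router) added in run_server_async.py, these endpoints are served by the main FastAPI app. A quick smoke test, assuming the server's default host and port (both assumptions):

import requests

# id is interpolated into an SQL "in ({id})" clause, so a comma-separated
# list works; note the handlers do not escape this value
resp = requests.get("http://localhost:8000/kgQuery", params={"id": "1,2"})
print(resp.json())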