@@ -3,7 +3,7 @@ import asyncio
 from openai import OpenAI, AsyncOpenAI
 import copy
 # from qwen_agent.llm.base import LLMBase
-
+import re
 
 class LLMClient:
     def __init__(self, model='qwen-plus', api_key='none', model_server=''):
@@ -22,6 +22,7 @@ class LLMClient:
         return self._chat_no_stream(query, model, messages, stop=stop, functions=functions)
 
     def _chat_stream(self, query, model, messages=None, stop=None, functions=None, **kvargs):
+        pattern = r'<think>.*?</think>'
         print(f'begin: stream to lianqi client, model name: {model}')
         if messages:
             response = self._client.chat.completions.create(
@@ -38,7 +39,8 @@ class LLMClient:
         for chunk in response:
             if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content:
                 print(chunk.choices[0].delta.content, end='', flush=True)
-                yield chunk.choices[0].delta.content
+                result = re.sub(pattern, '', chunk.choices[0].delta.content, flags=re.DOTALL)
+                yield result
 
     def _chat_no_stream(self, query, model=None, messages=None, stop=None, functions=None, **kvargs):
         print(f'begin: no stream to lianqi client, model name: {model}')
@@ -119,7 +121,9 @@ class LLMAsyncClient:
         async for chunk in response:
            if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end='', flush=True)
-                yield chunk.choices[0].delta.content
+                pattern = r'<think>.*?</think>\n*'
+                result = re.sub(pattern, '', chunk.choices[0].delta.content, flags=re.DOTALL)
+                yield result
 
     async def _chat_no_stream(self, query, model=None, messages=None, stop=None, functions=None, **kvargs):
         print(f'begin: no stream to lianqi client, model name: {model}')
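
For reference, a minimal sketch of what the added re.sub call does to a single streamed chunk, assuming the whole <think>...</think> block arrives in one chunk (the sample string below is hypothetical; if the tags are split across several chunks, this per-chunk substitution leaves them untouched). The async variant's pattern additionally appends \n* so trailing newlines after the closing tag are removed as well.

import re

# Pattern added in _chat_stream; re.DOTALL lets '.' also match newlines
# inside the reasoning block.
pattern = r'<think>.*?</think>'

chunk_content = "<think>internal reasoning</think>Final answer text."  # hypothetical chunk
cleaned = re.sub(pattern, '', chunk_content, flags=re.DOTALL)
print(cleaned)  # -> "Final answer text."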