1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
- import copy
- import logging
- import os
- from pprint import pformat
- from typing import List
- from qwen_agent.llm import ModelServiceError
- from qwen_agent.llm.base import register_llm
- from qwen_agent.llm.oai import TextChatAtOAI
- from qwen_agent.llm.schema import ContentItem, Message
- from qwen_agent.log import logger
- from qwen_agent.utils.utils import encode_image_as_base64
@register_llm('qwenvl_oai')
class QwenVLChatAtOAI(TextChatAtOAI):
    """Chat model registered under the name ``'qwenvl_oai'``.

    Extends ``TextChatAtOAI`` by declaring multimodal-input support and by
    serializing messages into a list-of-parts ``content`` layout in which
    images are carried as ``image_url`` entries.
    """

    @property
    def support_multimodal_input(self) -> bool:
        """Always ``True``: this model accepts non-text content items."""
        return True

    @staticmethod
    def convert_messages_to_dicts(messages: List[Message]) -> List[dict]:
        """Serialize ``messages`` into plain dicts with list-style content.

        Each message is dumped with ``model_dump()`` and its ``content`` is
        replaced by a list of part dicts:

        * text items become ``{'type': 'text', 'text': ...}``;
        * image items become
          ``{'type': 'image_url', 'image_url': {'url': ...}}``.

        Items of any other type are not copied into the output.

        For image values, a leading ``file://`` is stripped. Values that do
        not start with ``http://``, ``https://`` or ``data:`` are treated as
        local paths and replaced by the result of
        ``encode_image_as_base64(path, max_short_side_length=1080)``
        (presumably a ``data:`` URL -- confirm against that helper).

        Args:
            messages: The messages to convert. A plain ``str`` content is
                wrapped in a single text ``ContentItem`` first.

        Returns:
            One dict per input message, in the same order.

        Raises:
            ModelServiceError: If an image value is treated as a local path
                and that path does not exist.
            AssertionError: If a message's content is neither ``str`` nor
                ``list``.
        """
        converted = []

        for message in messages:
            parts = message.content
            if isinstance(parts, str):
                parts = [ContentItem(text=parts)]
            assert isinstance(parts, list)

            payload = []
            for part in parts:
                kind, value = part.get_type_and_value()
                if kind == 'text':
                    payload.append({'type': 'text', 'text': value})
                elif kind == 'image':
                    if value.startswith('file://'):
                        value = value[len('file://'):]
                    passthrough = value.startswith(('http://', 'https://', 'data:'))
                    if not passthrough:
                        # Anything without a URL/data scheme is taken to be a
                        # path on the local filesystem.
                        if not os.path.exists(value):
                            raise ModelServiceError(f'Local image "{value}" does not exist.')
                        value = encode_image_as_base64(value, max_short_side_length=1080)
                    payload.append({'type': 'image_url', 'image_url': {'url': value}})

            entry = message.model_dump()
            entry['content'] = payload
            converted.append(entry)

        if logger.isEnabledFor(logging.DEBUG):
            # Log a deep copy so truncating long data: URLs for readability
            # never touches the messages that are actually returned.
            preview = copy.deepcopy(converted)
            for entry in preview:
                for part in entry['content']:
                    url = part.get('image_url', {}).get('url', '')
                    if url.startswith('data:'):
                        part['image_url']['url'] = url[:64] + '...'
            logger.debug(f'LLM Input:\n{pformat(preview, indent=2)}')

        return converted
|