similarity_search_keyword.py 1.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. from qwen_agent.schema import RefMaterial
  2. from qwen_agent.utils.util import get_key_word
  3. class SSKeyWord:
  4. def __init__(self, llm=None, stream=False):
  5. self.llm = llm
  6. self.stream = stream
  7. def run(self, line, query):
  8. """
  9. Input: one line
  10. Output: the relative text
  11. """
  12. wordlist = get_key_word(query)
  13. content = line['raw']
  14. if isinstance(content, str):
  15. content = content.split('\n')
  16. res = []
  17. for page in content:
  18. rel_text = self.filter_section(page, wordlist)
  19. if rel_text:
  20. res.append(rel_text)
  21. return RefMaterial(url=line['url'], text=res).to_dict()
  22. def filter_section(self, page, wordlist):
  23. if isinstance(page, str):
  24. text = page
  25. elif isinstance(page, dict):
  26. text = page['page_content']
  27. else:
  28. print(type(page))
  29. raise TypeError
  30. for x in wordlist:
  31. if x in text:
  32. return text
  33. return ''