data_utils.py 718 B

12345678910111213141516171819202122232425262728
  1. import json
  2. import logging
  3. import tqdm
  4. def load_jsonl(path):
  5. data = []
  6. with open(path, 'r', encoding='utf8') as f:
  7. for idx, line in enumerate(f, start=1):
  8. try:
  9. data.append(json.loads(line))
  10. except Exception as e:
  11. logging.info(line)
  12. logging.warning(f'Error at line {idx}: {e}')
  13. continue
  14. return data
  15. def save_jsonl(data, path, progress=False, enabled=True):
  16. if not enabled:
  17. return
  18. with open(path, 'w', encoding='utf-8') as f:
  19. if progress:
  20. data = tqdm(data)
  21. for item in data:
  22. line = json.dumps(item, ensure_ascii=False)
  23. print(line, file=f)