1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import logging
- import os
- import re
- import numpy as np
- from utils.data_utils import load_jsonl, save_jsonl
# Sentinel returned by extract_answer() when no number can be found.
INVALID_ANS = '[invalid]'

# Matches one number token. Compiled once at import time instead of per call.
_PAT_LAST_DIGIT = re.compile(
    # Must sit at the start of the text or directly after whitespace or one
    # of `$ % # {` (written as "not preceded by anything else").
    r'(?<![^\s$%#{])'
    r'[+-]?'
    r'(?:'
    # Integer part: the thousands-grouped form is tried BEFORE plain digits,
    # otherwise "1,234" stops at "1" because "," is also a valid terminator.
    r'(?:0|[1-9](?:\d{0,2}(?:,\d{3})+|\d*))(?:\.\d+)?'
    # ...or a bare fraction such as ".5".
    r'|\.\d+'
    r')'
    # Must be followed by whitespace, one of `. , }`, or the end of the text.
    r'(?=\s|[.,}]|$)'
)


def _get_last_number(text):
    """Return the last number token in *text* (sign kept, commas kept), or None."""
    tokens = _PAT_LAST_DIGIT.findall(text)
    if not tokens:
        # Newlines are escaped only for the log line, to keep it on one row.
        escaped = text.replace('\n', '\\n')
        logging.warning(f'No digits found in {escaped!r}')
        return None
    return tokens[-1]


def extract_answer(completion):
    """Extract the last number that appears in *completion*.

    Leading and trailing periods are stripped from the text first. A number
    is recognised when it starts at the beginning of the text or right after
    whitespace / ``$`` / ``%`` / ``#`` / ``{``, and is followed by whitespace,
    ``.``, ``,``, ``}`` or the end of the text. Thousands separators
    (``1,234``) and an explicit ``+`` sign are removed before conversion.

    Args:
        completion: Text to search (a ``str``).

    Returns:
        An ``int`` when the token has no decimal point, a ``float`` when it
        has one, or ``INVALID_ANS`` when no number is found (a warning is
        logged in that case).
    """
    token = _get_last_number(completion.strip('.'))
    if token is None:
        return INVALID_ANS

    cleaned = token.replace(',', '').replace('+', '')
    # The pattern guarantees `cleaned` is a plain decimal literal, so it is
    # parsed with int()/float() rather than eval(): model output must never
    # reach eval(), and a stray "-" used to crash it with SyntaxError.
    if '.' in cleaned:
        return float(cleaned)
    return int(cleaned)
def is_correct(completion, answer):
    """Return whether *completion* and *answer* yield the same extracted value.

    Both texts are passed through ``extract_answer``; the result from
    *answer* is treated as the ground truth.

    Args:
        completion: Model output text.
        answer: Reference text containing the gold answer.

    Returns:
        ``True`` when the value extracted from *completion* equals the gold
        value, ``False`` otherwise (including when nothing can be extracted
        from *completion*).

    Raises:
        AssertionError: If no answer can be extracted from *answer*.
    """
    gold = extract_answer(answer)
    if gold == INVALID_ANS:
        # Raised explicitly rather than via `assert`, which is stripped under
        # `python -O` and would let a missing gold answer be compared
        # silently. The exception type and message are unchanged.
        raise AssertionError('No ground truth answer found in the document.')
    return extract_answer(completion) == gold
def eval_gsm8k_acc(output_fname):
    """Report accuracy for a results file and dump the failing records.

    Records are read with ``load_jsonl``; each record's ``'acc'`` field is
    collected (presumably a 0/1 or boolean correctness flag - confirm against
    the producer of the file). Summary lines are logged at INFO level, and
    every record with a falsy ``'acc'`` is written with ``save_jsonl`` to a
    sibling file named ``<output_fname without extension>_gsm8k_error.jsonl``.

    Args:
        output_fname: Path of the JSONL results file.

    Returns:
        ``{'math': <mean of 'acc' values * 100>}``.
    """
    records = load_jsonl(output_fname)
    scores = [record['acc'] for record in records]

    log = logging.info
    log('='*60)
    log('{:^60}'.format('Math Acc.'))
    log('='*60)
    log('Total num={:.2f}'.format(len(scores)))
    log('Right num={:.2f}'.format(np.sum(scores)))
    log('Zero-shot Acc={:.2f}'.format(np.mean(scores)*100))

    # Keep the misses next to the input file for later inspection.
    failures = [record for record in records if not record['acc']]
    stem, _ext = os.path.splitext(output_fname)
    save_jsonl(failures, stem + '_gsm8k_error.jsonl')

    return {'math': np.mean(scores)*100}
|