# Task wiring: a `generate_until` task evaluated on the `test` split.
# Prompt text, target, and result processing are delegated to functions
# referenced in `utils` through the custom `!function` tag.
output_type: generate_until
test_split: test
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
# NOTE(review): the name suggests a pass@k estimate computed over the repeated
# samples of each document — confirm against the implementation in `utils`.
process_results: !function utils.estimate_pass_at_k_dynamic
# Generation settings: sampling is enabled at temperature 0.7.
generation_kwargs:
  # presumably the per-sample cap on generated tokens — confirm with consumer
  max_gen_toks: 1024
  # Stop strings for `generate_until`: three prompt-style headers and two
  # end-of-sequence markers. Quoted because each contains `:` or `<`/`|`.
  until:
    - 'Problem:'
    - 'Question:'
    - 'Q:'
    - '</s>'
    - '<|im_end|>'
  do_sample: true
  temperature: 0.7
# presumably the number of samples drawn per document — TODO confirm
repeats: 32
# A single filter pipeline, named `math_verify`, made of one step that
# applies the `symbolic_match` function.
filter_list:
  - name: math_verify
    filter:
      - function: symbolic_match
# Few-shot setup.
# 0 by default, override for base models.
num_fewshot: 0
fewshot_config:
  # `first_n` sampler; the candidate examples come from a function in `utils`.
  sampler: first_n
  samples: !function utils.standard_math_fewshot_samples
# Config version, kept as an unquoted number exactly as originally written.
metadata:
  version: 1.1
