# Dataset to load: a copy of `cais/mmlu` with no auxiliary_train split.
dataset_path: hails/mmlu_no_train
# Name of the dataset split the evaluation documents are taken from.
test_split: test
# Free-form generation task: the model produces text (bounded by
# `generation_kwargs`), which is then post-processed by `filter_list`.
output_type: generate_until
# Jinja prompt template rendered once per document. It shows the question and
# its four choices labelled A-D, describes a direct-answer format and a
# step-by-step format, and asks for a closing line of the form
# "The final answer is [the_answer_letter]." -- the phrase the first pattern
# in `filter_list` looks for.
# `|-` keeps the line breaks and strips the trailing newline, so the rendered
# prompt ends with "Let's think step by step."
doc_to_text: |-
  Given the following question and four candidate answers (A, B, C and D), choose the best answer.

  Question: {{question.strip()}}
  A. {{choices[0]}}
  B. {{choices[1]}}
  C. {{choices[2]}}
  D. {{choices[3]}}

  - For simple problems:
  Directly provide the answer with minimal explanation.

  - For complex problems:
  Use this step-by-step format:
  ## Step 1: [Concise description]
  [Brief explanation]
  ## Step 2: [Concise description]
  [Brief explanation]
  ## Step 3: [Concise description]
  [Brief explanation]
  Continue as needed.

  Regardless of the approach, always conclude with:
  The final answer is [the_answer_letter].
  where the [the_answer_letter] is one of A, B, C or D.

  Let's think step by step.
# Gold answer as a letter: the document's `answer` field is used as an index
# into the list, so 0 -> A, 1 -> B, 2 -> C, 3 -> D.
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
# Scoring: exact string match between the filtered model output and the
# target letter, averaged over all documents.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Normalisation flags for the comparison; presumably applied to both the
    # prediction and the target before matching -- confirm in the harness.
    ignore_case: true
    ignore_punctuation: true
# Answer extraction: reduce the generated text to the chosen answer letter
# before it is scored.
filter_list:
  - name: "tigerlab-extract"
    filter:
      # NOTE(review): the patterns are listed most-specific first; presumably
      # the filter tries them in order and keeps the first one that matches --
      # confirm against the "fallback-regex" implementation.
      - function: "fallback-regex"
        regex_patterns:
          # The prompt offers only the choices A-D, so every letter class is
          # limited to A-D. A wider class (e.g. A-J, as used for ten-option
          # tasks) lets stray capitals such as the pronoun "I" or a spurious
          # "E" be extracted; those can never equal the target and would
          # pre-empt a valid A-D letter elsewhere in the response.
          #
          # 1. "answer is X" / "answer is (X)" -- the format the prompt asks for.
          - 'answer is \(?([A-D])\)?'
          # 2. "Answer: X" / "answer: X"; the greedy leading `.*` favours the
          #    last such occurrence on a line.
          - '.*[aA]nswer:\s*([A-D])'
          # 3. Last resort: a standalone A-D letter that is not followed by
          #    another standalone A-D letter.
          - '\b[A-D]\b(?!.*\b[A-D]\b)'
        # One entry per pattern above; presumably the regex group to return
        # (0 = the whole match) -- TODO confirm.
        group_select: [1, 1, 0]
        # Presumably emitted when no pattern matches; it cannot equal any of
        # A-D, so such responses score as incorrect.
        fallback: "[invalid]"
      - function: "take_first"
# Decoding settings passed to the model for each request.
generation_kwargs:
  # Upper bound on generated tokens; leaves room for step-by-step reasoning.
  max_gen_toks: 1024
  # Sampling is turned off and the temperature is zero.
  do_sample: false
  temperature: 0.0
  # Stop strings: generation ends at the first of these end-of-sequence /
  # end-of-turn markers.
  until:
    - '</s>'
    - '<|eot_id|>'
    - '<|im_end|>'
    - '<|endoftext|>'
    - '<extra_id_1>'
# Task metadata. NOTE(review): `version` is presumably the task-definition
# version reported with results; it parses as the float 0.0.
metadata:
  version: 0.0