Few-shot Benchmark Report

Run ID: 20260506T191515Z | Generated UTC: 2026-05-06T19:17:38.931527+00:00

Configuration

{
  "models": [
    "gemini:gemini-2.5-flash",
    "gemini:gemini-2.5-pro",
    "openai:4.1",
    "openai:5-mini",
    "openai:5.2",
    "openai:5.4",
    "opus-4-5",
    "opus-4-6",
    "sonnet-4-5",
    "sonnet-4-6"
  ],
  "fewshot_strategies": [
    "raw_only",
    "zero_shot"
  ],
  "chat_count": 10,
  "runs_per_chat": 1,
  "max_workers": 25,
  "raw_fewshot_count": 5,
  "raw_fewshot_labels_filter": [
    "[chats] multiple_product_multiple_shipment_complex.json",
    "[chats] single_product_multiple_shipment_complex.json",
    "[chats] single_product_single_shipment_complex.json",
    "[updates] update_change_quantity.json",
    "[updates] update_change_unit_price.json"
  ]
}

Model + Strategy Summary

Model | Strategy | Runs | Success rate | Avg attempts | Avg elapsed (s) | Avg mismatch/expected run | Field match rate
gemini:gemini-2.5-flashraw_only101.00001.000017.69294.10000.8808
gemini:gemini-2.5-flashzero_shot101.00001.000016.64657.40000.7849
gemini:gemini-2.5-proraw_only101.00001.000024.45893.70000.8836
gemini:gemini-2.5-prozero_shot101.00001.000022.91094.20000.8679
openai:4.1raw_only101.00001.00002.80123.40000.9003
openai:4.1zero_shot101.00001.00002.10205.10000.8517
openai:5-miniraw_only101.00001.000041.79504.30000.8701
openai:5-minizero_shot101.00001.000053.10127.20000.7966
openai:5.2raw_only101.00001.00003.33024.90000.8506
openai:5.2zero_shot101.00001.00002.56086.60000.7391
openai:5.4raw_only101.00001.00002.93655.10000.8459
openai:5.4zero_shot101.00001.00003.00997.20000.7736
opus-4-5raw_only101.00001.000066.05531.80000.9472
opus-4-5zero_shot101.00001.000030.46613.00000.9120
opus-4-6raw_only101.00001.00006.46842.60000.9238
opus-4-6zero_shot101.00001.00006.62052.20000.9401
sonnet-4-5raw_only101.00001.00006.21053.10000.8974
sonnet-4-5zero_shot101.00001.00006.30913.70000.8915
sonnet-4-6raw_only101.00001.00005.97683.20000.9062
sonnet-4-6zero_shot101.00001.00006.37544.10000.8798

Per-chat Breakdown

Chat | Model | Strategy | Runs | Success rate | Avg elapsed (s) | Mismatch counts
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-flashraw_only11.000024.4175[17]
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-flashzero_shot11.000019.3974[15]
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-proraw_only11.000027.5888[9]
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-prozero_shot11.000029.8520[9]
multiple_product_multiple_shipment_medium.jsonopenai:4.1raw_only11.00004.0248[8]
multiple_product_multiple_shipment_medium.jsonopenai:4.1zero_shot11.00002.6363[15]
multiple_product_multiple_shipment_medium.jsonopenai:5-miniraw_only11.000050.0902[6]
multiple_product_multiple_shipment_medium.jsonopenai:5-minizero_shot11.000077.3563[3]
multiple_product_multiple_shipment_medium.jsonopenai:5.2raw_only11.00004.8770[8]
multiple_product_multiple_shipment_medium.jsonopenai:5.2zero_shot11.00004.2578[6]
multiple_product_multiple_shipment_medium.jsonopenai:5.4raw_only11.00003.8732[15]
multiple_product_multiple_shipment_medium.jsonopenai:5.4zero_shot11.00003.8635[12]
multiple_product_multiple_shipment_medium.jsonopus-4-5raw_only11.000068.1686[7]
multiple_product_multiple_shipment_medium.jsonopus-4-5zero_shot11.000067.8798[5]
multiple_product_multiple_shipment_medium.jsonopus-4-6raw_only11.00009.4180[7]
multiple_product_multiple_shipment_medium.jsonopus-4-6zero_shot11.00007.4383[0]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5raw_only11.00007.7244[7]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5zero_shot11.00008.0789[6]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6raw_only11.00007.1833[7]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6zero_shot11.00009.1098[7]
multiple_product_multiple_shipment_simple.jsongemini:gemini-2.5-flashraw_only11.000024.1515[1]
multiple_product_multiple_shipment_simple.jsongemini:gemini-2.5-flashzero_shot11.000018.2436[9]
multiple_product_multiple_shipment_simple.jsongemini:gemini-2.5-proraw_only11.000020.8939[1]
multiple_product_multiple_shipment_simple.jsongemini:gemini-2.5-prozero_shot11.000022.5534[5]
multiple_product_multiple_shipment_simple.jsonopenai:4.1raw_only11.00002.2639[3]
multiple_product_multiple_shipment_simple.jsonopenai:4.1zero_shot11.00002.1988[8]
multiple_product_multiple_shipment_simple.jsonopenai:5-miniraw_only11.000045.1275[6]
multiple_product_multiple_shipment_simple.jsonopenai:5-minizero_shot11.000051.1225[12]
multiple_product_multiple_shipment_simple.jsonopenai:5.2raw_only11.00003.4122[6]
multiple_product_multiple_shipment_simple.jsonopenai:5.2zero_shot11.00002.9509[11]
multiple_product_multiple_shipment_simple.jsonopenai:5.4raw_only11.00003.1731[7]
multiple_product_multiple_shipment_simple.jsonopenai:5.4zero_shot11.00003.1447[11]
multiple_product_multiple_shipment_simple.jsonopus-4-5raw_only11.000066.4252[0]
multiple_product_multiple_shipment_simple.jsonopus-4-5zero_shot11.000066.0680[6]
multiple_product_multiple_shipment_simple.jsonopus-4-6raw_only11.00006.4243[1]
multiple_product_multiple_shipment_simple.jsonopus-4-6zero_shot11.00007.8250[6]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5raw_only11.00005.3519[1]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5zero_shot11.00007.5426[7]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6raw_only11.00006.4574[1]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6zero_shot11.00005.7162[7]
real_world_msgs_test_v1.jsongemini:gemini-2.5-flashraw_only11.000017.3677[2]
real_world_msgs_test_v1.jsongemini:gemini-2.5-flashzero_shot11.000018.5203[3]
real_world_msgs_test_v1.jsongemini:gemini-2.5-proraw_only11.000028.4204[5]
real_world_msgs_test_v1.jsongemini:gemini-2.5-prozero_shot11.000022.5518[4]
real_world_msgs_test_v1.jsonopenai:4.1raw_only11.00001.8193[1]
real_world_msgs_test_v1.jsonopenai:4.1zero_shot11.00001.6631[2]
real_world_msgs_test_v1.jsonopenai:5-miniraw_only11.000041.5353[5]
real_world_msgs_test_v1.jsonopenai:5-minizero_shot11.000058.5298[3]
real_world_msgs_test_v1.jsonopenai:5.2raw_only11.00003.2502[3]
real_world_msgs_test_v1.jsonopenai:5.2zero_shot11.00002.4129[3]
real_world_msgs_test_v1.jsonopenai:5.4raw_only11.00002.8770[3]
real_world_msgs_test_v1.jsonopenai:5.4zero_shot11.00002.3172[3]
real_world_msgs_test_v1.jsonopus-4-5raw_only11.000065.0727[1]
real_world_msgs_test_v1.jsonopus-4-5zero_shot11.000065.0415[0]
real_world_msgs_test_v1.jsonopus-4-6raw_only11.00004.8246[3]
real_world_msgs_test_v1.jsonopus-4-6zero_shot11.00006.8272[3]
real_world_msgs_test_v1.jsonsonnet-4-5raw_only11.00006.3981[3]
real_world_msgs_test_v1.jsonsonnet-4-5zero_shot11.00005.3792[3]
real_world_msgs_test_v1.jsonsonnet-4-6raw_only11.00005.0333[4]
real_world_msgs_test_v1.jsonsonnet-4-6zero_shot11.00008.4732[7]
real_world_msgs_test_v2.jsongemini:gemini-2.5-flashraw_only11.000016.8065[2]
real_world_msgs_test_v2.jsongemini:gemini-2.5-flashzero_shot11.000018.7872[1]
real_world_msgs_test_v2.jsongemini:gemini-2.5-proraw_only11.000040.5519[4]
real_world_msgs_test_v2.jsongemini:gemini-2.5-prozero_shot11.000020.5013[3]
real_world_msgs_test_v2.jsonopenai:4.1raw_only11.00002.5580[4]
real_world_msgs_test_v2.jsonopenai:4.1zero_shot11.00001.8426[5]
real_world_msgs_test_v2.jsonopenai:5-miniraw_only11.000055.9184[6]
real_world_msgs_test_v2.jsonopenai:5-minizero_shot11.000048.4700[14]
real_world_msgs_test_v2.jsonopenai:5.2raw_only11.00003.2292[10]
real_world_msgs_test_v2.jsonopenai:5.2zero_shot11.00002.3842[4]
real_world_msgs_test_v2.jsonopenai:5.4raw_only11.00003.1230[2]
real_world_msgs_test_v2.jsonopenai:5.4zero_shot11.00003.2848[3]
real_world_msgs_test_v2.jsonopus-4-5raw_only11.000066.8444[3]
real_world_msgs_test_v2.jsonopus-4-5zero_shot11.000066.8127[4]
real_world_msgs_test_v2.jsonopus-4-6raw_only11.00007.1540[1]
real_world_msgs_test_v2.jsonopus-4-6zero_shot11.00006.8519[3]
real_world_msgs_test_v2.jsonsonnet-4-5raw_only11.00006.6071[3]
real_world_msgs_test_v2.jsonsonnet-4-5zero_shot11.00007.7269[4]
real_world_msgs_test_v2.jsonsonnet-4-6raw_only11.00006.2762[5]
real_world_msgs_test_v2.jsonsonnet-4-6zero_shot11.00006.7010[4]
real_world_msgs_test_v3.jsongemini:gemini-2.5-flashraw_only11.000013.4998[0]
real_world_msgs_test_v3.jsongemini:gemini-2.5-flashzero_shot11.000012.7147[3]
real_world_msgs_test_v3.jsongemini:gemini-2.5-proraw_only11.000019.2120[2]
real_world_msgs_test_v3.jsongemini:gemini-2.5-prozero_shot11.000021.9976[4]
real_world_msgs_test_v3.jsonopenai:4.1raw_only11.00002.3230[4]
real_world_msgs_test_v3.jsonopenai:4.1zero_shot11.00001.6725[4]
real_world_msgs_test_v3.jsonopenai:5-miniraw_only11.000055.1436[5]
real_world_msgs_test_v3.jsonopenai:5-minizero_shot11.000060.2539[6]
real_world_msgs_test_v3.jsonopenai:5.2raw_only11.00003.3434[5]
real_world_msgs_test_v3.jsonopenai:5.2zero_shot11.00002.1556[5]
real_world_msgs_test_v3.jsonopenai:5.4raw_only11.00002.1620[5]
real_world_msgs_test_v3.jsonopenai:5.4zero_shot11.00002.2496[6]
real_world_msgs_test_v3.jsonopus-4-5raw_only11.000065.8757[0]
real_world_msgs_test_v3.jsonopus-4-5zero_shot11.00007.8048[0]
real_world_msgs_test_v3.jsonopus-4-6raw_only11.00006.4163[3]
real_world_msgs_test_v3.jsonopus-4-6zero_shot11.00006.2922[3]
real_world_msgs_test_v3.jsonsonnet-4-5raw_only11.00005.7173[0]
real_world_msgs_test_v3.jsonsonnet-4-5zero_shot11.00005.8899[0]
real_world_msgs_test_v3.jsonsonnet-4-6raw_only11.00005.8065[5]
real_world_msgs_test_v3.jsonsonnet-4-6zero_shot11.00005.6163[5]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-flashraw_only11.000019.8514[5]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-flashzero_shot11.000020.4179[14]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-proraw_only11.000024.9057[5]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-prozero_shot11.000025.5547[5]
single_product_multiple_shipment_complex.jsonopenai:4.1raw_only11.00004.4050[2]
single_product_multiple_shipment_complex.jsonopenai:4.1zero_shot11.00003.3162[2]
single_product_multiple_shipment_complex.jsonopenai:5-miniraw_only11.000035.0704[2]
single_product_multiple_shipment_complex.jsonopenai:5-minizero_shot11.000059.5847[8]
single_product_multiple_shipment_complex.jsonopenai:5.2raw_only11.00004.0087[1]
single_product_multiple_shipment_complex.jsonopenai:5.2zero_shot11.00002.3621[4]
single_product_multiple_shipment_complex.jsonopenai:5.4raw_only11.00003.8625[2]
single_product_multiple_shipment_complex.jsonopenai:5.4zero_shot11.00003.9909[5]
single_product_multiple_shipment_complex.jsonopus-4-5raw_only11.000066.7973[0]
single_product_multiple_shipment_complex.jsonopus-4-5zero_shot11.00008.9170[6]
single_product_multiple_shipment_complex.jsonopus-4-6raw_only11.00007.2364[0]
single_product_multiple_shipment_complex.jsonopus-4-6zero_shot11.00006.8688[0]
single_product_multiple_shipment_complex.jsonsonnet-4-5raw_only11.00008.7301[4]
single_product_multiple_shipment_complex.jsonsonnet-4-5zero_shot11.00007.7598[1]
single_product_multiple_shipment_complex.jsonsonnet-4-6raw_only11.00007.1658[1]
single_product_multiple_shipment_complex.jsonsonnet-4-6zero_shot11.00007.2591[0]
single_product_multiple_shipment_medium.jsongemini:gemini-2.5-flashraw_only11.000016.3803[3]
single_product_multiple_shipment_medium.jsongemini:gemini-2.5-flashzero_shot11.000012.7116[10]
single_product_multiple_shipment_medium.jsongemini:gemini-2.5-proraw_only11.000018.7476[2]
single_product_multiple_shipment_medium.jsongemini:gemini-2.5-prozero_shot11.000019.3498[1]
single_product_multiple_shipment_medium.jsonopenai:4.1raw_only11.00005.0676[3]
single_product_multiple_shipment_medium.jsonopenai:4.1zero_shot11.00002.0462[5]
single_product_multiple_shipment_medium.jsonopenai:5-miniraw_only11.000044.0596[2]
single_product_multiple_shipment_medium.jsonopenai:5-minizero_shot11.000049.0367[6]
single_product_multiple_shipment_medium.jsonopenai:5.2raw_only11.00003.4010[3]
single_product_multiple_shipment_medium.jsonopenai:5.2zero_shot11.00002.5474[10]
single_product_multiple_shipment_medium.jsonopenai:5.4raw_only11.00003.0728[4]
single_product_multiple_shipment_medium.jsonopenai:5.4zero_shot11.00003.4537[10]
single_product_multiple_shipment_medium.jsonopus-4-5raw_only11.000065.9490[0]
single_product_multiple_shipment_medium.jsonopus-4-5zero_shot11.00005.9634[1]
single_product_multiple_shipment_medium.jsonopus-4-6raw_only11.00006.0783[1]
single_product_multiple_shipment_medium.jsonopus-4-6zero_shot11.00006.7440[0]
single_product_multiple_shipment_medium.jsonsonnet-4-5raw_only11.00006.2259[3]
single_product_multiple_shipment_medium.jsonsonnet-4-5zero_shot11.00005.7349[5]
single_product_multiple_shipment_medium.jsonsonnet-4-6raw_only11.00005.7799[0]
single_product_multiple_shipment_medium.jsonsonnet-4-6zero_shot11.00005.4522[0]
single_product_multiple_shipment_simple.jsongemini:gemini-2.5-flashraw_only11.000015.5978[2]
single_product_multiple_shipment_simple.jsongemini:gemini-2.5-flashzero_shot11.000010.2395[6]
single_product_multiple_shipment_simple.jsongemini:gemini-2.5-proraw_only11.000015.7205[2]
single_product_multiple_shipment_simple.jsongemini:gemini-2.5-prozero_shot11.000019.8395[1]
single_product_multiple_shipment_simple.jsonopenai:4.1raw_only11.00002.0221[2]
single_product_multiple_shipment_simple.jsonopenai:4.1zero_shot11.00001.8595[4]
single_product_multiple_shipment_simple.jsonopenai:5-miniraw_only11.000034.7107[2]
single_product_multiple_shipment_simple.jsonopenai:5-minizero_shot11.000044.7850[9]
single_product_multiple_shipment_simple.jsonopenai:5.2raw_only11.00002.7247[2]
single_product_multiple_shipment_simple.jsonopenai:5.2zero_shot11.00001.9819[10]
single_product_multiple_shipment_simple.jsonopenai:5.4raw_only11.00002.7778[2]
single_product_multiple_shipment_simple.jsonopenai:5.4zero_shot11.00002.4385[9]
single_product_multiple_shipment_simple.jsonopus-4-5raw_only11.000065.5790[0]
single_product_multiple_shipment_simple.jsonopus-4-5zero_shot11.00006.0392[3]
single_product_multiple_shipment_simple.jsonopus-4-6raw_only11.00006.9935[0]
single_product_multiple_shipment_simple.jsonopus-4-6zero_shot11.00006.4721[1]
single_product_multiple_shipment_simple.jsonsonnet-4-5raw_only11.00005.2330[0]
single_product_multiple_shipment_simple.jsonsonnet-4-5zero_shot11.00005.6291[1]
single_product_multiple_shipment_simple.jsonsonnet-4-6raw_only11.00005.4035[0]
single_product_multiple_shipment_simple.jsonsonnet-4-6zero_shot11.00005.4475[4]
single_product_single_shipment_complex.jsongemini:gemini-2.5-flashraw_only11.000016.6514[5]
single_product_single_shipment_complex.jsongemini:gemini-2.5-flashzero_shot11.000018.9258[6]
single_product_single_shipment_complex.jsongemini:gemini-2.5-proraw_only11.000024.8766[4]
single_product_single_shipment_complex.jsongemini:gemini-2.5-prozero_shot11.000022.6361[7]
single_product_single_shipment_complex.jsonopenai:4.1raw_only11.00001.7742[4]
single_product_single_shipment_complex.jsonopenai:4.1zero_shot11.00001.6057[4]
single_product_single_shipment_complex.jsonopenai:5-miniraw_only11.000027.5121[5]
single_product_single_shipment_complex.jsonopenai:5-minizero_shot11.000032.8258[3]
single_product_single_shipment_complex.jsonopenai:5.2raw_only11.00002.5637[6]
single_product_single_shipment_complex.jsonopenai:5.2zero_shot11.00002.3637[6]
single_product_single_shipment_complex.jsonopenai:5.4raw_only11.00002.3541[6]
single_product_single_shipment_complex.jsonopenai:5.4zero_shot11.00002.8076[7]
single_product_single_shipment_complex.jsonopus-4-5raw_only11.000065.0618[6]
single_product_single_shipment_complex.jsonopus-4-5zero_shot11.00005.1005[4]
single_product_single_shipment_complex.jsonopus-4-6raw_only11.00005.2086[6]
single_product_single_shipment_complex.jsonopus-4-6zero_shot11.00005.5002[5]
single_product_single_shipment_complex.jsonsonnet-4-5raw_only11.00005.7262[6]
single_product_single_shipment_complex.jsonsonnet-4-5zero_shot11.00004.7600[6]
single_product_single_shipment_complex.jsonsonnet-4-6raw_only11.00005.3598[6]
single_product_single_shipment_complex.jsonsonnet-4-6zero_shot11.00005.4656[4]
single_product_single_shipment_medium.jsongemini:gemini-2.5-flashraw_only11.000012.2054[4]
single_product_single_shipment_medium.jsongemini:gemini-2.5-flashzero_shot11.000016.5070[7]
single_product_single_shipment_medium.jsongemini:gemini-2.5-proraw_only11.000023.6711[3]
single_product_single_shipment_medium.jsongemini:gemini-2.5-prozero_shot11.000024.2732[3]
single_product_single_shipment_medium.jsonopenai:4.1raw_only11.00001.7544[3]
single_product_single_shipment_medium.jsonopenai:4.1zero_shot11.00002.1795[2]
single_product_single_shipment_medium.jsonopenai:5-miniraw_only11.000028.7825[4]
single_product_single_shipment_medium.jsonopenai:5-minizero_shot11.000049.0476[8]
single_product_single_shipment_medium.jsonopenai:5.2raw_only11.00002.4921[5]
single_product_single_shipment_medium.jsonopenai:5.2zero_shot11.00002.1914[7]
single_product_single_shipment_medium.jsonopenai:5.4raw_only11.00002.0898[5]
single_product_single_shipment_medium.jsonopenai:5.4zero_shot11.00002.5482[6]
single_product_single_shipment_medium.jsonopus-4-5raw_only11.000064.7796[1]
single_product_single_shipment_medium.jsonopus-4-5zero_shot11.00005.0346[1]
single_product_single_shipment_medium.jsonopus-4-6raw_only11.00004.9299[4]
single_product_single_shipment_medium.jsonopus-4-6zero_shot11.00005.3854[1]
single_product_single_shipment_medium.jsonsonnet-4-5raw_only11.00004.3908[4]
single_product_single_shipment_medium.jsonsonnet-4-5zero_shot11.00004.5897[4]
single_product_single_shipment_medium.jsonsonnet-4-6raw_only11.00005.3027[3]
single_product_single_shipment_medium.jsonsonnet-4-6zero_shot11.00004.5127[3]

Top Mismatches (up to 100 runs)

Chat | Model | Strategy | Run | Mismatch count | Sample mismatches
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-flashraw_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 23.75
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": "2027-02-28"
  },
  {
    "path": "data[0].items[0].total",
    "expected": null,
    "actual": 285.0
  }
]
multiple_product_multiple_shipment_medium.jsonopenai:5.4raw_only115
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": null,
    "actual": 3600.0
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "boxes",
    "actual": "BOXES"
  }
]
multiple_product_multiple_shipment_medium.jsonopenai:4.1zero_shot115
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD"
  },
  {
    "path": "data[0].items[0].total",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "boxes",
    "actual": "BOXES"
  }
]
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-flashzero_shot115
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 23.75
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": "2027-02-28"
  },
  {
    "path": "data[0].items[0].total",
    "expected": null,
    "actual": 285.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": null,
    "actual": 11.4
  }
]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-flashzero_shot114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[0].delivery_terms",
    "expected": "FOB Singapore",
    "actual": "FOB Singapore 100 Finance Ave"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": "2027-02-28"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  }
]
real_world_msgs_test_v2.jsonopenai:5-minizero_shot114
[
  {
    "path": "data[0].items[0].quantity",
    "expected": 23.0,
    "actual": null
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "MT",
    "actual": ""
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF",
    "actual": ""
  },
  {
    "path": "data[0].items[0].delivery_terms",
    "expected": "CIF Busan",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": ""
  }
]
multiple_product_multiple_shipment_simple.jsonopenai:5-minizero_shot112
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  }
]
multiple_product_multiple_shipment_medium.jsonopenai:5.4zero_shot112
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": "100 Finance Ave Singapore 018989, Gate B"
  },
  {
    "path": "data[0].items[0].total",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": null,
    "actual": 360.0
  },
  {
    "path": "data[0].items[1].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": "100 Finance Ave Singapore 018989, Gate B"
  }
]
multiple_product_multiple_shipment_simple.jsonopenai:5.4zero_shot111
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  }
]
multiple_product_multiple_shipment_simple.jsonopenai:5.2zero_shot111
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  }
]
single_product_multiple_shipment_simple.jsonopenai:5.2zero_shot110
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 15.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  }
]
single_product_multiple_shipment_medium.jsonopenai:5.4zero_shot110
[
  {
    "path": "data[0].items[0].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee medium roast"
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "BAG"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[0].packing",
    "expected": "",
    "actual": "1kg bags"
  },
  {
    "path": "data[0].items[1].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee medium roast"
  }
]
single_product_multiple_shipment_medium.jsonopenai:5.2zero_shot110
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee (medium roast)"
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 12.0,
    "actual": 20.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-31",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsongemini:gemini-2.5-flashzero_shot110
[
  {
    "path": "data[0].items[0].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee medium roast"
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[1].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee medium roast"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  }
]
real_world_msgs_test_v2.jsonopenai:5.2raw_only110
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": ""
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": ""
  },
  {
    "path": "data[0].vendor_name",
    "expected": "AG Lipids Pte Ltd",
    "actual": ""
  },
  {
    "path": "data[0].delivery_terms",
    "expected": "CIF Busan",
    "actual": ""
  },
  {
    "path": "data[1].items[0].quantity",
    "expected": 23.0,
    "actual": 18.0
  }
]
single_product_multiple_shipment_simple.jsonopenai:5.4zero_shot19
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 15.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  }
]
single_product_multiple_shipment_simple.jsonopenai:5-minizero_shot19
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  }
]
multiple_product_multiple_shipment_simple.jsongemini:gemini-2.5-flashzero_shot19
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BOXES",
    "actual": "boxes"
  }
]
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-prozero_shot19
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 23.75
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAGS"
  }
]
multiple_product_multiple_shipment_medium.jsongemini:gemini-2.5-proraw_only19
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 23.75
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  }
]
single_product_single_shipment_medium.jsonopenai:5-minizero_shot18
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  },
  {
    "path": "data[0].items[0].delivery_terms",
    "expected": "",
    "actual": "100 Finance Ave Singapore 018989"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  }
]
single_product_multiple_shipment_complex.jsonopenai:5-minizero_shot18
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  },
  {
    "path": "data[0].items[2].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  }
]
multiple_product_multiple_shipment_simple.jsonopenai:4.1zero_shot18
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BOX",
    "actual": "$"
  }
]
multiple_product_multiple_shipment_medium.jsonopenai:5.2raw_only18
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  }
]
multiple_product_multiple_shipment_medium.jsonopenai:4.1raw_only18
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 285.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  }
]
single_product_single_shipment_medium.jsonopenai:5.2zero_shot17
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": ""
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
single_product_single_shipment_medium.jsongemini:gemini-2.5-flashzero_shot17
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsonopenai:5.4zero_shot17
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St. Gate B"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsongemini:gemini-2.5-prozero_shot17
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].delivery_terms",
    "expected": "FOB Singapore",
    "actual": "FOB Singapore, our delivery included."
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
real_world_msgs_test_v1.jsonsonnet-4-6zero_shot17
[
  {
    "path": "data[0].items[0].description",
    "expected": "soy lecithin powder",
    "actual": "Soy Lecithin Powder"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 4.1,
    "actual": 4100.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/KG",
    "actual": "USD/MT"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "",
    "actual": "Busan"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "12MT/20'FCL",
    "actual": "2x 12MT/20'FCL"
  }
]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6zero_shot17
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/BAGS"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].description",
    "expected": "Assam tea",
    "actual": "Assam Tea"
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  }
]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5zero_shot17
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/UNIT"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BOX",
    "actual": "USD/UNIT"
  }
]
multiple_product_multiple_shipment_simple.jsonopenai:5.4raw_only17
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BOX",
    "actual": ""
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-06-30",
    "actual": ""
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6zero_shot17
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6raw_only17
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5raw_only17
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 23.75
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-6raw_only17
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-5raw_only17
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "",
    "actual": "USD/BAG"
  }
]
single_product_single_shipment_medium.jsonopenai:5.4zero_shot16
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsonsonnet-4-6raw_only16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsonsonnet-4-5zero_shot16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsonsonnet-4-5raw_only16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsonopus-4-6raw_only16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsonopus-4-5raw_only16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsonopenai:5.4raw_only16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsonopenai:5.2zero_shot16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": ""
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": ""
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": ""
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsonopenai:5.2raw_only16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsongemini:gemini-2.5-flashzero_shot16
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "$ per bag"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].payment_date",
    "expected": "",
    "actual": "Net 30 from delivery"
  },
  {
    "path": "data[0].delivery_terms",
    "expected": "FOB Singapore",
    "actual": "FOB Singapore, our delivery included"
  }
]
single_product_multiple_shipment_simple.jsongemini:gemini-2.5-flashzero_shot16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].payment_date",
    "expected": "",
    "actual": "Net 30 from last delivery"
  }
]
single_product_multiple_shipment_medium.jsonopenai:5-minizero_shot16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/bag"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-31",
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonopus-4-5zero_shot16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[2].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  }
]
real_world_msgs_test_v3.jsonopenai:5.4zero_shot16
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 20.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/KG",
    "actual": "$/KG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-03-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  }
]
real_world_msgs_test_v3.jsonopenai:5-minizero_shot16
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 20.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/KG",
    "actual": "USD/kg"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-03-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  }
]
real_world_msgs_test_v2.jsonopenai:5-miniraw_only16
[
  {
    "path": "data",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": ""
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": ""
  },
  {
    "path": "data[0].vendor_name",
    "expected": "AG Lipids Pte Ltd",
    "actual": ""
  }
]
multiple_product_multiple_shipment_simple.jsonopus-4-6zero_shot16
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].description",
    "expected": "Assam tea",
    "actual": "Assam Tea"
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  },
  {
    "path": "data[0].items[1].total",
    "expected": 240.0,
    "actual": 4800.0
  }
]
multiple_product_multiple_shipment_simple.jsonopus-4-5zero_shot16
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BOX",
    "actual": ""
  }
]
multiple_product_multiple_shipment_simple.jsonopenai:5.2raw_only16
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  },
  {
    "path": "data[0].items[1].total",
    "expected": 240.0,
    "actual": 4800.0
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
multiple_product_multiple_shipment_simple.jsonopenai:5-miniraw_only16
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 250.0
  },
  {
    "path": "data[0].items[0].total",
    "expected": 250.0,
    "actual": 2500.0
  },
  {
    "path": "data[0].items[1].unit_price",
    "expected": 12.0,
    "actual": 240.0
  },
  {
    "path": "data[0].items[1].total",
    "expected": 240.0,
    "actual": 4800.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-06-30",
    "actual": ""
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5zero_shot16
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": ""
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-05",
    "actual": "2026-02-28"
  }
]
multiple_product_multiple_shipment_medium.jsonopenai:5.2zero_shot16
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-05",
    "actual": "2026-02-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
multiple_product_multiple_shipment_medium.jsonopenai:5-miniraw_only16
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "boxes",
    "actual": "BOXES"
  },
  {
    "path": "data[0].items[2].description",
    "expected": "Copy paper",
    "actual": "copy paper"
  },
  {
    "path": "data[0].items[2].quantity_unit",
    "expected": "reams",
    "actual": "REAMS"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-05",
    "actual": ""
  }
]
single_product_single_shipment_medium.jsonopenai:5.4raw_only15
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": ""
  }
]
single_product_single_shipment_medium.jsonopenai:5.2raw_only15
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsonopus-4-6zero_shot15
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsonopenai:5-miniraw_only15
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsongemini:gemini-2.5-flashraw_only15
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-5zero_shot15
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": ""
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": ""
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": ""
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-31",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonopenai:4.1zero_shot15
[
  {
    "path": "data[0].items[0].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee (Medium Roast)"
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "BAG"
  },
  {
    "path": "data[0].items[1].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee (Medium Roast)"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "BAG"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-31",
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonopenai:5.4zero_shot15
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].items[2].pricing_unit",
    "expected": "USD/BAG",
    "actual": "$/BAG"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-10",
    "actual": ""
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-prozero_shot15
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": "2027-02-28"
  },
  {
    "path": "data[0].items[1].shipment_date",
    "expected": "2026-03-04",
    "actual": "2027-03-04"
  },
  {
    "path": "data[0].items[2].shipment_date",
    "expected": "2026-03-10",
    "actual": "2027-03-10"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-10",
    "actual": "2027-03-10"
  },
  {
    "path": "data[0].po_date",
    "expected": "",
    "actual": "2025-11-28"
  }
]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-proraw_only15
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": "2027-02-28"
  },
  {
    "path": "data[0].items[1].shipment_date",
    "expected": "2026-03-04",
    "actual": "2027-03-04"
  },
  {
    "path": "data[0].items[2].shipment_date",
    "expected": "2026-03-10",
    "actual": "2027-03-10"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-10",
    "actual": "2027-03-10"
  },
  {
    "path": "data[0].billing_address",
    "expected": "",
    "actual": "Leonardo da Vinci"
  }
]
single_product_multiple_shipment_complex.jsongemini:gemini-2.5-flashraw_only15
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": "2027-02-28"
  },
  {
    "path": "data[0].items[1].shipment_date",
    "expected": "2026-03-04",
    "actual": "2027-03-04"
  },
  {
    "path": "data[0].items[2].shipment_date",
    "expected": "2026-03-10",
    "actual": "2027-03-10"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-10",
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": "",
    "actual": "Net 30 Days"
  }
]
real_world_msgs_test_v3.jsonsonnet-4-6zero_shot15
[
  {
    "path": "data[0].items[0].description",
    "expected": "lecithin fat powder",
    "actual": "Lecithin Fat Powder"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  },
  {
    "path": "data[0].items[1].description",
    "expected": "lecithin fat powder",
    "actual": "Lecithin Fat Powder"
  },
  {
    "path": "data[0].items[1].total",
    "expected": 144000.0,
    "actual": null
  },
  {
    "path": "data[0].do_date",
    "expected": "",
    "actual": "2026-03-31"
  }
]
real_world_msgs_test_v3.jsonsonnet-4-6raw_only15
[
  {
    "path": "data[0].items[0].description",
    "expected": "lecithin fat powder",
    "actual": "Lecithin Fat Powder"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  },
  {
    "path": "data[0].items[1].description",
    "expected": "lecithin fat powder",
    "actual": "Lecithin Fat Powder"
  },
  {
    "path": "data[0].items[1].total",
    "expected": 144000.0,
    "actual": null
  },
  {
    "path": "data[0].do_date",
    "expected": "",
    "actual": "2026-03-31"
  }
]
real_world_msgs_test_v3.jsonopenai:5.4raw_only15
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 20.0
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-03-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopenai:5.2zero_shot15
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 20.0
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-03-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": 240000.0
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopenai:5.2raw_only15
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 20.0
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-03-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopenai:5-miniraw_only15
[
  {
    "path": "data[0].items",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 20.0
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-03-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonsonnet-4-6raw_only15
[
  {
    "path": "data[0].vendor_name",
    "expected": "AG Lipids Pte Ltd",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[1].items[0].unit_price",
    "expected": 4.2,
    "actual": 4200.0
  },
  {
    "path": "data[1].items[0].pricing_unit",
    "expected": "USD/KG",
    "actual": "USD/MT"
  },
  {
    "path": "data[1].items[0].loading",
    "expected": "",
    "actual": "18MT/40'FCL"
  },
  {
    "path": "data[1].vendor_name",
    "expected": "AG Lipids Pte Ltd",
    "actual": "Van Beethoven"
  }
]
real_world_msgs_test_v2.jsonopenai:4.1zero_shot15
[
  {
    "path": "data",
    "expected_len": 2,
    "actual_len": 1
  },
  {
    "path": "data[0].items",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-02-28",
    "actual": "2026-02-29"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-02-29"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "AG Lipids Pte Ltd",
    "actual": ""
  }
]
real_world_msgs_test_v1.jsonopenai:5-miniraw_only15
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/KG",
    "actual": "USD/kg"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-11-15",
    "actual": ""
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "12MT/20'FCL",
    "actual": ""
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-11-15",
    "actual": ""
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
real_world_msgs_test_v1.jsongemini:gemini-2.5-proraw_only15
[
  {
    "path": "data[0].items",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 24.0,
    "actual": 12.0
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "",
    "actual": "Busan"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 98400.0,
    "actual": 49200.0
  },
  {
    "path": "data[0].billing_address",
    "expected": "",
    "actual": "Leonardo da Vinci, "
  }
]
multiple_product_multiple_shipment_simple.jsongemini:gemini-2.5-prozero_shot15
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/BAGS"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "Singapore"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BOX",
    "actual": "USD/BOXES"
  },
  {
    "path": "data[0].items[1].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "Singapore"
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": ""
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-5zero_shot15
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 2
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].total",
    "expected": null,
    "actual": 300.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-05",
    "actual": "2026-02-28"
  }
]
single_product_single_shipment_medium.jsonsonnet-4-5zero_shot14
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": ""
  }
]
single_product_single_shipment_medium.jsonsonnet-4-5raw_only14
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": ""
  }
]
single_product_single_shipment_medium.jsonopus-4-6raw_only14
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": ""
  }
]
single_product_single_shipment_medium.jsonopenai:5-miniraw_only14
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": "Net 30 Days"
  }
]
single_product_single_shipment_medium.jsongemini:gemini-2.5-flashraw_only14
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].payment_date",
    "expected": "Net 30 from delivery",
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave Singapore 018989",
    "actual": ""
  }
]
single_product_single_shipment_complex.jsonsonnet-4-6zero_shot14
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].shipping_address",
    "expected": "352 Indiana Jones St.",
    "actual": "352 Indiana Jones St"
  }
]
single_product_single_shipment_complex.jsonopus-4-5zero_shot14
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsonopenai:4.1zero_shot14
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsonopenai:4.1raw_only14
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_single_shipment_complex.jsongemini:gemini-2.5-proraw_only14
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  },
  {
    "path": "data[0].do_date",
    "expected": "2025-11-28",
    "actual": "2026-11-28"
  }
]
single_product_multiple_shipment_simple.jsonsonnet-4-6zero_shot14
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  },
  {
    "path": "data[0].items[1].quantity_unit",
    "expected": "BAGS",
    "actual": "bags"
  },
  {
    "path": "data[0].items[1].pricing_unit",
    "expected": "USD/BAG",
    "actual": "USD/bag"
  }
]
single_product_multiple_shipment_simple.jsonopenai:4.1zero_shot14
[
  {
    "path": "data[0].items[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  },
  {
    "path": "data[0].items[1].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-06-30",
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": "100 Finance Ave",
    "actual": "100 Finance Ave."
  }
]
single_product_multiple_shipment_medium.jsonopenai:5.4raw_only14
[
  {
    "path": "data[0].items[0].packing",
    "expected": "",
    "actual": "1kg bags"
  },
  {
    "path": "data[0].items[1].packing",
    "expected": "",
    "actual": "1kg bags"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-05-31",
    "actual": ""
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonsonnet-4-5raw_only14
[
  {
    "path": "data",
    "expected_len": 1,
    "actual_len": 3
  },
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-03-10",
    "actual": "2026-02-28"
  },
  {
    "path": "data[0].shipping_address",
    "expected": "",
    "actual": "100 Finance Ave"
  }
]
single_product_multiple_shipment_complex.jsonopenai:5.2zero_shot14
[
  {
    "path": "data[0].items",
    "expected_len": 3,
    "actual_len": 1
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 14.0,
    "actual": 32.0
  },
  {
    "path": "data[0].items[0].total",
    "expected": 318.5,
    "actual": 728.0
  },
  {
    "path": "data[0].vendor_name",
    "expected": "Van Beethoven",
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopenai:4.1zero_shot14
[
  {
    "path": "data[0].items[0].shipment_date",
    "expected": "2026-03-31",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 96000.0,
    "actual": null
  },
  {
    "path": "data[0].items[1].shipment_date",
    "expected": "2027-05-31",
    "actual": ""
  },
  {
    "path": "data[0].items[1].total",
    "expected": 144000.0,
    "actual": null
  }
]