Few-shot Benchmark Report

Run ID: 20260505T175516Z | Generated UTC: 2026-05-05T17:58:22.110338+00:00

Configuration

{
  "models": [
    "opus-4-5",
    "opus-4-6",
    "sonnet-4-5",
    "sonnet-4-6"
  ],
  "fewshot_strategies": [
    "db_only",
    "db_plus_raw",
    "raw_only",
    "zero_shot"
  ],
  "chat_count": 21,
  "runs_per_chat": 1,
  "max_workers": 15,
  "raw_fewshot_count": 26,
  "raw_fewshot_labels_filter": []
}

Model + Strategy Summary

Model | Strategy | Runs | Success rate | Avg attempts | Avg elapsed (s) | Avg mismatch/expected run | Field match rate
opus-4-5 | db_only | 21 | 1.0000 | 1.0000 | 7.6200 | 15.5833 | 0.5691
opus-4-5 | db_plus_raw | 21 | 1.0000 | 1.0000 | 9.2965 | 14.9167 | 0.5876
opus-4-5 | raw_only | 21 | 1.0000 | 1.0000 | 18.7784 | 14.0833 | 0.6106
opus-4-5 | zero_shot | 21 | 1.0000 | 1.0000 | 6.0474 | 13.0833 | 0.5154
opus-4-6 | db_only | 21 | 1.0000 | 1.0000 | 8.0110 | 15.8333 | 0.4960
opus-4-6 | db_plus_raw | 21 | 1.0000 | 1.0000 | 8.5916 | 15.2500 | 0.5783
opus-4-6 | raw_only | 21 | 1.0000 | 1.0000 | 6.5599 | 15.5000 | 0.5714
opus-4-6 | zero_shot | 21 | 1.0000 | 1.0000 | 5.7136 | 9.6667 | 0.5226
sonnet-4-5 | db_only | 21 | 1.0000 | 1.0000 | 8.0020 | 15.0000 | 0.5663
sonnet-4-5 | db_plus_raw | 21 | 1.0000 | 1.0000 | 8.5654 | 15.8333 | 0.5622
sonnet-4-5 | raw_only | 21 | 1.0000 | 1.0000 | 7.2104 | 15.8333 | 0.5622
sonnet-4-5 | zero_shot | 21 | 1.0000 | 1.0000 | 5.9209 | 11.2500 | 0.5018
sonnet-4-6 | db_only | 21 | 1.0000 | 1.0000 | 7.4391 | 16.0000 | 0.5576
sonnet-4-6 | db_plus_raw | 21 | 1.0000 | 1.0000 | 8.2431 | 14.7500 | 0.5922
sonnet-4-6 | raw_only | 21 | 1.0000 | 1.0000 | 7.3110 | 15.0000 | 0.5853
sonnet-4-6 | zero_shot | 21 | 1.0000 | 1.0000 | 6.2673 | 13.4167 | 0.5306

Per-chat Breakdown

Chat | Model | Strategy | Runs | Success rate | Avg elapsed (s) | Mismatch counts
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5db_only11.00007.4149[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5db_plus_raw11.00006.8336[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5raw_only11.000018.8554[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5zero_shot11.00004.7726[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6db_only11.00006.4174[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6db_plus_raw11.00006.5779[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6raw_only11.00005.1868[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6zero_shot11.00004.7843[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5db_only11.00007.2004[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5db_plus_raw11.00007.3486[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5raw_only11.00005.5625[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5zero_shot11.00005.3606[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6db_only11.00006.1651[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6db_plus_raw11.00006.4569[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6raw_only11.00005.9357[]
01__2026-02-24__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6zero_shot11.00004.5946[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5db_only11.00007.1330[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5db_plus_raw11.00006.2667[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5raw_only11.000022.5261[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5zero_shot11.00004.7885[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6db_only11.00006.4834[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6db_plus_raw11.00007.5722[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6raw_only11.00006.1256[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6zero_shot11.00005.6175[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5db_only11.00006.1662[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5db_plus_raw11.00006.5212[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5raw_only11.00005.8263[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5zero_shot11.00004.5815[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6db_only11.00006.2705[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6db_plus_raw11.00006.9653[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6raw_only11.00005.1995[]
02__2026-02-09__120363426578757754_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6zero_shot11.00006.9405[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5db_only11.00009.2838[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5db_plus_raw11.00008.4409[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5raw_only11.000025.4663[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5zero_shot11.00008.6536[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6db_only11.000010.5657[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6db_plus_raw11.000010.6273[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6raw_only11.00008.7776[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6zero_shot11.00006.7748[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5db_only11.000010.8318[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5db_plus_raw11.000011.3972[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5raw_only11.000010.2769[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5zero_shot11.00008.2771[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6db_only11.00007.8717[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6db_plus_raw11.000010.1003[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6raw_only11.00008.6061[]
03__2026-01-30__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6zero_shot11.00007.8388[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-5db_only11.00006.0295[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-5db_plus_raw11.00007.0283[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-5raw_only11.000016.0751[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-5zero_shot11.00004.6981[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-6db_only11.00006.5109[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-6db_plus_raw11.00007.4811[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-6raw_only11.00005.4776[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonopus-4-6zero_shot11.00006.0874[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-5db_only11.00006.9890[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-5db_plus_raw11.00006.7700[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-5raw_only11.00005.7561[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-5zero_shot11.00004.3104[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-6db_only11.00005.9267[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-6db_plus_raw11.00006.4885[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-6raw_only11.00005.5721[]
04__2026-01-29__120363408498669191_g_us__4b9c2faa-94dd-4236-abcc-398807051f21.jsonsonnet-4-6zero_shot11.00005.9508[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5db_only11.00006.0621[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5db_plus_raw11.00006.7244[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5raw_only11.000013.8358[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-5zero_shot11.00004.9791[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6db_only11.00006.9344[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6db_plus_raw11.00007.4107[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6raw_only11.00005.2962[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonopus-4-6zero_shot11.00004.6612[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5db_only11.00006.3783[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5db_plus_raw11.00009.6754[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5raw_only11.00005.4187[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-5zero_shot11.00005.5531[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6db_only11.00008.5226[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6db_plus_raw11.00006.5939[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6raw_only11.00005.0300[]
05__2026-01-20__120363407382355715_g_us__12a4f3a7-d506-4d32-ae06-3f76508c6abd.jsonsonnet-4-6zero_shot11.00007.8587[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5db_only11.00006.5811[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5db_plus_raw11.00006.7798[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5raw_only11.000015.7283[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-5zero_shot11.00004.8379[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6db_only11.00006.1738[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6db_plus_raw11.00006.6852[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6raw_only11.00005.1074[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonopus-4-6zero_shot11.00004.8111[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5db_only11.00006.7734[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5db_plus_raw11.00006.7752[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5raw_only11.00004.9116[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-5zero_shot11.00004.7725[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6db_only11.00005.4179[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6db_plus_raw11.00006.3987[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6raw_only11.00005.1768[]
06__2026-01-06__120363421131250401_g_us__e05574ec-b110-4554-9fc3-3abb4f9011a8.jsonsonnet-4-6zero_shot11.00004.2253[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5db_only11.00005.6656[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5db_plus_raw11.00006.1223[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5raw_only11.00006.5972[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-5zero_shot11.00002.5614[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6db_only11.00007.3090[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6db_plus_raw11.00009.0114[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6raw_only11.00004.7776[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonopus-4-6zero_shot11.00002.7851[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5db_only11.00005.0620[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5db_plus_raw11.00004.8666[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5raw_only11.00005.3111[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-5zero_shot11.00003.1151[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6db_only11.00005.1503[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6db_plus_raw11.00006.1501[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6raw_only11.00004.5210[]
07__2025-12-23__120363403074656566_g_us__8f477a8f-2a60-4e0a-bf0e-8cc3cdf1dc9f.jsonsonnet-4-6zero_shot11.00003.8103[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5db_only11.00005.9255[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5db_plus_raw11.00006.5365[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5raw_only11.00005.5620[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5zero_shot11.00004.9165[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6db_only11.00009.9241[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6db_plus_raw11.00007.1211[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6raw_only11.00006.1078[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6zero_shot11.00005.3209[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5db_only11.00007.0477[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5db_plus_raw11.00006.5550[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5raw_only11.00008.9180[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5zero_shot11.00005.2936[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6db_only11.00005.7726[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6db_plus_raw11.00006.5095[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6raw_only11.00005.4982[]
08__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6zero_shot11.00005.3664[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5db_only11.00006.4295[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5db_plus_raw11.00006.6431[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5raw_only11.00005.7966[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-5zero_shot11.00005.3210[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6db_only11.00008.8420[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6db_plus_raw11.00008.2861[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6raw_only11.00005.7267[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonopus-4-6zero_shot11.00005.1820[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5db_only11.00007.0972[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5db_plus_raw11.00009.6886[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5raw_only11.00008.1727[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-5zero_shot11.00005.0026[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6db_only11.00005.7439[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6db_plus_raw11.00007.7584[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6raw_only11.00005.4230[]
09__2025-09-29__120363403592950429_g_us__d586d853-694c-42f9-93be-bc7ba5b2110c.jsonsonnet-4-6zero_shot11.00006.0121[]
multiple_product_multiple_shipment_complex.jsonopus-4-5db_only11.000011.8233[41]
multiple_product_multiple_shipment_complex.jsonopus-4-5db_plus_raw11.000011.2869[43]
multiple_product_multiple_shipment_complex.jsonopus-4-5raw_only11.000013.2243[40]
multiple_product_multiple_shipment_complex.jsonopus-4-5zero_shot11.000012.1762[41]
multiple_product_multiple_shipment_complex.jsonopus-4-6db_only11.000013.1922[47]
multiple_product_multiple_shipment_complex.jsonopus-4-6db_plus_raw11.000013.2831[38]
multiple_product_multiple_shipment_complex.jsonopus-4-6raw_only11.000011.0040[40]
multiple_product_multiple_shipment_complex.jsonopus-4-6zero_shot11.00009.0980[10]
multiple_product_multiple_shipment_complex.jsonsonnet-4-5db_only11.000014.1681[40]
multiple_product_multiple_shipment_complex.jsonsonnet-4-5db_plus_raw11.000013.1987[38]
multiple_product_multiple_shipment_complex.jsonsonnet-4-5raw_only11.000011.8190[38]
multiple_product_multiple_shipment_complex.jsonsonnet-4-5zero_shot11.000010.3334[11]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6db_only11.000010.3829[38]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6db_plus_raw11.000012.9115[33]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6raw_only11.000011.2661[37]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6zero_shot11.000010.5942[34]
multiple_product_multiple_shipment_medium.jsonopus-4-5db_only11.00008.8354[24]
multiple_product_multiple_shipment_medium.jsonopus-4-5db_plus_raw11.00008.2208[22]
multiple_product_multiple_shipment_medium.jsonopus-4-5raw_only11.000032.3467[19]
multiple_product_multiple_shipment_medium.jsonopus-4-5zero_shot11.00008.0086[24]
multiple_product_multiple_shipment_medium.jsonopus-4-6db_only11.00009.5808[24]
multiple_product_multiple_shipment_medium.jsonopus-4-6db_plus_raw11.000010.9837[24]
multiple_product_multiple_shipment_medium.jsonopus-4-6raw_only11.00008.2972[22]
multiple_product_multiple_shipment_medium.jsonopus-4-6zero_shot11.00006.5957[11]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5db_only11.00009.7974[24]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5db_plus_raw11.000010.1036[24]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5raw_only11.00008.4127[22]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5zero_shot11.00007.9911[26]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6db_only11.000017.6625[24]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6db_plus_raw11.00009.6197[24]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6raw_only11.00007.9933[19]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6zero_shot11.00007.4295[25]
multiple_product_multiple_shipment_simple.jsonopus-4-5db_only11.00008.0560[16]
multiple_product_multiple_shipment_simple.jsonopus-4-5db_plus_raw11.00007.6224[16]
multiple_product_multiple_shipment_simple.jsonopus-4-5raw_only11.00007.2440[14]
multiple_product_multiple_shipment_simple.jsonopus-4-5zero_shot11.00006.7818[12]
multiple_product_multiple_shipment_simple.jsonopus-4-6db_only11.00007.9957[12]
multiple_product_multiple_shipment_simple.jsonopus-4-6db_plus_raw11.00009.3195[18]
multiple_product_multiple_shipment_simple.jsonopus-4-6raw_only11.00006.6415[18]
multiple_product_multiple_shipment_simple.jsonopus-4-6zero_shot11.00005.5791[13]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5db_only11.00007.4700[10]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5db_plus_raw11.00009.4641[16]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5raw_only11.00007.8005[19]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5zero_shot11.00006.5561[13]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6db_only11.00006.6804[16]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6db_plus_raw11.000014.6380[16]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6raw_only11.000013.7016[16]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6zero_shot11.00006.3163[12]
real_world_msgs_test_v1.jsonopus-4-5db_only11.00007.4782[11]
real_world_msgs_test_v1.jsonopus-4-5db_plus_raw11.00006.6374[8]
real_world_msgs_test_v1.jsonopus-4-5raw_only11.00005.9130[8]
real_world_msgs_test_v1.jsonopus-4-5zero_shot11.00005.1475[8]
real_world_msgs_test_v1.jsonopus-4-6db_only11.00006.8141[8]
real_world_msgs_test_v1.jsonopus-4-6db_plus_raw11.00007.6162[8]
real_world_msgs_test_v1.jsonopus-4-6raw_only11.00005.6609[8]
real_world_msgs_test_v1.jsonopus-4-6zero_shot11.00005.2136[10]
real_world_msgs_test_v1.jsonsonnet-4-5db_only11.00009.9598[11]
real_world_msgs_test_v1.jsonsonnet-4-5db_plus_raw11.00009.4299[11]
real_world_msgs_test_v1.jsonsonnet-4-5raw_only11.00007.7119[12]
real_world_msgs_test_v1.jsonsonnet-4-5zero_shot11.00006.2016[11]
real_world_msgs_test_v1.jsonsonnet-4-6db_only11.00009.0807[8]
real_world_msgs_test_v1.jsonsonnet-4-6db_plus_raw11.00006.6825[8]
real_world_msgs_test_v1.jsonsonnet-4-6raw_only11.00005.1847[9]
real_world_msgs_test_v1.jsonsonnet-4-6zero_shot11.00006.3706[13]
real_world_msgs_test_v2.jsonopus-4-5db_only11.00007.4209[13]
real_world_msgs_test_v2.jsonopus-4-5db_plus_raw11.000025.6978[12]
real_world_msgs_test_v2.jsonopus-4-5raw_only11.00007.1338[13]
real_world_msgs_test_v2.jsonopus-4-5zero_shot11.00006.5915[13]
real_world_msgs_test_v2.jsonopus-4-6db_only11.00008.2137[13]
real_world_msgs_test_v2.jsonopus-4-6db_plus_raw11.000010.0163[13]
real_world_msgs_test_v2.jsonopus-4-6raw_only11.00007.5074[13]
real_world_msgs_test_v2.jsonopus-4-6zero_shot11.00006.6707[14]
real_world_msgs_test_v2.jsonsonnet-4-5db_only11.00008.4967[12]
real_world_msgs_test_v2.jsonsonnet-4-5db_plus_raw11.00008.9594[13]
real_world_msgs_test_v2.jsonsonnet-4-5raw_only11.00008.5859[17]
real_world_msgs_test_v2.jsonsonnet-4-5zero_shot11.00007.0692[13]
real_world_msgs_test_v2.jsonsonnet-4-6db_only11.00007.0131[13]
real_world_msgs_test_v2.jsonsonnet-4-6db_plus_raw11.00008.1633[13]
real_world_msgs_test_v2.jsonsonnet-4-6raw_only11.00007.0535[16]
real_world_msgs_test_v2.jsonsonnet-4-6zero_shot11.00006.9848[15]
real_world_msgs_test_v3.jsonopus-4-5db_only11.00007.5779[13]
real_world_msgs_test_v3.jsonopus-4-5db_plus_raw11.00008.2349[13]
real_world_msgs_test_v3.jsonopus-4-5raw_only11.000028.0913[13]
real_world_msgs_test_v3.jsonopus-4-5zero_shot11.00006.0178[9]
real_world_msgs_test_v3.jsonopus-4-6db_only11.00008.6978[13]
real_world_msgs_test_v3.jsonopus-4-6db_plus_raw11.00008.6996[13]
real_world_msgs_test_v3.jsonopus-4-6raw_only11.00007.2150[13]
real_world_msgs_test_v3.jsonopus-4-6zero_shot11.00006.1454[9]
real_world_msgs_test_v3.jsonsonnet-4-5db_only11.00007.7518[13]
real_world_msgs_test_v3.jsonsonnet-4-5db_plus_raw11.00009.8089[17]
real_world_msgs_test_v3.jsonsonnet-4-5raw_only11.00007.6770[17]
real_world_msgs_test_v3.jsonsonnet-4-5zero_shot11.00005.8478[9]
real_world_msgs_test_v3.jsonsonnet-4-6db_only11.00007.1598[13]
real_world_msgs_test_v3.jsonsonnet-4-6db_plus_raw11.00008.1354[13]
real_world_msgs_test_v3.jsonsonnet-4-6raw_only11.00007.2476[13]
real_world_msgs_test_v3.jsonsonnet-4-6zero_shot11.00005.9217[10]
single_product_multiple_shipment_complex.jsonopus-4-5db_only11.00009.4503[22]
single_product_multiple_shipment_complex.jsonopus-4-5db_plus_raw11.00008.6895[19]
single_product_multiple_shipment_complex.jsonopus-4-5raw_only11.000031.3806[19]
single_product_multiple_shipment_complex.jsonopus-4-5zero_shot11.00009.0301[11]
single_product_multiple_shipment_complex.jsonopus-4-6db_only11.00008.9681[12]
single_product_multiple_shipment_complex.jsonopus-4-6db_plus_raw11.000010.9163[22]
single_product_multiple_shipment_complex.jsonopus-4-6raw_only11.00008.6152[25]
single_product_multiple_shipment_complex.jsonopus-4-6zero_shot11.00007.0087[9]
single_product_multiple_shipment_complex.jsonsonnet-4-5db_only11.000010.3489[22]
single_product_multiple_shipment_complex.jsonsonnet-4-5db_plus_raw11.000011.8474[21]
single_product_multiple_shipment_complex.jsonsonnet-4-5raw_only11.00009.5149[19]
single_product_multiple_shipment_complex.jsonsonnet-4-5zero_shot11.00007.2696[11]
single_product_multiple_shipment_complex.jsonsonnet-4-6db_only11.00008.4816[22]
single_product_multiple_shipment_complex.jsonsonnet-4-6db_plus_raw11.000011.4460[22]
single_product_multiple_shipment_complex.jsonsonnet-4-6raw_only11.00009.3933[22]
single_product_multiple_shipment_complex.jsonsonnet-4-6zero_shot11.00007.4629[11]
single_product_multiple_shipment_medium.jsonopus-4-5db_only11.00008.0808[17]
single_product_multiple_shipment_medium.jsonopus-4-5db_plus_raw11.00008.3669[17]
single_product_multiple_shipment_medium.jsonopus-4-5raw_only11.000028.2005[17]
single_product_multiple_shipment_medium.jsonopus-4-5zero_shot11.00005.9035[12]
single_product_multiple_shipment_medium.jsonopus-4-6db_only11.00008.1109[19]
single_product_multiple_shipment_medium.jsonopus-4-6db_plus_raw11.00009.2996[17]
single_product_multiple_shipment_medium.jsonopus-4-6raw_only11.00008.4515[17]
single_product_multiple_shipment_medium.jsonopus-4-6zero_shot11.00006.2844[12]
single_product_multiple_shipment_medium.jsonsonnet-4-5db_only11.00008.3852[17]
single_product_multiple_shipment_medium.jsonsonnet-4-5db_plus_raw11.00008.2912[19]
single_product_multiple_shipment_medium.jsonsonnet-4-5raw_only11.00007.6496[17]
single_product_multiple_shipment_medium.jsonsonnet-4-5zero_shot11.00005.8609[12]
single_product_multiple_shipment_medium.jsonsonnet-4-6db_only11.00006.9003[17]
single_product_multiple_shipment_medium.jsonsonnet-4-6db_plus_raw11.00008.6039[17]
single_product_multiple_shipment_medium.jsonsonnet-4-6raw_only11.00007.0250[17]
single_product_multiple_shipment_medium.jsonsonnet-4-6zero_shot11.00006.5647[19]
single_product_multiple_shipment_simple.jsonopus-4-5db_only11.00008.3590[14]
single_product_multiple_shipment_simple.jsonopus-4-5db_plus_raw11.00008.1183[13]
single_product_multiple_shipment_simple.jsonopus-4-5raw_only11.000031.6421[12]
single_product_multiple_shipment_simple.jsonopus-4-5zero_shot11.00006.6181[10]
single_product_multiple_shipment_simple.jsonopus-4-6db_only11.00008.3774[23]
single_product_multiple_shipment_simple.jsonopus-4-6db_plus_raw11.00008.8283[14]
single_product_multiple_shipment_simple.jsonopus-4-6raw_only11.00006.5592[14]
single_product_multiple_shipment_simple.jsonopus-4-6zero_shot11.00005.9586[10]
single_product_multiple_shipment_simple.jsonsonnet-4-5db_only11.00007.9908[14]
single_product_multiple_shipment_simple.jsonsonnet-4-5db_plus_raw11.00008.3229[14]
single_product_multiple_shipment_simple.jsonsonnet-4-5raw_only11.00006.6421[13]
single_product_multiple_shipment_simple.jsonsonnet-4-5zero_shot11.00005.7926[10]
single_product_multiple_shipment_simple.jsonsonnet-4-6db_only11.00007.1050[14]
single_product_multiple_shipment_simple.jsonsonnet-4-6db_plus_raw11.00008.4745[14]
single_product_multiple_shipment_simple.jsonsonnet-4-6raw_only11.00006.8847[14]
single_product_multiple_shipment_simple.jsonsonnet-4-6zero_shot11.00005.3136[7]
single_product_single_shipment_complex.jsonopus-4-5db_only11.00007.3926[4]
single_product_single_shipment_complex.jsonopus-4-5db_plus_raw11.00006.8795[4]
single_product_single_shipment_complex.jsonopus-4-5raw_only11.000024.8393[3]
single_product_single_shipment_complex.jsonopus-4-5zero_shot11.00004.9059[6]
single_product_single_shipment_complex.jsonopus-4-6db_only11.00006.4983[5]
single_product_single_shipment_complex.jsonopus-4-6db_plus_raw11.00006.8118[4]
single_product_single_shipment_complex.jsonopus-4-6raw_only11.00005.3122[4]
single_product_single_shipment_complex.jsonopus-4-6zero_shot11.00005.1756[6]
single_product_single_shipment_complex.jsonsonnet-4-5db_only11.00006.4709[5]
single_product_single_shipment_complex.jsonsonnet-4-5db_plus_raw11.00007.4591[5]
single_product_single_shipment_complex.jsonsonnet-4-5raw_only11.00005.6860[5]
single_product_single_shipment_complex.jsonsonnet-4-5zero_shot11.00004.7932[5]
single_product_single_shipment_complex.jsonsonnet-4-6db_only11.00006.6733[5]
single_product_single_shipment_complex.jsonsonnet-4-6db_plus_raw11.00007.7592[5]
single_product_single_shipment_complex.jsonsonnet-4-6raw_only11.00009.7661[5]
single_product_single_shipment_complex.jsonsonnet-4-6zero_shot11.00005.5360[3]
single_product_single_shipment_medium.jsonopus-4-5db_only11.00007.9986[5]
single_product_single_shipment_medium.jsonopus-4-5db_plus_raw11.000026.8310[5]
single_product_single_shipment_medium.jsonopus-4-5raw_only11.000030.6453[4]
single_product_single_shipment_medium.jsonopus-4-5zero_shot11.00005.5309[4]
single_product_single_shipment_medium.jsonopus-4-6db_only11.00006.3622[7]
single_product_single_shipment_medium.jsonopus-4-6db_plus_raw11.00006.8993[5]
single_product_single_shipment_medium.jsonopus-4-6raw_only11.00005.0512[5]
single_product_single_shipment_medium.jsonopus-4-6zero_shot11.00005.1254[5]
single_product_single_shipment_medium.jsonsonnet-4-5db_only11.00007.1266[5]
single_product_single_shipment_medium.jsonsonnet-4-5db_plus_raw11.00006.9029[5]
single_product_single_shipment_medium.jsonsonnet-4-5raw_only11.00004.9418[4]
single_product_single_shipment_medium.jsonsonnet-4-5zero_shot11.00005.1242[6]
single_product_single_shipment_medium.jsonsonnet-4-6db_only11.00005.9295[5]
single_product_single_shipment_medium.jsonsonnet-4-6db_plus_raw11.00006.6344[5]
single_product_single_shipment_medium.jsonsonnet-4-6raw_only11.000011.4231[5]
single_product_single_shipment_medium.jsonsonnet-4-6zero_shot11.00006.0548[5]
single_product_single_shipment_simple.jsonopus-4-5db_only11.00007.0226[7]
single_product_single_shipment_simple.jsonopus-4-5db_plus_raw11.00007.2657[7]
single_product_single_shipment_simple.jsonopus-4-5raw_only11.000023.2421[7]
single_product_single_shipment_simple.jsonopus-4-5zero_shot11.00004.7558[7]
single_product_single_shipment_simple.jsonopus-4-6db_only11.00006.2587[7]
single_product_single_shipment_simple.jsonopus-4-6db_plus_raw11.00006.9760[7]
single_product_single_shipment_simple.jsonopus-4-6raw_only11.00004.8584[7]
single_product_single_shipment_simple.jsonopus-4-6zero_shot11.00005.1069[7]
single_product_single_shipment_simple.jsonsonnet-4-5db_only11.00006.5289[7]
single_product_single_shipment_simple.jsonsonnet-4-5db_plus_raw11.00006.4868[7]
single_product_single_shipment_simple.jsonsonnet-4-5raw_only11.00004.8221[7]
single_product_single_shipment_simple.jsonsonnet-4-5zero_shot11.00005.2322[8]
single_product_single_shipment_simple.jsonsonnet-4-6db_only11.00006.3116[17]
single_product_single_shipment_simple.jsonsonnet-4-6db_plus_raw11.00006.6142[7]
single_product_single_shipment_simple.jsonsonnet-4-6raw_only11.00005.6298[7]
single_product_single_shipment_simple.jsonsonnet-4-6zero_shot11.00004.4674[7]

Top Mismatches (up to 100 runs)

ChatModelStrategyRunMismatch countSample mismatches
multiple_product_multiple_shipment_complex.jsonopus-4-6db_only147
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonopus-4-5db_plus_raw143
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonopus-4-5zero_shot141
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonopus-4-5db_only141
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonsonnet-4-5db_only140
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonopus-4-6raw_only140
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonopus-4-5raw_only140
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6db_only138
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonsonnet-4-5raw_only138
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonsonnet-4-5db_plus_raw138
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonopus-4-6db_plus_raw138
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6raw_only137
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  }
]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6zero_shot134
[
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 276.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  }
]
multiple_product_multiple_shipment_complex.jsonsonnet-4-6db_plus_raw133
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.0,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5zero_shot126
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": ""
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": ""
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 3600.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
single_product_multiple_shipment_complex.jsonopus-4-6raw_only125
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6zero_shot125
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAGS"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 3600.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6db_plus_raw124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6db_only124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5db_plus_raw124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5db_only124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-6db_plus_raw124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-6db_only124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-5zero_shot124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-5db_only124
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
single_product_multiple_shipment_simple.jsonopus-4-6db_only123
[
  {
    "path": "data[0].items[0].description",
    "expected": "KNM Coffee",
    "actual": "lecithin fat powder"
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "MT"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 12.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 96000.0
  }
]
single_product_multiple_shipment_complex.jsonsonnet-4-6raw_only122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonsonnet-4-6db_plus_raw122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonsonnet-4-6db_only122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonsonnet-4-5db_only122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonopus-4-6db_plus_raw122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonopus-4-5db_only122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-5raw_only122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-6raw_only122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-5db_plus_raw122
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 285.0,
    "actual": 300.0
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  }
]
single_product_multiple_shipment_complex.jsonsonnet-4-5db_plus_raw121
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].payment_date",
    "expected": [
      "Net 30 from last delivery",
      "Net 30",
      "2026-04-08"
    ],
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-6zero_shot119
[
  {
    "path": "data[0].items[0].description",
    "expected": "KNM Coffee",
    "actual": "KNM Coffee (Medium Roast)"
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-5db_plus_raw119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonopus-4-6db_only119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonsonnet-4-5raw_only119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].billing_address",
    "expected": [],
    "actual": "Leonardo da Vinci, da@vinci.com"
  }
]
single_product_multiple_shipment_complex.jsonopus-4-5raw_only119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].billing_address",
    "expected": [],
    "actual": ""
  }
]
single_product_multiple_shipment_complex.jsonopus-4-5db_plus_raw119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "FOB Singapore",
    "actual": "FOB"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  },
  {
    "path": "data[0].billing_address",
    "expected": [],
    "actual": ""
  }
]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5raw_only119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
multiple_product_multiple_shipment_medium.jsonsonnet-4-6raw_only119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  }
]
multiple_product_multiple_shipment_medium.jsonopus-4-5raw_only119
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 23.75,
    "actual": 25.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "EXW Singapore",
    "actual": "EXW"
  },
  {
    "path": "data[0].vendor_name",
    "expected": "flamingos",
    "actual": "Van Beethoven"
  }
]
multiple_product_multiple_shipment_simple.jsonopus-4-6raw_only118
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
multiple_product_multiple_shipment_simple.jsonopus-4-6db_plus_raw118
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
single_product_single_shipment_simple.jsonsonnet-4-6db_only117
[
  {
    "path": "data[0].items[0].description",
    "expected": "KNM Coffee",
    "actual": "BP102"
  },
  {
    "path": "data[0].items[0].quantity",
    "expected": 5.0,
    "actual": 23.0
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "MT"
  },
  {
    "path": "data[0].items[0].unit_price",
    "expected": 25.0,
    "actual": 1410.0
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/MT"
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-6raw_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-6db_plus_raw117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-6db_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-5raw_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonsonnet-4-5db_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonopus-4-6raw_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonopus-4-6db_plus_raw117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonopus-4-5raw_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonopus-4-5db_plus_raw117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
single_product_multiple_shipment_medium.jsonopus-4-5db_only117
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2025-11-180",
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonsonnet-4-5raw_only117
[
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 8000.0
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "MT",
    "actual": "KG"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonsonnet-4-5db_plus_raw117
[
  {
    "path": "data[0].items[0].quantity",
    "expected": 8.0,
    "actual": 8000.0
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "MT",
    "actual": "KG"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonsonnet-4-5raw_only117
[
  {
    "path": "data[0].items[0].quantity",
    "expected": 23.0,
    "actual": 23000.0
  },
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "MT",
    "actual": "KG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonsonnet-4-6raw_only116
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6raw_only116
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6db_plus_raw116
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
multiple_product_multiple_shipment_simple.jsonsonnet-4-6db_only116
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
multiple_product_multiple_shipment_simple.jsonsonnet-4-5db_plus_raw116
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
multiple_product_multiple_shipment_simple.jsonopus-4-5db_plus_raw116
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
multiple_product_multiple_shipment_simple.jsonopus-4-5db_only116
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
real_world_msgs_test_v2.jsonsonnet-4-6zero_shot115
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  },
  {
    "path": "data[0].billing_address",
    "expected": null,
    "actual": ""
  }
]
single_product_multiple_shipment_simple.jsonsonnet-4-6raw_only114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonsonnet-4-6db_plus_raw114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonsonnet-4-6db_only114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonsonnet-4-5db_plus_raw114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonsonnet-4-5db_only114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonopus-4-6raw_only114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonopus-4-6db_plus_raw114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonopus-4-5db_only114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
real_world_msgs_test_v2.jsonopus-4-6zero_shot114
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  },
  {
    "path": "data[0].billing_address",
    "expected": null,
    "actual": ""
  }
]
multiple_product_multiple_shipment_simple.jsonopus-4-5raw_only114
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Singapore",
    "actual": "CIF"
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-04-30",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-200",
    "actual": "PO-2025-11-200"
  }
]
single_product_multiple_shipment_simple.jsonsonnet-4-5raw_only113
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
single_product_multiple_shipment_simple.jsonopus-4-5db_plus_raw113
[
  {
    "path": "data[0].items[0].quantity_unit",
    "expected": "bags",
    "actual": "BAGS"
  },
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/bag",
    "actual": "USD/BAG"
  },
  {
    "path": "data[0].items[0].total",
    "expected": 375.0,
    "actual": 200.0
  },
  {
    "path": "data[0].do_date",
    "expected": "2026-02-28",
    "actual": "2026-05-31"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": "PO-2024-11-150",
    "actual": "PO-2025-11-150"
  }
]
real_world_msgs_test_v3.jsonsonnet-4-6raw_only113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonsonnet-4-6db_plus_raw113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonsonnet-4-6db_only113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonsonnet-4-5db_only113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopus-4-6raw_only113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopus-4-6db_plus_raw113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopus-4-6db_only113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopus-4-5raw_only113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopus-4-5db_plus_raw113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v3.jsonopus-4-5db_only113
[
  {
    "path": "data[0].items[0].pricing_unit",
    "expected": "USD/kg",
    "actual": "USD/KG"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": null,
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonsonnet-4-6db_plus_raw113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonsonnet-4-6db_only113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonsonnet-4-5zero_shot113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonsonnet-4-5db_plus_raw113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonopus-4-6raw_only113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonopus-4-6db_plus_raw113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonopus-4-6db_only113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonopus-4-5zero_shot113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]
real_world_msgs_test_v2.jsonopus-4-5raw_only113
[
  {
    "path": "data[0].items[0].ship_term",
    "expected": "CIF Busan",
    "actual": "CIF"
  },
  {
    "path": "data[0].items[0].loading",
    "expected": "",
    "actual": "23MT/40'FCL"
  },
  {
    "path": "data[0].po_ref_no",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].payment_date",
    "expected": null,
    "actual": ""
  },
  {
    "path": "data[0].shipping_address",
    "expected": [
      "CIF Busan",
      "Busan"
    ],
    "actual": ""
  }
]