| multiple_product_multiple_shipment_medium.json | gemini:gemini-2.5-flash | raw_only | 1 | 17 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 23.75
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": "2027-02-28"
},
{
"path": "data[0].items[0].total",
"expected": null,
"actual": 285.0
}
] |
| multiple_product_multiple_shipment_medium.json | openai:5.4 | raw_only | 1 | 15 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].total",
"expected": null,
"actual": 3600.0
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "boxes",
"actual": "BOXES"
}
] |
| multiple_product_multiple_shipment_medium.json | openai:4.1 | zero_shot | 1 | 15 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD"
},
{
"path": "data[0].items[0].total",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "boxes",
"actual": "BOXES"
}
] |
| multiple_product_multiple_shipment_medium.json | gemini:gemini-2.5-flash | zero_shot | 1 | 15 | [
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 23.75
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/bag"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": "2027-02-28"
},
{
"path": "data[0].items[0].total",
"expected": null,
"actual": 285.0
},
{
"path": "data[0].items[1].unit_price",
"expected": null,
"actual": 11.4
}
] |
| single_product_multiple_shipment_complex.json | gemini:gemini-2.5-flash | zero_shot | 1 | 14 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[0].delivery_terms",
"expected": "FOB Singapore",
"actual": "FOB Singapore 100 Finance Ave"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": "2027-02-28"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
}
] |
| real_world_msgs_test_v2.json | openai:5-mini | zero_shot | 1 | 14 | [
{
"path": "data[0].items[0].quantity",
"expected": 23.0,
"actual": null
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "MT",
"actual": ""
},
{
"path": "data[0].items[0].ship_term",
"expected": "CIF",
"actual": ""
},
{
"path": "data[0].items[0].delivery_terms",
"expected": "CIF Busan",
"actual": ""
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": ""
}
] |
| multiple_product_multiple_shipment_simple.json | openai:5-mini | zero_shot | 1 | 12 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
}
] |
| multiple_product_multiple_shipment_medium.json | openai:5.4 | zero_shot | 1 | 12 | [
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": "100 Finance Ave Singapore 018989, Gate B"
},
{
"path": "data[0].items[0].total",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].items[1].unit_price",
"expected": null,
"actual": 360.0
},
{
"path": "data[0].items[1].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": "100 Finance Ave Singapore 018989, Gate B"
}
] |
| multiple_product_multiple_shipment_simple.json | openai:5.4 | zero_shot | 1 | 11 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
}
] |
| multiple_product_multiple_shipment_simple.json | openai:5.2 | zero_shot | 1 | 11 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
}
] |
| single_product_multiple_shipment_simple.json | openai:5.2 | zero_shot | 1 | 10 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 15.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-31",
"actual": ""
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
}
] |
| single_product_multiple_shipment_medium.json | openai:5.4 | zero_shot | 1 | 10 | [
{
"path": "data[0].items[0].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee medium roast"
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "BAG"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].items[0].packing",
"expected": "",
"actual": "1kg bags"
},
{
"path": "data[0].items[1].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee medium roast"
}
] |
| single_product_multiple_shipment_medium.json | openai:5.2 | zero_shot | 1 | 10 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee (medium roast)"
},
{
"path": "data[0].items[0].quantity",
"expected": 12.0,
"actual": 20.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-31",
"actual": ""
}
] |
| single_product_multiple_shipment_medium.json | gemini:gemini-2.5-flash | zero_shot | 1 | 10 | [
{
"path": "data[0].items[0].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee medium roast"
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[1].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee medium roast"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
}
] |
| real_world_msgs_test_v2.json | openai:5.2 | raw_only | 1 | 10 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": ""
},
{
"path": "data[0].do_date",
"expected": "2026-02-28",
"actual": ""
},
{
"path": "data[0].vendor_name",
"expected": "AG Lipids Pte Ltd",
"actual": ""
},
{
"path": "data[0].delivery_terms",
"expected": "CIF Busan",
"actual": ""
},
{
"path": "data[1].items[0].quantity",
"expected": 23.0,
"actual": 18.0
}
] |
| single_product_multiple_shipment_simple.json | openai:5.4 | zero_shot | 1 | 9 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 15.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-31",
"actual": ""
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
}
] |
| single_product_multiple_shipment_simple.json | openai:5-mini | zero_shot | 1 | 9 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
}
] |
| multiple_product_multiple_shipment_simple.json | gemini:gemini-2.5-flash | zero_shot | 1 | 9 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BOXES",
"actual": "boxes"
}
] |
| multiple_product_multiple_shipment_medium.json | gemini:gemini-2.5-pro | zero_shot | 1 | 9 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 23.75
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAGS"
}
] |
| multiple_product_multiple_shipment_medium.json | gemini:gemini-2.5-pro | raw_only | 1 | 9 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 23.75
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
}
] |
| single_product_single_shipment_medium.json | openai:5-mini | zero_shot | 1 | 8 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
},
{
"path": "data[0].items[0].delivery_terms",
"expected": "",
"actual": "100 Finance Ave Singapore 018989"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
}
] |
| single_product_multiple_shipment_complex.json | openai:5-mini | zero_shot | 1 | 8 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
},
{
"path": "data[0].items[2].quantity_unit",
"expected": "BAGS",
"actual": "bags"
}
] |
| multiple_product_multiple_shipment_simple.json | openai:4.1 | zero_shot | 1 | 8 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BOX",
"actual": "$"
}
] |
| multiple_product_multiple_shipment_medium.json | openai:5.2 | raw_only | 1 | 8 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 25.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
}
] |
| multiple_product_multiple_shipment_medium.json | openai:4.1 | raw_only | 1 | 8 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 285.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
}
] |
| single_product_single_shipment_medium.json | openai:5.2 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": ""
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| single_product_single_shipment_medium.json | gemini:gemini-2.5-flash | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | openai:5.4 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "$/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St. Gate B"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | gemini:gemini-2.5-pro | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].delivery_terms",
"expected": "FOB Singapore",
"actual": "FOB Singapore, our delivery included."
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| real_world_msgs_test_v1.json | sonnet-4-6 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].description",
"expected": "soy lecithin powder",
"actual": "Soy Lecithin Powder"
},
{
"path": "data[0].items[0].unit_price",
"expected": 4.1,
"actual": 4100.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/MT"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "",
"actual": "Busan"
},
{
"path": "data[0].items[0].loading",
"expected": "12MT/20'FCL",
"actual": "2x 12MT/20'FCL"
}
] |
| multiple_product_multiple_shipment_simple.json | sonnet-4-6 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/BAGS"
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].description",
"expected": "Assam tea",
"actual": "Assam Tea"
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
}
] |
| multiple_product_multiple_shipment_simple.json | sonnet-4-5 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/UNIT"
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BOX",
"actual": "USD/UNIT"
}
] |
| multiple_product_multiple_shipment_simple.json | openai:5.4 | raw_only | 1 | 7 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BOX",
"actual": ""
},
{
"path": "data[0].do_date",
"expected": "2026-06-30",
"actual": ""
}
] |
| multiple_product_multiple_shipment_medium.json | sonnet-4-6 | zero_shot | 1 | 7 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD"
}
] |
| multiple_product_multiple_shipment_medium.json | sonnet-4-6 | raw_only | 1 | 7 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 25.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
}
] |
| multiple_product_multiple_shipment_medium.json | sonnet-4-5 | raw_only | 1 | 7 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 23.75
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
}
] |
| multiple_product_multiple_shipment_medium.json | opus-4-6 | raw_only | 1 | 7 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 25.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
}
] |
| multiple_product_multiple_shipment_medium.json | opus-4-5 | raw_only | 1 | 7 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 25.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "",
"actual": "USD/BAG"
}
] |
| single_product_single_shipment_medium.json | openai:5.4 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | sonnet-4-6 | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | sonnet-4-5 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | sonnet-4-5 | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | opus-4-6 | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | opus-4-5 | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | openai:5.4 | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | openai:5.2 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": ""
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": ""
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": ""
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | openai:5.2 | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | gemini:gemini-2.5-flash | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "$ per bag"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].payment_date",
"expected": "",
"actual": "Net 30 from delivery"
},
{
"path": "data[0].delivery_terms",
"expected": "FOB Singapore",
"actual": "FOB Singapore, our delivery included"
}
] |
| single_product_multiple_shipment_simple.json | gemini:gemini-2.5-flash | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].payment_date",
"expected": "",
"actual": "Net 30 from last delivery"
}
] |
| single_product_multiple_shipment_medium.json | openai:5-mini | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "$/bag"
},
{
"path": "data[0].do_date",
"expected": "2026-05-31",
"actual": ""
}
] |
| single_product_multiple_shipment_complex.json | opus-4-5 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[2].quantity_unit",
"expected": "BAGS",
"actual": "bags"
}
] |
| real_world_msgs_test_v3.json | openai:5.4 | zero_shot | 1 | 6 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 20.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "$/KG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-03-31",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
}
] |
| real_world_msgs_test_v3.json | openai:5-mini | zero_shot | 1 | 6 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 20.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/kg"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-03-31",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
}
] |
| real_world_msgs_test_v2.json | openai:5-mini | raw_only | 1 | 6 | [
{
"path": "data",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": ""
},
{
"path": "data[0].do_date",
"expected": "2026-02-28",
"actual": ""
},
{
"path": "data[0].vendor_name",
"expected": "AG Lipids Pte Ltd",
"actual": ""
}
] |
| multiple_product_multiple_shipment_simple.json | opus-4-6 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].description",
"expected": "Assam tea",
"actual": "Assam Tea"
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
},
{
"path": "data[0].items[1].total",
"expected": 240.0,
"actual": 4800.0
}
] |
| multiple_product_multiple_shipment_simple.json | opus-4-5 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BOX",
"actual": ""
}
] |
| multiple_product_multiple_shipment_simple.json | openai:5.2 | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
},
{
"path": "data[0].items[1].total",
"expected": 240.0,
"actual": 4800.0
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| multiple_product_multiple_shipment_simple.json | openai:5-mini | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
},
{
"path": "data[0].items[1].total",
"expected": 240.0,
"actual": 4800.0
},
{
"path": "data[0].do_date",
"expected": "2026-06-30",
"actual": ""
}
] |
| multiple_product_multiple_shipment_medium.json | sonnet-4-5 | zero_shot | 1 | 6 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": ""
},
{
"path": "data[0].items[0].unit_price",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].do_date",
"expected": "2026-03-05",
"actual": "2026-02-28"
}
] |
| multiple_product_multiple_shipment_medium.json | openai:5.2 | zero_shot | 1 | 6 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].do_date",
"expected": "2026-03-05",
"actual": "2026-02-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| multiple_product_multiple_shipment_medium.json | openai:5-mini | raw_only | 1 | 6 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "boxes",
"actual": "BOXES"
},
{
"path": "data[0].items[2].description",
"expected": "Copy paper",
"actual": "copy paper"
},
{
"path": "data[0].items[2].quantity_unit",
"expected": "reams",
"actual": "REAMS"
},
{
"path": "data[0].do_date",
"expected": "2026-03-05",
"actual": ""
}
] |
| single_product_single_shipment_medium.json | openai:5.4 | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": ""
}
] |
| single_product_single_shipment_medium.json | openai:5.2 | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | opus-4-6 | zero_shot | 1 | 5 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | openai:5-mini | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | gemini:gemini-2.5-flash | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": ""
}
] |
| single_product_multiple_shipment_medium.json | sonnet-4-5 | zero_shot | 1 | 5 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": ""
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": ""
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": ""
},
{
"path": "data[0].do_date",
"expected": "2026-05-31",
"actual": ""
}
] |
| single_product_multiple_shipment_medium.json | openai:4.1 | zero_shot | 1 | 5 | [
{
"path": "data[0].items[0].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee (Medium Roast)"
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "BAG"
},
{
"path": "data[0].items[1].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee (Medium Roast)"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "BAG"
},
{
"path": "data[0].do_date",
"expected": "2026-05-31",
"actual": ""
}
] |
| single_product_multiple_shipment_complex.json | openai:5.4 | zero_shot | 1 | 5 | [
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].items[2].pricing_unit",
"expected": "USD/BAG",
"actual": "$/BAG"
},
{
"path": "data[0].do_date",
"expected": "2026-03-10",
"actual": ""
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| single_product_multiple_shipment_complex.json | gemini:gemini-2.5-pro | zero_shot | 1 | 5 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": "2027-02-28"
},
{
"path": "data[0].items[1].shipment_date",
"expected": "2026-03-04",
"actual": "2027-03-04"
},
{
"path": "data[0].items[2].shipment_date",
"expected": "2026-03-10",
"actual": "2027-03-10"
},
{
"path": "data[0].do_date",
"expected": "2026-03-10",
"actual": "2027-03-10"
},
{
"path": "data[0].po_date",
"expected": "",
"actual": "2025-11-28"
}
] |
| single_product_multiple_shipment_complex.json | gemini:gemini-2.5-pro | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": "2027-02-28"
},
{
"path": "data[0].items[1].shipment_date",
"expected": "2026-03-04",
"actual": "2027-03-04"
},
{
"path": "data[0].items[2].shipment_date",
"expected": "2026-03-10",
"actual": "2027-03-10"
},
{
"path": "data[0].do_date",
"expected": "2026-03-10",
"actual": "2027-03-10"
},
{
"path": "data[0].billing_address",
"expected": "",
"actual": "Leonardo da Vinci"
}
] |
| single_product_multiple_shipment_complex.json | gemini:gemini-2.5-flash | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": "2027-02-28"
},
{
"path": "data[0].items[1].shipment_date",
"expected": "2026-03-04",
"actual": "2027-03-04"
},
{
"path": "data[0].items[2].shipment_date",
"expected": "2026-03-10",
"actual": "2027-03-10"
},
{
"path": "data[0].do_date",
"expected": "2026-03-10",
"actual": ""
},
{
"path": "data[0].payment_date",
"expected": "",
"actual": "Net 30 Days"
}
] |
| real_world_msgs_test_v3.json | sonnet-4-6 | zero_shot | 1 | 5 | [
{
"path": "data[0].items[0].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
},
{
"path": "data[0].items[1].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
},
{
"path": "data[0].items[1].total",
"expected": 144000.0,
"actual": null
},
{
"path": "data[0].do_date",
"expected": "",
"actual": "2026-03-31"
}
] |
| real_world_msgs_test_v3.json | sonnet-4-6 | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
},
{
"path": "data[0].items[1].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
},
{
"path": "data[0].items[1].total",
"expected": 144000.0,
"actual": null
},
{
"path": "data[0].do_date",
"expected": "",
"actual": "2026-03-31"
}
] |
| real_world_msgs_test_v3.json | openai:5.4 | raw_only | 1 | 5 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 20.0
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-03-31",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| real_world_msgs_test_v3.json | openai:5.2 | zero_shot | 1 | 5 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 20.0
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-03-31",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": 240000.0
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| real_world_msgs_test_v3.json | openai:5.2 | raw_only | 1 | 5 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 20.0
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-03-31",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| real_world_msgs_test_v3.json | openai:5-mini | raw_only | 1 | 5 | [
{
"path": "data[0].items",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 8.0,
"actual": 20.0
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-03-31",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| real_world_msgs_test_v2.json | sonnet-4-6 | raw_only | 1 | 5 | [
{
"path": "data[0].vendor_name",
"expected": "AG Lipids Pte Ltd",
"actual": "Van Beethoven"
},
{
"path": "data[1].items[0].unit_price",
"expected": 4.2,
"actual": 4200.0
},
{
"path": "data[1].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/MT"
},
{
"path": "data[1].items[0].loading",
"expected": "",
"actual": "18MT/40'FCL"
},
{
"path": "data[1].vendor_name",
"expected": "AG Lipids Pte Ltd",
"actual": "Van Beethoven"
}
] |
| real_world_msgs_test_v2.json | openai:4.1 | zero_shot | 1 | 5 | [
{
"path": "data",
"expected_len": 2,
"actual_len": 1
},
{
"path": "data[0].items",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-02-28",
"actual": "2026-02-29"
},
{
"path": "data[0].do_date",
"expected": "2026-02-28",
"actual": "2026-02-29"
},
{
"path": "data[0].vendor_name",
"expected": "AG Lipids Pte Ltd",
"actual": ""
}
] |
| real_world_msgs_test_v1.json | openai:5-mini | raw_only | 1 | 5 | [
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/kg"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-11-15",
"actual": ""
},
{
"path": "data[0].items[0].loading",
"expected": "12MT/20'FCL",
"actual": ""
},
{
"path": "data[0].do_date",
"expected": "2026-11-15",
"actual": ""
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| real_world_msgs_test_v1.json | gemini:gemini-2.5-pro | raw_only | 1 | 5 | [
{
"path": "data[0].items",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items[0].quantity",
"expected": 24.0,
"actual": 12.0
},
{
"path": "data[0].items[0].shipping_address",
"expected": "",
"actual": "Busan"
},
{
"path": "data[0].items[0].total",
"expected": 98400.0,
"actual": 49200.0
},
{
"path": "data[0].billing_address",
"expected": "",
"actual": "Leonardo da Vinci, "
}
] |
| multiple_product_multiple_shipment_simple.json | gemini:gemini-2.5-pro | zero_shot | 1 | 5 | [
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/BAGS"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "Singapore"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BOX",
"actual": "USD/BOXES"
},
{
"path": "data[0].items[1].shipping_address",
"expected": "100 Finance Ave",
"actual": "Singapore"
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave",
"actual": ""
}
] |
| multiple_product_multiple_shipment_medium.json | opus-4-5 | zero_shot | 1 | 5 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].total",
"expected": null,
"actual": 300.0
},
{
"path": "data[0].do_date",
"expected": "2026-03-05",
"actual": "2026-02-28"
}
] |
| single_product_single_shipment_medium.json | sonnet-4-5 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": ""
}
] |
| single_product_single_shipment_medium.json | sonnet-4-5 | raw_only | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": ""
}
] |
| single_product_single_shipment_medium.json | opus-4-6 | raw_only | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": ""
}
] |
| single_product_single_shipment_medium.json | openai:5-mini | raw_only | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": "Net 30 Days"
}
] |
| single_product_single_shipment_medium.json | gemini:gemini-2.5-flash | raw_only | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-11-28"
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave Singapore 018989",
"actual": ""
}
] |
| single_product_single_shipment_complex.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
}
] |
| single_product_single_shipment_complex.json | opus-4-5 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | openai:4.1 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | openai:4.1 | raw_only | 1 | 4 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_single_shipment_complex.json | gemini:gemini-2.5-pro | raw_only | 1 | 4 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/bag",
"actual": "USD/BAG"
},
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
}
] |
| single_product_multiple_shipment_simple.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
}
] |
| single_product_multiple_shipment_simple.json | openai:4.1 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
},
{
"path": "data[0].items[1].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
},
{
"path": "data[0].do_date",
"expected": "2026-06-30",
"actual": ""
},
{
"path": "data[0].shipping_address",
"expected": "100 Finance Ave",
"actual": "100 Finance Ave."
}
] |
| single_product_multiple_shipment_medium.json | openai:5.4 | raw_only | 1 | 4 | [
{
"path": "data[0].items[0].packing",
"expected": "",
"actual": "1kg bags"
},
{
"path": "data[0].items[1].packing",
"expected": "",
"actual": "1kg bags"
},
{
"path": "data[0].do_date",
"expected": "2026-05-31",
"actual": ""
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| single_product_multiple_shipment_complex.json | sonnet-4-5 | raw_only | 1 | 4 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 3
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].do_date",
"expected": "2026-03-10",
"actual": "2026-02-28"
},
{
"path": "data[0].shipping_address",
"expected": "",
"actual": "100 Finance Ave"
}
] |
| single_product_multiple_shipment_complex.json | openai:5.2 | zero_shot | 1 | 4 | [
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity",
"expected": 14.0,
"actual": 32.0
},
{
"path": "data[0].items[0].total",
"expected": 318.5,
"actual": 728.0
},
{
"path": "data[0].vendor_name",
"expected": "Van Beethoven",
"actual": ""
}
] |
| real_world_msgs_test_v3.json | openai:4.1 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-03-31",
"actual": ""
},
{
"path": "data[0].items[0].total",
"expected": 96000.0,
"actual": null
},
{
"path": "data[0].items[1].shipment_date",
"expected": "2027-05-31",
"actual": ""
},
{
"path": "data[0].items[1].total",
"expected": 144000.0,
"actual": null
}
] |