| real_world_msgs_test_v3.json | sonnet-4-6 | zero_shot | 1 | 9 | [
{
"path": "data[0].items[0].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
},
{
"path": "data[0].items[0].unit_price",
"expected": 12.0,
"actual": 12000.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/MT"
},
{
"path": "data[0].items[0].ship_term",
"expected": "",
"actual": "FOB"
},
{
"path": "data[0].items[1].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
}
] |
| real_world_msgs_test_v3.json | sonnet-4-6 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
},
{
"path": "data[0].items[0].unit_price",
"expected": 12.0,
"actual": 12000.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/MT"
},
{
"path": "data[0].items[1].description",
"expected": "lecithin fat powder",
"actual": "Lecithin Fat Powder"
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 12000.0
}
] |
| multiple_product_multiple_shipment_simple.json | sonnet-4-6 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/BAGS"
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].description",
"expected": "Assam tea",
"actual": "Assam Tea"
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
}
] |
| multiple_product_multiple_shipment_simple.json | sonnet-4-6 | zero_shot | 1 | 7 | [
{
"path": "data[0].items[0].unit_price",
"expected": 25.0,
"actual": 250.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/BAGS"
},
{
"path": "data[0].items[0].total",
"expected": 250.0,
"actual": 2500.0
},
{
"path": "data[0].items[1].description",
"expected": "Assam tea",
"actual": "Assam Tea"
},
{
"path": "data[0].items[1].unit_price",
"expected": 12.0,
"actual": 240.0
}
] |
| real_world_msgs_test_v1.json | sonnet-4-6 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].description",
"expected": "soy lecithin powder",
"actual": "Soy Lecithin Powder"
},
{
"path": "data[0].items[0].unit_price",
"expected": 4.1,
"actual": 4100.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/MT"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "",
"actual": "Busan"
},
{
"path": "data[0].items[0].loading",
"expected": "12MT/20'FCL",
"actual": "2x 12MT/20'FCL"
}
] |
| real_world_msgs_test_v1.json | sonnet-4-6 | zero_shot | 1 | 6 | [
{
"path": "data[0].items[0].description",
"expected": "soy lecithin powder",
"actual": "Soy Lecithin Powder"
},
{
"path": "data[0].items[0].unit_price",
"expected": 4.1,
"actual": 4100.0
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/MT"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "",
"actual": "Busan"
},
{
"path": "data[0].items[0].loading",
"expected": "12MT/20'FCL",
"actual": "2x 12MT/20'FCL"
}
] |
| single_product_single_shipment_complex.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
}
] |
| single_product_single_shipment_complex.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].items[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
},
{
"path": "data[0].do_date",
"expected": "2025-11-28",
"actual": "2026-11-28"
},
{
"path": "data[0].shipping_address",
"expected": "352 Indiana Jones St.",
"actual": "352 Indiana Jones St"
}
] |
| single_product_multiple_shipment_simple.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
}
] |
| single_product_multiple_shipment_simple.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data[0].items[0].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[0].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
},
{
"path": "data[0].items[1].quantity_unit",
"expected": "BAGS",
"actual": "bags"
},
{
"path": "data[0].items[1].pricing_unit",
"expected": "USD/BAG",
"actual": "USD/bag"
}
] |
| multiple_product_multiple_shipment_medium.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].do_date",
"expected": "2026-03-05",
"actual": "2026-02-28"
}
] |
| multiple_product_multiple_shipment_medium.json | sonnet-4-6 | zero_shot | 1 | 4 | [
{
"path": "data",
"expected_len": 1,
"actual_len": 2
},
{
"path": "data[0].items",
"expected_len": 3,
"actual_len": 1
},
{
"path": "data[0].items[0].quantity_unit",
"expected": "bags",
"actual": "BAGS"
},
{
"path": "data[0].do_date",
"expected": "2026-03-05",
"actual": "2026-02-28"
}
] |
| single_product_single_shipment_medium.json | sonnet-4-6 | zero_shot | 1 | 3 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
}
] |
| single_product_single_shipment_medium.json | sonnet-4-6 | zero_shot | 1 | 3 | [
{
"path": "data[0].items[0].shipment_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].do_date",
"expected": "2026-05-28",
"actual": "2026-05-31"
},
{
"path": "data[0].payment_date",
"expected": "Net 30 from delivery",
"actual": ""
}
] |
| single_product_multiple_shipment_medium.json | sonnet-4-6 | zero_shot | 1 | 3 | [
{
"path": "data[0].items[0].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee (medium roast)"
},
{
"path": "data[0].items[1].description",
"expected": "KNM Coffee",
"actual": "KNM Coffee (medium roast)"
},
{
"path": "data[0].do_date",
"expected": "2026-05-31",
"actual": ""
}
] |
| real_world_msgs_test_v2.json | sonnet-4-6 | zero_shot | 1 | 3 | [
{
"path": "data[1].items[0].unit_price",
"expected": 4.2,
"actual": 4200.0
},
{
"path": "data[1].items[0].pricing_unit",
"expected": "USD/KG",
"actual": "USD/MT"
},
{
"path": "data[1].items[0].loading",
"expected": "",
"actual": "18MT/40'FCL"
}
] |
| real_world_msgs_test_v2.json | sonnet-4-6 | zero_shot | 1 | 2 | [
{
"path": "data[1].items[0].loading",
"expected": "",
"actual": "18MT/40'FCL"
},
{
"path": "data[1].items[0].total",
"expected": 96600.0,
"actual": null
}
] |
| single_product_multiple_shipment_medium.json | sonnet-4-6 | zero_shot | 1 | 1 | [
{
"path": "data[0].do_date",
"expected": "2026-05-31",
"actual": ""
}
] |