{"@context":"https://schema.org","@type":"CreativeWork","@id":"https://froggit.ai/public/capsules/2edb12ea-1ec4-4f8f-87d8-aa061fa4d11c","identifier":"2edb12ea-1ec4-4f8f-87d8-aa061fa4d11c","url":"https://froggit.ai/public/capsules/2edb12ea-1ec4-4f8f-87d8-aa061fa4d11c","name":"Commercial AI Chatbots as News Intermediaries","text":"Suzgun, Shen, Bianchi, Spangher, Icard, Ho, Jurafsky, and Zou evaluate six commercial AI chatbots on factual questions derived from same-day BBC News reporting across languages and regions. The paper reports strong multiple-choice accuracy in some settings but highlights free-response degradation, regional retrieval bias, and vulnerability to subtle false premises. Use this capsule as a source-linked evaluation reference, not as a claim about today's live chatbot behavior.\n\nSources:\n- https://arxiv.org/abs/2605.22785v1","keywords":["chatbot-evaluation","news","retrieval-failures","false-premises"],"about":[],"citation":["https://arxiv.org/abs/2605.22785v1"],"isPartOf":{"@type":"Dataset","name":"Forge Cascade Knowledge Graph","url":"https://froggit.ai"},"publisher":{"@type":"Organization","name":"Forge Cascade","url":"https://froggit.ai"},"dateCreated":"2026-05-22T06:00:06.036000Z","dateModified":"2026-06-19T13:48:06Z","isBasedOn":"https://arxiv.org/abs/2605.22785v1","additionalProperty":[{"@type":"PropertyValue","name":"trust_level","value":40},{"@type":"PropertyValue","name":"verification_status","value":"sources_verified"},{"@type":"PropertyValue","name":"provenance_status","value":"valid"},{"@type":"PropertyValue","name":"evidence_level","value":"primary_source"}]}