45483 lines
1.3 MiB
Executable File
45483 lines
1.3 MiB
Executable File
[
|
|
{
|
|
"question": "What is the book authors first name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Susan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000347021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the book authors last name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Blackmore"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000347021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who's name is on the cake?",
|
|
"answer": "a dog",
|
|
"gt_answers": [
|
|
"Chris"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000334557.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the writing on the bottom of the picture read?",
|
|
"answer": "big kites",
|
|
"gt_answers": [
|
|
"Big Kites"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341689.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is advertised on the red sign?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Helly Hansen"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359981.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the Pacific Hwy Exit going to?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"North Sydney"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the Falcon Street exit going to?",
|
|
"answer": "falcon street",
|
|
"gt_answers": [
|
|
"Crows Nest"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Mosman Manly exit going to?",
|
|
"answer": "manly",
|
|
"gt_answers": [
|
|
"Chatswood Epping"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402661.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the logo on the upper left say?",
|
|
"answer": "skateboards",
|
|
"gt_answers": [
|
|
"MyProfe"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376967.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is this?",
|
|
"answer": "air france",
|
|
"gt_answers": [
|
|
"Airfrance"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does Espressa sell?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"pizza"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000457121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cost for delivery of Espressa Pizza?",
|
|
"answer": "\u00a36 for a large pizza",
|
|
"gt_answers": [
|
|
"FREE DELIVERY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000457121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the right say?",
|
|
"answer": "colab",
|
|
"gt_answers": [
|
|
"Collabora"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341583.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the sponsor of this tournament",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"blackrock"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000400737.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top left of the white board?",
|
|
"answer": "customer service",
|
|
"gt_answers": [
|
|
"Book and Stationary Event of the Year!",
|
|
"BOOK AND STATIONERY EVENT OF THE YEAR"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_95.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of mini jet?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"N5226F"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000214792.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is holding the mini jet?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"U.S. AIR FORCE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000214792.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What piece of fruit is the man holding?",
|
|
"answer": "banana",
|
|
"gt_answers": [
|
|
"banana"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356649.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many men are in the picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"two"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356649.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year does it say on the bottom right?",
|
|
"answer": "2008",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1914.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the tag?",
|
|
"answer": "golden",
|
|
"gt_answers": [
|
|
"Goldfiel"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01685808_4314.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of this product?",
|
|
"answer": "pro",
|
|
"gt_answers": [
|
|
"Pride."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the size of the items according to the label?",
|
|
"answer": "8 oz",
|
|
"gt_answers": [
|
|
"Jumbo."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the cooking device?",
|
|
"answer": "crock pot",
|
|
"gt_answers": [
|
|
"CROCK-POT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03133878_12981.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the yellow number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000560754.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what event is this?",
|
|
"answer": "birthday",
|
|
"gt_answers": [
|
|
"birthday"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407270.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is mentioned in the text",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"black"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342206.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number above the windshield?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"160"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000450050.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus heading?",
|
|
"answer": "switzerland",
|
|
"gt_answers": [
|
|
"MONCLOA ARAVACA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000450050.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the motorcycle's license plate?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"NXU4489",
|
|
"NXU 4489"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000543407.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign read?",
|
|
"answer": "stop railroad crossing",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000446618.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bar?",
|
|
"answer": "the temple bar",
|
|
"gt_answers": [
|
|
"The Temple Bar"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000168717.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which month is in the caption?",
|
|
"answer": "august",
|
|
"gt_answers": [
|
|
"aug"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000099956.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the tail wing?",
|
|
"answer": "virgin america",
|
|
"gt_answers": [
|
|
"Virgin"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366658.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom of the snowboard say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Custom"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000439676.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is listed on the sign to the right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Burton"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000439676.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the copyright on this image say?",
|
|
"answer": "no copyright",
|
|
"gt_answers": [
|
|
"Bridget Samuels 2011"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329356.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of car is this?",
|
|
"answer": "taxi",
|
|
"gt_answers": [
|
|
"Taxi"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02930766_29871.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front window?",
|
|
"answer": "taxi",
|
|
"gt_answers": [
|
|
"Not in Service"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02930766_29871.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is the copyright?",
|
|
"answer": "1994",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03868863_12028.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the AVE in the picture?",
|
|
"answer": "madison st",
|
|
"gt_answers": [
|
|
"1st",
|
|
"1st AVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330193.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the ST in the picture?",
|
|
"answer": "madison st",
|
|
"gt_answers": [
|
|
"Madison",
|
|
"Madison ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330193.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photo?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"matt dwen"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414069.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What hair product brand is sponsoring this match?",
|
|
"answer": "garnier",
|
|
"gt_answers": [
|
|
"Garnier"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377076.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the display sign?",
|
|
"answer": "penguin",
|
|
"gt_answers": [
|
|
"Office Equipment"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text on the traffic sign?",
|
|
"answer": "school bus stop",
|
|
"gt_answers": [
|
|
"SCHOOL BUS STOP AHEAD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360826.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of bike?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"repsol"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340213.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is presented on the black flag?",
|
|
"answer": "b",
|
|
"gt_answers": [
|
|
"M"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000577925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "There are two numbers presented on the flag. What are they?",
|
|
"answer": "1 and 2",
|
|
"gt_answers": [
|
|
"09"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000577925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is presented on the flag?",
|
|
"answer": "the coat of arms",
|
|
"gt_answers": [
|
|
"m '09"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000577925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the street curb?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"311"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368686.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the switch?",
|
|
"answer": "smd",
|
|
"gt_answers": [
|
|
"C & K",
|
|
"C & K C.R"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04372370_13773.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of of donut is this?",
|
|
"answer": "pumpkin chocolate chip",
|
|
"gt_answers": [
|
|
"Pumpkin chocolate chip donuts",
|
|
"PUMPKIN CHOCOLATE CHIP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417081.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top left button of this remote control read?",
|
|
"answer": "mute",
|
|
"gt_answers": [
|
|
"Code Search"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04074963_15147.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "stop men at work",
|
|
"gt_answers": [
|
|
"Stop Men At Work"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329233.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name visible on the side wall?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"OLYMPUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410453.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number code written on the train?",
|
|
"answer": "sc",
|
|
"gt_answers": [
|
|
"528818"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03393912_7802.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company written on the train",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"Golden West Service"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03393912_7802.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What initials are on the top of the machine?",
|
|
"answer": "lg",
|
|
"gt_answers": [
|
|
"LG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04554684_2945.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who does the Wholesale Nursey sell to?",
|
|
"answer": "the public",
|
|
"gt_answers": [
|
|
"PUBLIC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000086763.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many skateboards can you see?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"3",
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign board show?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which Avenue is displayed near the stop board?",
|
|
"answer": "avenue",
|
|
"gt_answers": [
|
|
"Avenue 55",
|
|
"55"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What avenue is shown?",
|
|
"answer": "broadway",
|
|
"gt_answers": [
|
|
"5 Av"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000327020.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website on the top left of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"EUtouring.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000334981.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus' route number?",
|
|
"answer": "no route number",
|
|
"gt_answers": [
|
|
"41"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713880.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the year in the bottom right of this photo?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000142299.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What track number is the train at?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360737.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the green street sign?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"GRANADA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414181.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom line of the sign say?",
|
|
"answer": "new west",
|
|
"gt_answers": [
|
|
"New West"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354360.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the dish called?",
|
|
"answer": "chicken biryani",
|
|
"gt_answers": [
|
|
"Chicken Dijon"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004042.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the player's t-shirt read?",
|
|
"answer": "i love tennis",
|
|
"gt_answers": [
|
|
"ADI DOES"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357812.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is printed on the jersey of the batter?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000454158.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of booth is the man standing in?",
|
|
"answer": "pay phone",
|
|
"gt_answers": [
|
|
"phone"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326320.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the cap?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"A4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000324000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the latop?",
|
|
"answer": "the name of the car",
|
|
"gt_answers": [
|
|
"ETATUNING.COM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000324000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the name tag?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"VENDO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000324000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the train going to?",
|
|
"answer": "the station",
|
|
"gt_answers": [
|
|
"Old Town"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342512.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the train?",
|
|
"answer": "331",
|
|
"gt_answers": [
|
|
"1033"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342512.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in front the store?",
|
|
"answer": "minx",
|
|
"gt_answers": [
|
|
"Minx"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_689.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name mentioned in the board ?",
|
|
"answer": "little gilbert street",
|
|
"gt_answers": [
|
|
"Little Gilbert st"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the large pink text say?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"Pardon Me Prime Minister"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319257.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of bananas pictured",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Chiquita"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000037377.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sku number on the bananas",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"4011"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000037377.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the word on the pot?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Boroplast"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000260827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is under the main road sign?",
|
|
"answer": "a building",
|
|
"gt_answers": [
|
|
"A 454"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408391.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is on the banner behind the player?",
|
|
"answer": "bnp paribas",
|
|
"gt_answers": [
|
|
"BNP Paribas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349360.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline does this plane belong to?",
|
|
"answer": "delta airlines",
|
|
"gt_answers": [
|
|
"DELTA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_5581.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say to start the school year doing?",
|
|
"answer": "writing a thank you note",
|
|
"gt_answers": [
|
|
"Celebrating"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030998.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much prize money is there?",
|
|
"answer": "$ 500,000 in prizes",
|
|
"gt_answers": [
|
|
"$500.00"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030998.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the train",
|
|
"answer": "stl",
|
|
"gt_answers": [
|
|
"PACER STACKTRAIN",
|
|
"STACKTRAIN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is below Nottingham City?",
|
|
"answer": "city",
|
|
"gt_answers": [
|
|
"Transport"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1591990.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where can I buy those?",
|
|
"answer": "china mainland",
|
|
"gt_answers": [
|
|
"www.zzwldz.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04116512_6856.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bus company?",
|
|
"answer": "red bus",
|
|
"gt_answers": [
|
|
"Arriva"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065705.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the white jersey?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350038.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was the image copyrighted?",
|
|
"answer": "2000",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02124075_7973.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who holds the copyright?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"charlotte edwards"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02124075_7973.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color are the digits on the train's sign?",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"red",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340432.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the girls sweatshirt say?",
|
|
"answer": "george washington university",
|
|
"gt_answers": [
|
|
"The George Washington University"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342984.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which tennis tournament was this picture taken at?",
|
|
"answer": "indian wells open",
|
|
"gt_answers": [
|
|
"ATP World Tour"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000043431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name at the bottom of the booth?",
|
|
"answer": "sams club",
|
|
"gt_answers": [
|
|
"Organi",
|
|
"ORGANI SINGAPORE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_81.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What jersey number is the boy in the front of the image wearing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"56"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000348116.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What jersey number is the boy in the back of the image wearing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"46"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000348116.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the label given",
|
|
"answer": "elapid snake",
|
|
"gt_answers": [
|
|
"kevin messenger 2010"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01728920_18655.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which animal placed on there",
|
|
"answer": "snake",
|
|
"gt_answers": [
|
|
"snake"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01728920_18655.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photography name?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"WWGE Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328682.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green street sign say?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"42 st",
|
|
"W 42"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the billboard say?",
|
|
"answer": "free million dollar poker tournament",
|
|
"gt_answers": [
|
|
"MILLION DOLLAR POKER TOURNAMENT",
|
|
"MILLION DOLLAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this train going?",
|
|
"answer": "to new york",
|
|
"gt_answers": [
|
|
"To New York",
|
|
"New York"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416461.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the blue sign at the top of the building say?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"Point"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713741.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the octagonal sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406596.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number given in his t-shirt?",
|
|
"answer": "20",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365461.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Does the establishment offer wifi?",
|
|
"answer": "yes",
|
|
"gt_answers": [
|
|
"Free Wifi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4188.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the side of the white vehicle in the picture?",
|
|
"answer": "police",
|
|
"gt_answers": [
|
|
"POLICE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03977966_28170.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is printed on the tailgate of this pick up truck?",
|
|
"answer": "ford",
|
|
"gt_answers": [
|
|
"MoToRZTV"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405965.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What center is this?",
|
|
"answer": "it is a shopping center",
|
|
"gt_answers": [
|
|
"Art"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3380.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the book?",
|
|
"answer": "a book of secrets",
|
|
"gt_answers": [
|
|
"PENDRAGON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000069339.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the author of the book?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MACHALE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000069339.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was Berlin's established?",
|
|
"answer": "1895",
|
|
"gt_answers": [
|
|
"1883"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316812.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What season comes after winter?",
|
|
"answer": "spring",
|
|
"gt_answers": [
|
|
"spring"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000441943.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the on the building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Galaxy"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000478.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THIS BUILD",
|
|
"answer": "shanghai city hall",
|
|
"gt_answers": [
|
|
"CALCADA DOS QUARTEIS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000437138.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON THIS SIGN BOARD?",
|
|
"answer": " ",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000437138.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is printed 3 times on the building?",
|
|
"answer": "ice cream",
|
|
"gt_answers": [
|
|
"DIESEL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000119882.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the identification number on the blue part of the boat",
|
|
"answer": "hms swan",
|
|
"gt_answers": [
|
|
"rnli16-23",
|
|
"RNLI 16-23"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498267.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "do not enter",
|
|
"gt_answers": [
|
|
"Do Not Enter"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403084.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is wrirrten on the dorr frame?",
|
|
"answer": "adirondacks",
|
|
"gt_answers": [
|
|
"Progress",
|
|
"PROGRESS AUTOMATIC DOOR, INC."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04239074_374.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Brand name?",
|
|
"answer": "vitamin c",
|
|
"gt_answers": [
|
|
"Jiffy"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030466.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of road is Park St?",
|
|
"answer": "one way street",
|
|
"gt_answers": [
|
|
"One Way",
|
|
"PARK ST ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366762.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the slogan in the white corner?",
|
|
"answer": "masshouse changes",
|
|
"gt_answers": [
|
|
"Making more of the city"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366762.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is in the building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BURLINGTON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000482295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time does Almuerzo Criollo start?",
|
|
"answer": "11 am",
|
|
"gt_answers": [
|
|
"12:00 PM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325200.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What group?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"First"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02086910_2399.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what year was the picture taken?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000448143.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who copyrighted the photograph?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"David Quick"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000448143.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first 4 letters of the words on the backboard?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"High"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000448143.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What climate is the suggested storage in this image?",
|
|
"answer": "cold",
|
|
"gt_answers": [
|
|
"Cold"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_956.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the walker on the phone?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"16957"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408919.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the poster to the left of the picture?",
|
|
"answer": "brewpub",
|
|
"gt_answers": [
|
|
"Tommyknocker Imperial",
|
|
"IMPERIAL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823428_11421.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the white jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"32"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000574857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the slogan on the beverage bottles?",
|
|
"answer": "i'm thirsty",
|
|
"gt_answers": [
|
|
"BRING IT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056647.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the blue rectangle on the box inside the refrigerator?",
|
|
"answer": "ice maker",
|
|
"gt_answers": [
|
|
"3D",
|
|
"30"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056647.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "when was the picture taken?",
|
|
"answer": "in the kitchen",
|
|
"gt_answers": [
|
|
"02/10/2009"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000271495.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What hot dog company is being shown in this picture?",
|
|
"answer": "person hot dogs",
|
|
"gt_answers": [
|
|
"nathan's"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000034785.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the basket of fires?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"95"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000034785.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company on the brown shirt?",
|
|
"answer": "microsoft",
|
|
"gt_answers": [
|
|
"Longhorn's Breath Saloon"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368327.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many cheeses are on the lasagna?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011382.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is NEW?",
|
|
"answer": "qr code",
|
|
"gt_answers": [
|
|
"ERA"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_396.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "You should fly your own what?",
|
|
"answer": "airplane",
|
|
"gt_answers": [
|
|
"Flag"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_396.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the phrase under the New Era logo?",
|
|
"answer": "new era",
|
|
"gt_answers": [
|
|
"Fly your own flag"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_396.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is it Irvine Center Dr or Ave?",
|
|
"answer": "ave",
|
|
"gt_answers": [
|
|
"Dr"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000158729.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the woman's shirt?",
|
|
"answer": "banana",
|
|
"gt_answers": [
|
|
"TJOOK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325179.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the written on the plane?",
|
|
"answer": "british airways",
|
|
"gt_answers": [
|
|
"British airways"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the year on the bottle?",
|
|
"answer": "1900",
|
|
"gt_answers": [
|
|
"1900"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04579145_5777.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does it cost for egg salad?",
|
|
"answer": "it costs $4",
|
|
"gt_answers": [
|
|
"$5.25"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07565083_361.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company name on the advertisement?",
|
|
"answer": "hyundai",
|
|
"gt_answers": [
|
|
"Mike Hynan Hynan Coach Services"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332839.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the team in blue?",
|
|
"answer": "hyun",
|
|
"gt_answers": [
|
|
"Eagles"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332839.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000240490.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters on the hat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000240490.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sandwich is printed on the wall?",
|
|
"answer": "philly cheese steak",
|
|
"gt_answers": [
|
|
"Panini"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000394529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is displayed in the upper left of the image?",
|
|
"answer": "a city street",
|
|
"gt_answers": [
|
|
"22nd Street"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159757.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is at the bottom of the picture?",
|
|
"answer": "printer",
|
|
"gt_answers": [
|
|
"adoos"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03924679_16021.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "whats the name on the wall?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"hertz"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000431012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the single letter after hertz?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"r"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000431012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do you need to get?",
|
|
"answer": "horse",
|
|
"gt_answers": [
|
|
"Reading"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416007.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title for the movie depicted on the pictures on the bus?",
|
|
"answer": "jack reacher",
|
|
"gt_answers": [
|
|
"Jack Reacher"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320314.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What flavor are the ALTOIDS?",
|
|
"answer": "mint",
|
|
"gt_answers": [
|
|
"APPLE SOURS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02092339_1691.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city operates the fire truck?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"MONTVALE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03345487_7060.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business written on the side of the horse carriage?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"DOUBLE M FARMS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000251697.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What state is written on the side of the horse carriage?",
|
|
"answer": "michigan",
|
|
"gt_answers": [
|
|
"Iowa"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000251697.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the website?",
|
|
"answer": "orange graphics",
|
|
"gt_answers": [
|
|
"OrangeGraphics"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_29955.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the plane?",
|
|
"answer": "spitfire",
|
|
"gt_answers": [
|
|
"HTOE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000073892.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the airplane",
|
|
"answer": "indigo",
|
|
"gt_answers": [
|
|
"Indigo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the sign?",
|
|
"answer": "saint",
|
|
"gt_answers": [
|
|
"SAINT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361725.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What symbol is on the players arm band?",
|
|
"answer": "a tennis ball",
|
|
"gt_answers": [
|
|
"nike"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361633.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the participants number?",
|
|
"answer": "200,000",
|
|
"gt_answers": [
|
|
"119"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04456115_5677.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What symbol is on the sign in blue?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"?"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406205.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the sign in green?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406205.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the bus?",
|
|
"answer": "volvo",
|
|
"gt_answers": [
|
|
"Mercedes-Benz"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000303340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company operates the bus?",
|
|
"answer": "nederlandse vervoer",
|
|
"gt_answers": [
|
|
"Citaro"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000303340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of business is Walthall?",
|
|
"answer": "manufacturer",
|
|
"gt_answers": [
|
|
"hotel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000211867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of store in between the flag and streetlight?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BOMARS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000211867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the plane say?",
|
|
"answer": "us navy",
|
|
"gt_answers": [
|
|
"Blue Angels"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408243.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How old do you have to be to use this computer?",
|
|
"answer": "18",
|
|
"gt_answers": [
|
|
"Over 18 Only",
|
|
"OVER 18"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376682.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the bears foot?",
|
|
"answer": "bears foot",
|
|
"gt_answers": [
|
|
"Me to You"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334789.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the store's name?",
|
|
"answer": "chanel",
|
|
"gt_answers": [
|
|
"Charles & Keith"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_852.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is this jet?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"216"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376993.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the yellow vest>",
|
|
"answer": "frisbee",
|
|
"gt_answers": [
|
|
"Illegal rave party"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345067.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What artist's signature is on this painting?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Fiava"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1450.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is your favorite beer?",
|
|
"answer": "bud light",
|
|
"gt_answers": [
|
|
"Carlsberg Beer",
|
|
"CARLSBERG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823750_3226.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are you drinking?",
|
|
"answer": "beer",
|
|
"gt_answers": [
|
|
"Carlsberg Beer",
|
|
"CARLSBERG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823750_3226.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the drink in the picture?",
|
|
"answer": "coors light",
|
|
"gt_answers": [
|
|
"Carlsberg",
|
|
"Carlsberg Beer"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823750_3226.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the t-shirt?",
|
|
"answer": "great",
|
|
"gt_answers": [
|
|
"Cereal"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014333.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what name is written on the blue and white banner",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"New Era Fits",
|
|
"New Era"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000217925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the biggest word shown on the sign on the right side of this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MILES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000155061.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word (bottom right) on the sign shown on the right side of this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"DAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000155061.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website address at the bottom?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"www.myprofe.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323952.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is printed on the lime green sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Sisak"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000155097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What instruction is written inside the red and white hexagon?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323049.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the color of the caps of players?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02799071_13320.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the color of the ball?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02799071_13320.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the color of the pant of the player wearing white jersey?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02799071_13320.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this?",
|
|
"answer": "canary islands",
|
|
"gt_answers": [
|
|
"La playa de Las Canteras.",
|
|
"LA PLAYA DE LAS CANTERAS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347471.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the wall with blue?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"RACK ED"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the three letters that come after the word SUSHI?",
|
|
"answer": "nigiri",
|
|
"gt_answers": [
|
|
"TEI"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_911.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of board reference?",
|
|
"answer": "a sign of board reference is a sign",
|
|
"gt_answers": [
|
|
"way cool way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330509.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the airline name on the planes pictured?",
|
|
"answer": "ana",
|
|
"gt_answers": [
|
|
"ANA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000573854.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the bicycles?",
|
|
"answer": "haro",
|
|
"gt_answers": [
|
|
"trek"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4450.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company on the back wall?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"Apia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322921.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the color of the tennis court?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322921.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the arched text say?",
|
|
"answer": "coffee shop",
|
|
"gt_answers": [
|
|
"Coffeeshop"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000346.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What clothing brand is above the woman's picture?",
|
|
"answer": "epi centre",
|
|
"gt_answers": [
|
|
"Guess"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_372.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "brandon street",
|
|
"gt_answers": [
|
|
"Brandon",
|
|
"Brandon St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371304.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the type of street sign?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"One way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371304.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store has a sale?",
|
|
"answer": "palladium",
|
|
"gt_answers": [
|
|
"Pull & Bear",
|
|
"PULL&BEAR"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_589.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the store front read?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"My Gym"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_281.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the dimensions for Regular?",
|
|
"answer": "the dimensions for regular are as follows",
|
|
"gt_answers": [
|
|
"18\" x 30\""
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03223299_3055.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the dimensions for Residence?",
|
|
"answer": "the dimensions for residence are 1 2 x 1 2",
|
|
"gt_answers": [
|
|
"28\" x 36\""
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03223299_3055.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who manufactures the Trinitron?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"SONY"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04152593_11730.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is in red?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"vans"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000444809.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is on the top of the post?",
|
|
"answer": "a beauty salon",
|
|
"gt_answers": [
|
|
"Stationary Store"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000016449.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What clinic is on the bottom of the post?",
|
|
"answer": "st mary's hospital",
|
|
"gt_answers": [
|
|
"Beauty Clinic"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000016449.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the red vehicle?",
|
|
"answer": "mk ts",
|
|
"gt_answers": [
|
|
"XVP962",
|
|
"XVP 962"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business on the tower?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Attebury Grain"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000377619.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the slanted sign?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000377619.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the building?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text do you see at the bottom left of the picture?",
|
|
"answer": "bathroom",
|
|
"gt_answers": [
|
|
"MIAMIDREAMREALTY.COM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOUR OF HIS JACKET?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"BLACK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOUR OF HIS SHOES?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"WHITE WITH BLACK",
|
|
"white"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOUR OF HIS HELMET?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"BLACK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company makes the red truck?",
|
|
"answer": "scania",
|
|
"gt_answers": [
|
|
"SCANIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117514.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the URL on the side of the red truck?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"www.vmcltd.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117514.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company abbreviation on the red truck's cab?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"VMC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117514.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the cereal?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"grape-nuts"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352724.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the cereal?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Post"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352724.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the yogurt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Yoplait"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352724.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NUMBER OF LAST PERSON?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"60"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000477192.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the purple sign?",
|
|
"answer": "bus",
|
|
"gt_answers": [
|
|
"Skylight"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_238.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign with food say?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"Skylight"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_238.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the fourth word in black type on the white sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"YIELD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000164842.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company that owns the building",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"vodafone"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001247.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the shape shown?",
|
|
"answer": "round",
|
|
"gt_answers": [
|
|
"Syringe"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04376876_10759.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on top left corner",
|
|
"answer": "a pantry",
|
|
"gt_answers": [
|
|
"all those details"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000351218.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what three letters are written at the top center?",
|
|
"answer": "qqq",
|
|
"gt_answers": [
|
|
"OSS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374047.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the side of the bus?",
|
|
"answer": "a la guichet",
|
|
"gt_answers": [
|
|
"La Cure Gourmande"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342812.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which cell is closest to the camera?",
|
|
"answer": "c",
|
|
"gt_answers": [
|
|
"26"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the wine with the copper-colored top?",
|
|
"answer": "pinot gris",
|
|
"gt_answers": [
|
|
"BRIDLEWOOD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the DVD?",
|
|
"answer": "the kings speech",
|
|
"gt_answers": [
|
|
"The Situation Workout",
|
|
"THE SITUATION"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who manufactures the camera?",
|
|
"answer": "polaroid corporation",
|
|
"gt_answers": [
|
|
"Polaroid"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03976467_224.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the train?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"106"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411617.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company for the advertizement?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"fontana"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000384559.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the baseball batter's shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000413918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the box?",
|
|
"answer": "it is a cake",
|
|
"gt_answers": [
|
|
"Create"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335730.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which numbers are written in front of the train",
|
|
"answer": "the number of the train",
|
|
"gt_answers": [
|
|
"183 004"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000084103.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the train?",
|
|
"answer": "elx",
|
|
"gt_answers": [
|
|
"alex"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000084103.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer?",
|
|
"answer": "wingwatchers by jay",
|
|
"gt_answers": [
|
|
"Jodie"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02168699_4042.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where was this picture found?",
|
|
"answer": "on a wooden deck",
|
|
"gt_answers": [
|
|
"Wing Watchers.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02168699_4042.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where can you get pictures of insects?",
|
|
"answer": "wingwatchers com",
|
|
"gt_answers": [
|
|
"Wing Watchers.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02168699_4042.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this?",
|
|
"answer": "panasonic",
|
|
"gt_answers": [
|
|
"Panasonic"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04392985_4223.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "i do not know",
|
|
"gt_answers": [
|
|
"7125"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350833.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are on the left and right of the banner?",
|
|
"answer": "a and b",
|
|
"gt_answers": [
|
|
"k k",
|
|
"K TTS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000196676.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of lamp is it?",
|
|
"answer": "antler table lamp",
|
|
"gt_answers": [
|
|
"Antler Table Lamp"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04380533_2248.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is this?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"7 eleven"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_197.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the top sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000389016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the T-shirt",
|
|
"answer": "skate",
|
|
"gt_answers": [
|
|
"Relate"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409572.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the first word on top left",
|
|
"answer": "water",
|
|
"gt_answers": [
|
|
"soap"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04254120_3241.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who copyrighted this photo",
|
|
"answer": "angela kartt",
|
|
"gt_answers": [
|
|
"Angela Kraft"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02442845_18749.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the camera?",
|
|
"answer": "panasonic",
|
|
"gt_answers": [
|
|
"Pentax"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04069434_11314.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what word is written on the airplane?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348331.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is listed on the boat?",
|
|
"answer": "cartagena",
|
|
"gt_answers": [
|
|
"CORTEZ"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04606251_4010.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many fluid ounces does the bottle hold?",
|
|
"answer": "24",
|
|
"gt_answers": [
|
|
"12"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03983396_17918.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of beverage does the bottle advertise?",
|
|
"answer": "cola",
|
|
"gt_answers": [
|
|
"COLA",
|
|
"RC COLA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03983396_17918.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which state is this bus from?",
|
|
"answer": "florida",
|
|
"gt_answers": [
|
|
"North Carolina"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of center is mentioned on the bus?",
|
|
"answer": "medical",
|
|
"gt_answers": [
|
|
"Medical-Dental"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number has the boy in white shirt?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the two letters after sig?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ht"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000437778.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What this bus for?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"City sightseeing",
|
|
"SIGHTSEEING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000437778.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOR OF THE TANK?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"BLUE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04562935_11100.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THE TANK?",
|
|
"answer": "water tower",
|
|
"gt_answers": [
|
|
"DEWES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04562935_11100.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the train?",
|
|
"answer": "id number",
|
|
"gt_answers": [
|
|
"1803"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is shown?",
|
|
"answer": "main street",
|
|
"gt_answers": [
|
|
"SMITH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red octagon instructing you to do?",
|
|
"answer": "turn left",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store in the fore front?",
|
|
"answer": "chanel",
|
|
"gt_answers": [
|
|
"PARAGON"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_335.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which stores is shown?",
|
|
"answer": "chanel",
|
|
"gt_answers": [
|
|
"Paragon"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_335.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this not?",
|
|
"answer": "a parking meter",
|
|
"gt_answers": [
|
|
"A PARKING METER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368061.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word in black comes below 1/2 price?",
|
|
"answer": "sale",
|
|
"gt_answers": [
|
|
"sale"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_779.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What activity is happening in the picture?",
|
|
"answer": "skiing",
|
|
"gt_answers": [
|
|
"Skiing"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000082766.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the blue vehicle in the centre of the image?",
|
|
"answer": "mcdo",
|
|
"gt_answers": [
|
|
"KAV 215F",
|
|
"KAV215F"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346598.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the make of the blue vehicle in the centre of the image?",
|
|
"answer": "ford",
|
|
"gt_answers": [
|
|
"Dodge"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346598.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written on the top of the front of the ble vehicle in the centre of the image?",
|
|
"answer": "john deere",
|
|
"gt_answers": [
|
|
"Bruce Mackie"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346598.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What beer company is advertised on the blue wall?",
|
|
"answer": "bud light",
|
|
"gt_answers": [
|
|
"BUD LIGHT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329680.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the reference number for this picture?",
|
|
"answer": "ltv a alligator",
|
|
"gt_answers": [
|
|
"7033"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02704792_5967.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name on the bottom left corner of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Ron"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000209533.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number on the ship",
|
|
"answer": "the number on the ship",
|
|
"gt_answers": [
|
|
"3 AT 6 856"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written behind the man",
|
|
"answer": "fishing boat",
|
|
"gt_answers": [
|
|
"3 AT 6 856"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the vessel",
|
|
"answer": "m/v san francisco",
|
|
"gt_answers": [
|
|
"3 AT 6 856"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What dates are on this baseball?",
|
|
"answer": "october 1st, 2017",
|
|
"gt_answers": [
|
|
"1964-2004"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000155790.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is present in the green board",
|
|
"answer": "clock",
|
|
"gt_answers": [
|
|
"For Lease"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000132193.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is number showing in the board",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"6046623000",
|
|
"604 662 3000"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000132193.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the wooden sign?",
|
|
"answer": "bear creek",
|
|
"gt_answers": [
|
|
"Bear Cave"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365173.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the game that is behind the cat?",
|
|
"answer": "monopoly",
|
|
"gt_answers": [
|
|
"The Moment of Truth"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000467762.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "chris alice",
|
|
"gt_answers": [
|
|
"Chez Florence"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04443257_14733.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of cigarettes is being advertised?",
|
|
"answer": "marlboro",
|
|
"gt_answers": [
|
|
"Marlboro"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04443257_14733.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white car's license plate say?",
|
|
"answer": "san diego",
|
|
"gt_answers": [
|
|
"M 2759 UT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2239.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the large letter on the parking meter?",
|
|
"answer": "s",
|
|
"gt_answers": [
|
|
"e"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000250268.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the parking meter?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"6328"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000250268.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of things do they do here?",
|
|
"answer": "they do engineering",
|
|
"gt_answers": [
|
|
"Computer science and engineering"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000406.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the skateboard held by the man in yellow?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"loaded"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000410601.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the beverage?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Inca Kola"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000290974.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is it on the object?",
|
|
"answer": "the time is 12 noon",
|
|
"gt_answers": [
|
|
"10:32"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03196217_5469.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the image advertise in red font?",
|
|
"answer": "commitment to excellence",
|
|
"gt_answers": [
|
|
"LOW PRICES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03196217_5469.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the boy in brown doing?",
|
|
"answer": "skateboarding",
|
|
"gt_answers": [
|
|
"skateboarding"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000531894.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "london bridge",
|
|
"gt_answers": [
|
|
"Degenham Dock"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000100506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"EL2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000100506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store with the purple sign?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"EPICURIA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417010.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the Lecture Theatre?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_163.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which airline is this plane?",
|
|
"answer": "air canada",
|
|
"gt_answers": [
|
|
"Air Canada"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is airline company appears on the side of the plane?",
|
|
"answer": "air canada",
|
|
"gt_answers": [
|
|
"AIR CANADA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the mans life jacket?",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"Yellow"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411497.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the boat?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411497.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is displayed on the heart?",
|
|
"answer": "love",
|
|
"gt_answers": [
|
|
"LOVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers are on the crate on the right?",
|
|
"answer": "crate number 1",
|
|
"gt_answers": [
|
|
"17342",
|
|
"724"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000093707.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of airmen are referred to on the plane?",
|
|
"answer": "pilot",
|
|
"gt_answers": [
|
|
"Tuskegee Airmen",
|
|
"Tuskegee"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many rounds are in the main event?",
|
|
"answer": "eight",
|
|
"gt_answers": [
|
|
"12 ROUNDS",
|
|
"12"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354197.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who are the two fighters?",
|
|
"answer": "johnson and johnson",
|
|
"gt_answers": [
|
|
"Joe Frazier vs. Muhammad Ali",
|
|
"Joe Frazier, Muhammad Ali"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354197.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the sign?",
|
|
"answer": "bike lane",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000021528.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the bottom of the door?",
|
|
"answer": "i'm the king",
|
|
"gt_answers": [
|
|
"Show"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say at the top of the door?",
|
|
"answer": "i'm a king",
|
|
"gt_answers": [
|
|
"circus"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the baseball hat?",
|
|
"answer": "i'm a rapper",
|
|
"gt_answers": [
|
|
"Compton"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of TV is labeled on the box?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"samsung"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000387693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the table number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"14"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000180606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text in the top right of the image say?",
|
|
"answer": "china mainland",
|
|
"gt_answers": [
|
|
"Happy Duckling"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03908618_34292.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number listed in the corner?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"063_856"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the silver appliance?",
|
|
"answer": "mr coffee",
|
|
"gt_answers": [
|
|
"xast"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000264618.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is New and Improved?",
|
|
"answer": "soft & flexible dispenser",
|
|
"gt_answers": [
|
|
"DISPENSER"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005752.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is the Paper Bagt from?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Cracker Barrel"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005752.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where does the sign on the right say this is?",
|
|
"answer": "city centre",
|
|
"gt_answers": [
|
|
"Piccadilly Gardens"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000106382.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the blue sign say?",
|
|
"answer": "manchester city centre",
|
|
"gt_answers": [
|
|
"cycle hub"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000106382.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of street sign is this?",
|
|
"answer": "london road sign",
|
|
"gt_answers": [
|
|
"zone",
|
|
"ZONE ENDS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000285810.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number does the bus have",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340445.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the lagest word on the motel sign",
|
|
"answer": "motel",
|
|
"gt_answers": [
|
|
"Motel"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000816.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many megabits are displayed on the bus advertisement?",
|
|
"answer": "100 megabits",
|
|
"gt_answers": [
|
|
"5",
|
|
"THREE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417107.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the route number of the yellow bus?",
|
|
"answer": "route number",
|
|
"gt_answers": [
|
|
"66"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417107.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is on the yellow bottle?",
|
|
"answer": "dettol",
|
|
"gt_answers": [
|
|
"Sunlight"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012521.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the time on the platform?",
|
|
"answer": "the time on the platform",
|
|
"gt_answers": [
|
|
"09:05:25"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000580979.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the sign?",
|
|
"answer": "expo",
|
|
"gt_answers": [
|
|
"CYCLISTS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the sign?",
|
|
"answer": "car",
|
|
"gt_answers": [
|
|
"CARRALL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first two words on the sign?",
|
|
"answer": "carrill closed",
|
|
"gt_answers": [
|
|
"CARRALL CLOSED"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the circus",
|
|
"answer": "the bear in the circus",
|
|
"gt_answers": [
|
|
"banana"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the women's skirt made of?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"banana"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red sign.",
|
|
"answer": "teddy bears' picnic",
|
|
"gt_answers": [
|
|
"CIRCUS banana",
|
|
"BANANA CIRCUS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the copyright year?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2004"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02226429_14872.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign on the far left?",
|
|
"answer": "movie theater",
|
|
"gt_answers": [
|
|
"Real d 3d"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is sponsoring this tennis match?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Scottrade"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337117.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is the distance of Jackson?",
|
|
"answer": "5 miles",
|
|
"gt_answers": [
|
|
"500"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000557263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What place is located at 800m away?",
|
|
"answer": "jackson street",
|
|
"gt_answers": [
|
|
"Montgomery"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000557263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the milk?",
|
|
"answer": "sam's club",
|
|
"gt_answers": [
|
|
"Clover"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000054521.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is in the Old El Paso jar?",
|
|
"answer": "cat food",
|
|
"gt_answers": [
|
|
"salsa"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000054521.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this bottle?",
|
|
"answer": "apple",
|
|
"gt_answers": [
|
|
"appletiser"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000364722.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percent is apple juice?",
|
|
"answer": "100 percent",
|
|
"gt_answers": [
|
|
"100%"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000364722.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which class is mentioned in this photograph?",
|
|
"answer": "english",
|
|
"gt_answers": [
|
|
"1986"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign in this photograph say?",
|
|
"answer": "christ south high school",
|
|
"gt_answers": [
|
|
"Chatsworth High School"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What chocolate brand is featured?",
|
|
"answer": "godiva",
|
|
"gt_answers": [
|
|
"Godiva"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_170.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is Godiva from?",
|
|
"answer": "belgium",
|
|
"gt_answers": [
|
|
"Belgium"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_170.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What gate number is the plane at?",
|
|
"answer": "a gate",
|
|
"gt_answers": [
|
|
"40"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362978.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the players last name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BRAUN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000283678.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the players jersey number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000283678.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline on the back of the cart?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"Delta"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499372.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say at the top?",
|
|
"answer": "tepa kabab",
|
|
"gt_answers": [
|
|
"Tzita Kaaba"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is a type of cheese very popular on pizzas?",
|
|
"answer": "mozzarella",
|
|
"gt_answers": [
|
|
"Parmesan"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019374.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "According to the sign, how far is the desert?",
|
|
"answer": "5 miles",
|
|
"gt_answers": [
|
|
"300 miles"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349973.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who manufactured the green pickup truck?",
|
|
"answer": "ford",
|
|
"gt_answers": [
|
|
"Ford"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last two digit number in the first line of the sign?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"19"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362625.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number on the players shirt?",
|
|
"answer": "17",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_63092.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the business on the banner?",
|
|
"answer": "washington capitals",
|
|
"gt_answers": [
|
|
"chesapeake beach resort & spa"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_63092.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number shirt is the player in red wearing?",
|
|
"answer": "17",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_63092.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last letter of graffiti on the man's chin?",
|
|
"answer": "e",
|
|
"gt_answers": [
|
|
"B"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1642.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made the hydrant?",
|
|
"answer": "johns manville",
|
|
"gt_answers": [
|
|
"Smith"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420066.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the player in the red jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"30"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000376523.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the batter?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ASTUDILLO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000376523.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white on the red octagon sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000079191.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the yellow front of the train?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"47790"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713687.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what's the word on the top right?",
|
|
"answer": "spicy",
|
|
"gt_answers": [
|
|
"mix"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006098.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What tag number is on the middle cow's ear?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"113"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000530460.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many ounces are in this container?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"1.25",
|
|
"1.25 FL OZ"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of food is in the box?",
|
|
"answer": "roast lamb",
|
|
"gt_answers": [
|
|
"Roast Lamb"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many elephants are there?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"3",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402041.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the main color the elephant riders are wearing?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402041.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color are the elephants?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"Gray",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402041.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of animal is the man kissing",
|
|
"answer": "snake",
|
|
"gt_answers": [
|
|
"Snake"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01748264_17201.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was this photograph taken?",
|
|
"answer": "november 2015",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000399097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the sidewalk with the arrow say?",
|
|
"answer": "st peter's street",
|
|
"gt_answers": [
|
|
"TEA ROOM OPEN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000399097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written beneath corona",
|
|
"answer": "tequila",
|
|
"gt_answers": [
|
|
"extra"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823428_8352.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phrase follows the name Obama?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"yes you can"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000515550.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of typewriter is being used?",
|
|
"answer": "a royal typewriter",
|
|
"gt_answers": [
|
|
"Olympia"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04505470_4902.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the photo taken?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000560885.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "There are 3 people in this ____?",
|
|
"answer": "picture",
|
|
"gt_answers": [
|
|
"photo"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000560885.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate?",
|
|
"answer": "md",
|
|
"gt_answers": [
|
|
"JGT 657"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376934.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of sign can you see?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376934.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bag in the front?",
|
|
"answer": "samsonite",
|
|
"gt_answers": [
|
|
"Everest"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330375.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the bag in the front?",
|
|
"answer": "rucksack",
|
|
"gt_answers": [
|
|
"Everest"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330375.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the novel in the image?",
|
|
"answer": "cat in the bag",
|
|
"gt_answers": [
|
|
"HARRY POTTER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000415604.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word in the name of the pictured store?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"trend"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000186196.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airlines does this plane fly for?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"swiss"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000503782.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is this a bar or club in NYC?",
|
|
"answer": "a bathroom in a bar",
|
|
"gt_answers": [
|
|
"N.Y.C."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000482433.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand on the red advertisement?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Bank of America"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what type of boat is the blue and white one, the big word written on the blue and white boat.",
|
|
"answer": "police",
|
|
"gt_answers": [
|
|
"police"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321960.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "no bicycles in station",
|
|
"gt_answers": [
|
|
"walk your bike"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000084981.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the main text say on the blue jersey?",
|
|
"answer": "santa clara",
|
|
"gt_answers": [
|
|
"Acronis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346031.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of car is this?",
|
|
"answer": "honda civic",
|
|
"gt_answers": [
|
|
"Honda"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000112726.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What movie is shown on the DVD cover?",
|
|
"answer": "dinosaur",
|
|
"gt_answers": [
|
|
"Beverly Hills Chihuahua"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332921.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What soda company is seen on the cup?",
|
|
"answer": "pepsi",
|
|
"gt_answers": [
|
|
"Pepsi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362046.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the image taken?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000578306.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what type of shop is in the image",
|
|
"answer": "bookshop",
|
|
"gt_answers": [
|
|
"book shop",
|
|
"BOOKSHOP"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02871525_23832.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what dose the yellow sign say on the building",
|
|
"answer": "bookshop",
|
|
"gt_answers": [
|
|
"open"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02871525_23832.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many people are in the image",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"one"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02871525_23832.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are above the image of the red fruit?",
|
|
"answer": "fruit juice",
|
|
"gt_answers": [
|
|
"Discover Flavour"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316462.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the town on the fire engine?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"holliston"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000439890.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is next (left-side) of the word \"stroke\" in front of the green car?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000477015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What 4-digit number is on the yellow stick in front of the green car?",
|
|
"answer": "999",
|
|
"gt_answers": [
|
|
"4764"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000477015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What 3 letters are shown on the bottom left of the green car?",
|
|
"answer": "pk",
|
|
"gt_answers": [
|
|
"cng"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000477015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the character appearing on the microwave?",
|
|
"answer": "hello kitty",
|
|
"gt_answers": [
|
|
"Hello Kitty"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000322325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the original price of the item?",
|
|
"answer": "$99",
|
|
"gt_answers": [
|
|
"91.99"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000322325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of the item after the temporary price cut?",
|
|
"answer": "$99",
|
|
"gt_answers": [
|
|
"69.99"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000322325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is printed on the blue roof?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371685.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the colour of the car?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is lying beside the car?",
|
|
"answer": "purple umbrella",
|
|
"gt_answers": [
|
|
"Umbrella"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the colour of umbrella?",
|
|
"answer": "purple",
|
|
"gt_answers": [
|
|
"Violet"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "no diving",
|
|
"gt_answers": [
|
|
"No diving"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417309.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue sign?",
|
|
"answer": "tees",
|
|
"gt_answers": [
|
|
"Gas Showroom"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of invitation is this?",
|
|
"answer": "a wedding invitation",
|
|
"gt_answers": [
|
|
"Wedding"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000460643.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does it say on the building",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TRGOVACKI CENTAR DUBRAVA"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3352.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of business is OCBC?",
|
|
"answer": "bank",
|
|
"gt_answers": [
|
|
"Bank"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_412.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign tell people?",
|
|
"answer": "road ahead closed",
|
|
"gt_answers": [
|
|
"Road Ahead Closed"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3054.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is stated in the beginning of the name of the airline?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"AIR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the airlines name following the word air?",
|
|
"answer": "philippine airlines",
|
|
"gt_answers": [
|
|
"PACIFIC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Below air pacific, what pacific island name does it say?",
|
|
"answer": "philippines",
|
|
"gt_answers": [
|
|
"fiji"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit is displayed",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"Bananas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404433.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is command of the digital sign?",
|
|
"answer": "bees clear",
|
|
"gt_answers": [
|
|
"BIKES USE CLARK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written above the letters 'pas'",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"hot"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000125694.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the brownie mix?",
|
|
"answer": "ghirardelli triple chocolate",
|
|
"gt_answers": [
|
|
"GHIRARDELLI"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003210.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in red?",
|
|
"answer": "supra",
|
|
"gt_answers": [
|
|
"Sopranos",
|
|
"SOPRANOS CHIANTI SODA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03983396_11337.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three capital letters are shown on the yellow umbrella?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"RMF"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000158362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What did this sign say before it was vandalized?",
|
|
"answer": "do not enter",
|
|
"gt_answers": [
|
|
"do not enter"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000012641.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the vandalized writing on this sign say?",
|
|
"answer": "do not enter",
|
|
"gt_answers": [
|
|
"IN MY ASSHOLE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000012641.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is exactly said on this sign, including the vandalized text?",
|
|
"answer": "do not enter",
|
|
"gt_answers": [
|
|
"do not IN MY ASSHOLE enter",
|
|
"do not enter in my asshole"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000012641.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this bus going?",
|
|
"answer": "indian city",
|
|
"gt_answers": [
|
|
"Broadway"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150554.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being offered at the right hand?",
|
|
"answer": "coffee shop",
|
|
"gt_answers": [
|
|
"French Bread"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_551.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the place located in the center of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BENJI'S"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1643.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the name of the cafe on the computer screen?",
|
|
"answer": "the cafe",
|
|
"gt_answers": [
|
|
"PHILLIES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03782006_36741.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are in the cafe?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03782006_36741.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many microphones are in the image?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03782006_36741.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business in this photo?",
|
|
"answer": "saint burger",
|
|
"gt_answers": [
|
|
"Giant Burger"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333950.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of stop sign is in the image?",
|
|
"answer": "all way stop sign",
|
|
"gt_answers": [
|
|
"ALL WAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000082745.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the maximum capacity of the glass",
|
|
"answer": "600 ml",
|
|
"gt_answers": [
|
|
"600 mL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02815834_4697.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of this glass",
|
|
"answer": "pyrex brand",
|
|
"gt_answers": [
|
|
"KIMAX"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02815834_4697.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the sponsor displayed on the side?",
|
|
"answer": "fedex",
|
|
"gt_answers": [
|
|
"FedEx"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the present on left corner",
|
|
"answer": "tennis racket",
|
|
"gt_answers": [
|
|
"naDaL news.com",
|
|
"NADALNEWS.COM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375004.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is in the glasses?",
|
|
"answer": "wine",
|
|
"gt_answers": [
|
|
"Wine"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000576389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many are in front?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000576389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is located on the bottom left corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Brian D. Luster"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000086552.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is writing in the board?",
|
|
"answer": "no dogs",
|
|
"gt_answers": [
|
|
"NO DOGS",
|
|
"NO DOGS TEACUP POODLES OK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339817.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four letter word is in the middle of the cauldron?",
|
|
"answer": "magic",
|
|
"gt_answers": [
|
|
"Soup"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02939185_719.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the large red letter and number combo on the side of the plane?",
|
|
"answer": "it is a lst",
|
|
"gt_answers": [
|
|
"N6777B"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text of the watermark",
|
|
"answer": "a classic car",
|
|
"gt_answers": [
|
|
"(C) Simon Turner"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4351.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the computer?",
|
|
"answer": "lenovo",
|
|
"gt_answers": [
|
|
"IBM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000119849.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is wirtten on the cup",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"Joanne"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370364.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the main destination?",
|
|
"answer": "city centre",
|
|
"gt_answers": [
|
|
"Middlesbrough"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000281285.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of the lowest marked item in this image?",
|
|
"answer": "$5",
|
|
"gt_answers": [
|
|
"$2.50"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000346915.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign say?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"no turns"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160009.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first three letters on the man's sweater?",
|
|
"answer": "c",
|
|
"gt_answers": [
|
|
"CUV"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319535.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what bus number is this?",
|
|
"answer": "a bus",
|
|
"gt_answers": [
|
|
"154"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which city is the red bus from?",
|
|
"answer": "liverpool",
|
|
"gt_answers": [
|
|
"CHESTER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098943.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which state is the State Fair being held?",
|
|
"answer": "missouri",
|
|
"gt_answers": [
|
|
"Wisconsin"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404847.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what brand is this chainsaw ?",
|
|
"answer": "husqvarna",
|
|
"gt_answers": [
|
|
"Farm Boss",
|
|
"FARMBOSS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03000684_7138.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street block number?",
|
|
"answer": "401",
|
|
"gt_answers": [
|
|
"4300"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000374891.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time of road is this?",
|
|
"answer": "oasis",
|
|
"gt_answers": [
|
|
"DR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000374891.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cross street for Wonderland?",
|
|
"answer": "oasis ave",
|
|
"gt_answers": [
|
|
"oasis"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000374891.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What restaurant is pictured?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"McDonald's"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_195.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which restaurant is shown?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"McDonald's"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_195.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bright, yellow shirt?",
|
|
"answer": "i love my city",
|
|
"gt_answers": [
|
|
"Word",
|
|
"WORD."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362985.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers (dashes included) on the \"Barber Shop\" sign?",
|
|
"answer": "no dashes",
|
|
"gt_answers": [
|
|
"212-866-4160"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362985.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many pedestrians are in the picture?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"Four"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362985.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the largest sticker on the ramp",
|
|
"answer": "vans",
|
|
"gt_answers": [
|
|
"Vans"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414219.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign above the two men in red shirts?",
|
|
"answer": "santa fe",
|
|
"gt_answers": [
|
|
"caixanova"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/793.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letter above the & sign?",
|
|
"answer": "e",
|
|
"gt_answers": [
|
|
"S"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408421.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the sport on the tee shirt?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412290.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is in the blue circle on the tee shirt?",
|
|
"answer": "z",
|
|
"gt_answers": [
|
|
"F"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412290.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word are written on the tee shirt?",
|
|
"answer": "zions",
|
|
"gt_answers": [
|
|
"Titans Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412290.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this used for?",
|
|
"answer": "a sharpener",
|
|
"gt_answers": [
|
|
"Pencil sharpener"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03908714_2869.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of attraction is this?",
|
|
"answer": "museum",
|
|
"gt_answers": [
|
|
"Museum"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334061.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is on the sign?",
|
|
"answer": "1800",
|
|
"gt_answers": [
|
|
"1730"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334061.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the top book?",
|
|
"answer": "a book of hours",
|
|
"gt_answers": [
|
|
"THE ALCHEMIST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341714.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the bottom book with red lines?",
|
|
"answer": "a book of hours",
|
|
"gt_answers": [
|
|
"A PEOPLE'S HISTORY OF THE UNITED STATES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341714.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the book on top of the book with red lines?",
|
|
"answer": "i am a cat",
|
|
"gt_answers": [
|
|
"RATS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341714.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What blue word is printed on the side of the plane?",
|
|
"answer": "united",
|
|
"gt_answers": [
|
|
"United"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407567.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What district does the street sign say?",
|
|
"answer": "district b",
|
|
"gt_answers": [
|
|
"SOUTH STREET SEAPORT HISTORIC DISTRICT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word stands out the most on the red poster to the left of the dog?",
|
|
"answer": "dog",
|
|
"gt_answers": [
|
|
"BEEP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000522667.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last four letters for the word on the top left of the picture?",
|
|
"answer": "st",
|
|
"gt_answers": [
|
|
"Roes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000522667.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Why is the keyboard so dusty",
|
|
"answer": "i didn't clean it in a while",
|
|
"gt_answers": [
|
|
"shift"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000064896.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many hours can you park?",
|
|
"answer": "24 hours",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326442.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word describes the children at play?",
|
|
"answer": "playful",
|
|
"gt_answers": [
|
|
"slow"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326442.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time does 2 hour parking begin?",
|
|
"answer": "at 8 am",
|
|
"gt_answers": [
|
|
"9:00",
|
|
"9:00 AM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326442.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number in the blue box?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"402"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336353.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the flyer read in big letters?",
|
|
"answer": "before and after",
|
|
"gt_answers": [
|
|
"Before"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414050.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How long is full-time",
|
|
"answer": "a full-time student is a student who is enrolled full-time",
|
|
"gt_answers": [
|
|
"24 weeks"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414050.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street on which the Stop sign appears?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"45TH PARALLEL DR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000314279.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign tell drivers to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000314279.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where can I buy shoes here?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"footaction"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411987.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written inside the heart?",
|
|
"answer": "hello kitty",
|
|
"gt_answers": [
|
|
"Love"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02971356_16590.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the slogan for J-Donuts?",
|
|
"answer": "j-donuts j-donuts",
|
|
"gt_answers": [
|
|
"the unreal taste"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000234176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on it?",
|
|
"answer": "no parking here to corner",
|
|
"gt_answers": [
|
|
"No parking here to corner"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319596.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is handwritten on the paper?",
|
|
"answer": "to go",
|
|
"gt_answers": [
|
|
"Cheers"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015759.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the symbol on the glass",
|
|
"answer": "apple",
|
|
"gt_answers": [
|
|
"apple"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376275.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where do these escalators lead?",
|
|
"answer": "to the underground station",
|
|
"gt_answers": [
|
|
"SUBWAY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where are these escalators located?",
|
|
"answer": "buchanan street station",
|
|
"gt_answers": [
|
|
"Buchanan Street"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of the lawnmower?",
|
|
"answer": "kobalt",
|
|
"gt_answers": [
|
|
"TG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03649909_32925.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the lawnmower?",
|
|
"answer": "john deere",
|
|
"gt_answers": [
|
|
"KUBOTA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03649909_32925.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the model of the lawnmower?",
|
|
"answer": "briggs and stratton",
|
|
"gt_answers": [
|
|
"TG1660"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03649909_32925.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made this product?",
|
|
"answer": "tamron",
|
|
"gt_answers": [
|
|
"Tamron"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_1181.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the flower stall called?",
|
|
"answer": "market",
|
|
"gt_answers": [
|
|
"GRANVILLE ISLAND FLORIST",
|
|
"GRANVILLE ISLAND"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what do sunlight farms sell?",
|
|
"answer": "vegetables",
|
|
"gt_answers": [
|
|
"Fresh Fruit and Vegetables"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the first word on the street sign?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"Sorry"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000251335.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company shown in the top right?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"SWAROVSKI"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_802.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the food pictured?",
|
|
"answer": "beef stroganoff",
|
|
"gt_answers": [
|
|
"Beef Stroganoff"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which block is to the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"800"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000188139.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"kearny"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000188139.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the building?",
|
|
"answer": "a restaurant",
|
|
"gt_answers": [
|
|
"Bocoray"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374501.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name on the blue sign?",
|
|
"answer": "sherman wy",
|
|
"gt_answers": [
|
|
"Sherman Wy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THIS SHOP?",
|
|
"answer": "h m",
|
|
"gt_answers": [
|
|
"OCBC BANK"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the city on the bike sign?",
|
|
"answer": "toronto",
|
|
"gt_answers": [
|
|
"ONTARIO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000234839.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which airline do these planes belong to?",
|
|
"answer": "india go",
|
|
"gt_answers": [
|
|
"IndiGo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000692.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number train is this?",
|
|
"answer": "656",
|
|
"gt_answers": [
|
|
"8948"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400049.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of restaurant does the red sign on the top left make reference to?",
|
|
"answer": "sushi",
|
|
"gt_answers": [
|
|
"Indian Vegetarian"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370592.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store on the left of the image?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"SWAROVSKI"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the batter's number?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403749.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of butter is on the table",
|
|
"answer": "white butter",
|
|
"gt_answers": [
|
|
"balance smart",
|
|
"SMART BALANCE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000396029.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written in yellow on the red and blue striped shirt?",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"unicef"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000229827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the TV brand?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"SONY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the website on the image?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"tom.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02790996_4546.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number?",
|
|
"answer": "no train number",
|
|
"gt_answers": [
|
|
"N 471"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159626.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photographers name?",
|
|
"answer": "michael scott",
|
|
"gt_answers": [
|
|
"Tai Strietman Photography"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02486261_18107.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the owl on the man's shirt say?",
|
|
"answer": "owl city",
|
|
"gt_answers": [
|
|
"Damn fool music"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351562.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the man's shirt?",
|
|
"answer": "owl city",
|
|
"gt_answers": [
|
|
"zatopeks"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351562.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word above the owl?",
|
|
"answer": "donut",
|
|
"gt_answers": [
|
|
"zatopeks"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351562.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the bike in front?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Ducati"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364345.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the big letter on the scoreboard?",
|
|
"answer": "m",
|
|
"gt_answers": [
|
|
"C"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What US city is on the shirts at the bottom of the photo?",
|
|
"answer": "chicago",
|
|
"gt_answers": [
|
|
"Kansas City"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What player name is on the shirt on the right?",
|
|
"answer": "joe kelly",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What drink is being sold?",
|
|
"answer": "coffee",
|
|
"gt_answers": [
|
|
"Coca Cola",
|
|
"Coca-Cola"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349528.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written in the cake",
|
|
"answer": "teddy bear",
|
|
"gt_answers": [
|
|
"Julissa"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321447.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the doll decorated the cake",
|
|
"answer": "teddy bear",
|
|
"gt_answers": [
|
|
"Teddy bear"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321447.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the red and black bag on the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"OGI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000339670.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last number shown on the display?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000059817.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number?",
|
|
"answer": "sncf",
|
|
"gt_answers": [
|
|
"808"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000005340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the train company?",
|
|
"answer": "sncf",
|
|
"gt_answers": [
|
|
"SNCF"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000005340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of train is this?",
|
|
"answer": "high speed train",
|
|
"gt_answers": [
|
|
"SNCF 808",
|
|
"SNCF"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000005340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the cake?",
|
|
"answer": "christina",
|
|
"gt_answers": [
|
|
"Jenny"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318543.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How old is jenny turning?",
|
|
"answer": "20",
|
|
"gt_answers": [
|
|
"21"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318543.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the brand?",
|
|
"answer": "white cloud",
|
|
"gt_answers": [
|
|
"White cloud"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03887697_7332.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in the center of this picture?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000167354.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is beneeth the 'stop' on the sign?",
|
|
"answer": "street",
|
|
"gt_answers": [
|
|
"driving"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000167354.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what letters are visible on the storefront on the left?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"kshop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000167354.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on this sign?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"501"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the beer in the yellow box?",
|
|
"answer": "magic hat",
|
|
"gt_answers": [
|
|
"Magic Hat"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412338.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the right corner of the bus?",
|
|
"answer": "57",
|
|
"gt_answers": [
|
|
"573"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000070104.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kinds of products are being sold?",
|
|
"answer": "cosmetics",
|
|
"gt_answers": [
|
|
"Beauty Care"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_415.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green sign say?",
|
|
"answer": "bear crossing",
|
|
"gt_answers": [
|
|
"RECONYX"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403281.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is on the grey shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065632.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is on the pitch?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"24"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065632.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word at the top of the pink board?",
|
|
"answer": "bathroom",
|
|
"gt_answers": [
|
|
"OUIJA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000134835.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of this product?",
|
|
"answer": "marcol",
|
|
"gt_answers": [
|
|
"Marigold"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03871628_11287.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is this bus number?",
|
|
"answer": "i crosstown",
|
|
"gt_answers": [
|
|
"919"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is this bus going?",
|
|
"answer": "to cross town",
|
|
"gt_answers": [
|
|
"crosstown"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is this location?",
|
|
"answer": "elm street",
|
|
"gt_answers": [
|
|
"Lanaudiere"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375730.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is it made withb",
|
|
"answer": "it is made with oats",
|
|
"gt_answers": [
|
|
"Whole grain"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What animal is on the sigh?",
|
|
"answer": "sheep",
|
|
"gt_answers": [
|
|
"Sheep"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000458027.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the year of the copyright?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01614925_3110.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who holds the copyright?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"gary phillips"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01614925_3110.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is another word for farm animals?",
|
|
"answer": "sheep",
|
|
"gt_answers": [
|
|
"livestock"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159714.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what brand/company is this bus?",
|
|
"answer": "mb",
|
|
"gt_answers": [
|
|
"Centrebus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335872.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written between the headlights of the bus",
|
|
"answer": "london city bus",
|
|
"gt_answers": [
|
|
"Centrebus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335872.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the left of the image read?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"surf shop",
|
|
"SURF SHOP LIQUIDATORS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116694.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the street sign?",
|
|
"answer": "lake street",
|
|
"gt_answers": [
|
|
"W LAKE ST"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1947.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this car's license plate read?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"LD12 UKK"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3279.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number on the police hat",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"11940"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04456115_16952.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written inbetween the two blue lights ontop of the truck?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GARDA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000094156.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the blue shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000286406.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the total fat?",
|
|
"answer": "20 g",
|
|
"gt_answers": [
|
|
"3g"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019892.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how much sodium?",
|
|
"answer": "220 mg",
|
|
"gt_answers": [
|
|
"85mg"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019892.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the calories?",
|
|
"answer": "0 calories",
|
|
"gt_answers": [
|
|
"130"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019892.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this boat?",
|
|
"answer": "no name",
|
|
"gt_answers": [
|
|
"Lady Joan III"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351171.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the blue panel on the volleyball?",
|
|
"answer": "volley",
|
|
"gt_answers": [
|
|
"MiKASA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321521.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline this airplane belongs to?",
|
|
"answer": "air india",
|
|
"gt_answers": [
|
|
"IndiGo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the program on the screen?",
|
|
"answer": "flickr",
|
|
"gt_answers": [
|
|
"flickr"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316591.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottle with the purple cap contain?",
|
|
"answer": "olive oil",
|
|
"gt_answers": [
|
|
"Grapeseed oil"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329872.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What price is shown in the circle?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"$75"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_55129.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word has the largest font?",
|
|
"answer": "news",
|
|
"gt_answers": [
|
|
"NEWS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_55129.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of one of the sponsors of the tennis tournament?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Pacific Life"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000400409.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What auto make is sponsor of the tennis tournament?",
|
|
"answer": "ford",
|
|
"gt_answers": [
|
|
"Mercedes Benz"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000400409.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many blocks away is lake monona?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412895.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"alle"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000177959.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the red bus going to?",
|
|
"answer": "a city",
|
|
"gt_answers": [
|
|
"Georgetown"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the side of the red bus?",
|
|
"answer": "revolution",
|
|
"gt_answers": [
|
|
"Circulator"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is the horse on?",
|
|
"answer": "street",
|
|
"gt_answers": [
|
|
"Verdugo St"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000473531.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of beverages are being advertised on the neon sign?",
|
|
"answer": "soda",
|
|
"gt_answers": [
|
|
"CAFE , cocktails"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000473531.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text in english say?",
|
|
"answer": "reserve bank of india",
|
|
"gt_answers": [
|
|
"Reserve Bank",
|
|
"Reserve Bank Of India"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000975.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is written on the train",
|
|
"answer": "the train has a number on it",
|
|
"gt_answers": [
|
|
"31"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116423.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what letter is written next to the 31",
|
|
"answer": "d",
|
|
"gt_answers": [
|
|
"x"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116423.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the stick?",
|
|
"answer": "glue",
|
|
"gt_answers": [
|
|
"PRitt"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412914.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of a business in this image",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"Spa Symphony"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_410.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of a business in this image?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"Chabuton"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_410.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What male name is listed on the sign in the back?",
|
|
"answer": "mike",
|
|
"gt_answers": [
|
|
"Sean"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367866.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of wet wipes shown?",
|
|
"answer": "wet n wild",
|
|
"gt_answers": [
|
|
"HUGGIES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000405060.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the donut store?",
|
|
"answer": "the doughnut shop",
|
|
"gt_answers": [
|
|
"Voodoo Doughnut",
|
|
"VOODOO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347303.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many boats are at the shore?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320493.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is on the wine bottle?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000434894.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the left red button?",
|
|
"answer": "mute",
|
|
"gt_answers": [
|
|
"Guide"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339051.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the white center circle button?",
|
|
"answer": "mute",
|
|
"gt_answers": [
|
|
"Info"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339051.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the slogan of The Irish Times",
|
|
"answer": "for the times in life",
|
|
"gt_answers": [
|
|
"For the Times We Live In"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3463.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which airline is this plane?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"SAS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405095.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed of the ball?",
|
|
"answer": "the speed of a tennis ball",
|
|
"gt_answers": [
|
|
"113 mph"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000448560.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bottom of the surfboard?",
|
|
"answer": "i am",
|
|
"gt_answers": [
|
|
"Katin"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367392.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the doctor on the green sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Sawaddipong"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000003493.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is this building?",
|
|
"answer": "201",
|
|
"gt_answers": [
|
|
"2565"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000488395.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this building's number?",
|
|
"answer": "201",
|
|
"gt_answers": [
|
|
"2565"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000488395.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four numbers are seen in this photo?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"2565"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000488395.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written at the top of the label on the bottle of soda?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"special"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406013.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the watermark say?",
|
|
"answer": "graduate stock photos",
|
|
"gt_answers": [
|
|
"shutterstock"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02669723_13466.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company makes ROUGE PUR COUTURE",
|
|
"answer": "mac cosmetics",
|
|
"gt_answers": [
|
|
"Yves Saint Laurent"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315893.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is watermarked on this image?",
|
|
"answer": "sgfoodie",
|
|
"gt_answers": [
|
|
"shootfirsteatlater.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366387.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 5 digits written vertically in a row on the wall above the bananas?",
|
|
"answer": "a phone number",
|
|
"gt_answers": [
|
|
"12551"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000475939.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the whole number beginning and ending in 1 written vertically on the wall above the bananas?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"12551"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000475939.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first store on the left?",
|
|
"answer": "sephora",
|
|
"gt_answers": [
|
|
"KEITH",
|
|
"& KEITH"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_404.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Seconds store from the left>",
|
|
"answer": "sephora",
|
|
"gt_answers": [
|
|
"SEPHORA"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_404.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who makes the instrument shown?",
|
|
"answer": "yamaha",
|
|
"gt_answers": [
|
|
"Yamaha"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02804610_5152.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is in the object?",
|
|
"answer": "royal blend whisky",
|
|
"gt_answers": [
|
|
"The Royal Blend",
|
|
"ROYAL THE BLEND"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04579145_1461.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is that person playing",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410179.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of cheese is this?",
|
|
"answer": "feta cheese",
|
|
"gt_answers": [
|
|
"MOZZARELLA"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012143.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are wearing glasses?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"One",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352510.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the camera lens cap?",
|
|
"answer": "nik",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_9641.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is on the lens cap?",
|
|
"answer": "nik",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_9641.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What section of the city is this?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"Greenwich"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400654.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters on top of the building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"tel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000179031.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is printed on the chairs at the bottom of the image?",
|
|
"answer": "us open",
|
|
"gt_answers": [
|
|
"US OPEN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the advertisement on the left?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Heineken"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the arrow shaped sign show in the blue background?",
|
|
"answer": "london underground station",
|
|
"gt_answers": [
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000408537.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street on the street sign?",
|
|
"answer": "london road",
|
|
"gt_answers": [
|
|
"CARRINGTON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000408537.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the city name shown at the bottom of the street sign?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"WESTMINSTER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000408537.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the bottle of water?",
|
|
"answer": "no brand",
|
|
"gt_answers": [
|
|
"ZEPHYRHILLS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321705.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the side of the taxi, just above the arrow?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"xicab"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000138217.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What instruction is on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408539.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Are the two people men or women?",
|
|
"answer": "women",
|
|
"gt_answers": [
|
|
"Women"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000055651.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the car?",
|
|
"answer": "stickers",
|
|
"gt_answers": [
|
|
"Revill"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04417672_16141.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bobsled?",
|
|
"answer": "whelen",
|
|
"gt_answers": [
|
|
"Whelen"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02860847_147.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website under \"Whelen\"?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"USMilitary.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02860847_147.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is displayed in large lettering, on the side of the plane?",
|
|
"answer": "cargolux",
|
|
"gt_answers": [
|
|
"cargolux"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000272846.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is written on the machinery?",
|
|
"answer": "emerson",
|
|
"gt_answers": [
|
|
"Siemens-Schuckert"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001161.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the button with the up and down arrows on it?",
|
|
"answer": "d",
|
|
"gt_answers": [
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000467081.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters come after the letters ATV/ on the same button?",
|
|
"answer": "tv",
|
|
"gt_answers": [
|
|
"DTV"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000467081.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What restaurant did the coffee cup come from?",
|
|
"answer": "donut king",
|
|
"gt_answers": [
|
|
"Tim Hortons"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000022080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the picture on the toilet say?",
|
|
"answer": "a dog",
|
|
"gt_answers": [
|
|
"wanted"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the crackers?",
|
|
"answer": "ritz",
|
|
"gt_answers": [
|
|
"House Recipe"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000185108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is listed on the red sign?",
|
|
"answer": "angels baseball",
|
|
"gt_answers": [
|
|
"State Farm"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371365.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the sports company listed on the fence?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"Fox Sports"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371365.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the red jersey?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371365.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign not want to be picked up?",
|
|
"answer": "garbage",
|
|
"gt_answers": [
|
|
"Blossoms",
|
|
"THE BLOSSOMS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the white text on the sign?",
|
|
"answer": "stop beaver",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the arrow mark refer",
|
|
"answer": "detour",
|
|
"gt_answers": [
|
|
"desviacion"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362972.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which animal place on there",
|
|
"answer": "dog",
|
|
"gt_answers": [
|
|
"dog"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362972.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what he doing",
|
|
"answer": "riding his bike",
|
|
"gt_answers": [
|
|
"cycling"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362972.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the banner?",
|
|
"answer": "alma",
|
|
"gt_answers": [
|
|
"ALMA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02704792_19069.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the banner?",
|
|
"answer": "fie",
|
|
"gt_answers": [
|
|
"Peru"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02704792_19069.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this firetruck from?",
|
|
"answer": "coolville",
|
|
"gt_answers": [
|
|
"COOLVILLE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is visible in the white text inside the green square on the box visible in the photo?",
|
|
"answer": "healthy choice",
|
|
"gt_answers": [
|
|
"Healthy Choice"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009797.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"enone stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000252693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign with the red circle say?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"DO NOT ENTER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000252693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal red sign say?",
|
|
"answer": "the white house",
|
|
"gt_answers": [
|
|
"enone stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000252693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number train car is this?",
|
|
"answer": "a train car",
|
|
"gt_answers": [
|
|
"57307"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355210.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company that owns the image?",
|
|
"answer": "shutterstock",
|
|
"gt_answers": [
|
|
"iStockphoto"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03544143_7974.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What may happen if this product gets in your eyes?",
|
|
"answer": "irritate eyes",
|
|
"gt_answers": [
|
|
"It may irritate your eyes.",
|
|
"IRRITATE"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008342.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON IT",
|
|
"answer": "delta airlines",
|
|
"gt_answers": [
|
|
"DELTA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360715.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317403.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is labeled in the upper left?",
|
|
"answer": "57th street",
|
|
"gt_answers": [
|
|
"5 AV"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is on the far right?",
|
|
"answer": "macy's",
|
|
"gt_answers": [
|
|
"Van Cleef & Arpeh"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this lady selling?",
|
|
"answer": "cupcakes",
|
|
"gt_answers": [
|
|
"cupcakes"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the man holding?",
|
|
"answer": "hershey syrup",
|
|
"gt_answers": [
|
|
"HERSHEYS SYRUP"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07836838_2185.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter on the side of the boat?",
|
|
"answer": "p",
|
|
"gt_answers": [
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2888.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the word \"police\" written in?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2888.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many letters are in the word on the side of the boat?",
|
|
"answer": "seven",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2888.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the blue sign on the floor behind the player?",
|
|
"answer": "no shoes allowed",
|
|
"gt_answers": [
|
|
"POLO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323241.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the banana?",
|
|
"answer": "romance and adventure",
|
|
"gt_answers": [
|
|
"It was really very good"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316977.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who does the boat belong to?",
|
|
"answer": "us coast guard",
|
|
"gt_answers": [
|
|
"U.S. COAST GUARD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the boat number?",
|
|
"answer": "u s coast guard",
|
|
"gt_answers": [
|
|
"25678"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who makes the engine for the boat?",
|
|
"answer": "john deere",
|
|
"gt_answers": [
|
|
"HONDA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website address?",
|
|
"answer": "the website address is http://www",
|
|
"gt_answers": [
|
|
"bbrtfilter.en.alibaba.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03843555_2897.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What resort are these men skiing at?",
|
|
"answer": "squaw valley",
|
|
"gt_answers": [
|
|
"timberline"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414499.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which brand is the black snowboard?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Burton"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414499.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "The orange painted snowboard has what letter and which number?",
|
|
"answer": "a and b",
|
|
"gt_answers": [
|
|
"K2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414499.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the book",
|
|
"answer": "joy in the morning",
|
|
"gt_answers": [
|
|
"Joy of cooking"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000231091.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much was spent on fast food in 1972?",
|
|
"answer": "$2 billion",
|
|
"gt_answers": [
|
|
"$3 billion",
|
|
"$3billion"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000181909.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What gas station is the picture from?",
|
|
"answer": "mcdonalds gas station",
|
|
"gt_answers": [
|
|
"CITGO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000181909.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the box say?",
|
|
"answer": "real boobs",
|
|
"gt_answers": [
|
|
"Real Boobs U",
|
|
"REAL BOOBS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400949.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the liscence number on the back of the car?",
|
|
"answer": "the license number on the back of the car",
|
|
"gt_answers": [
|
|
"8477 CJV"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/59.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three letters are on the top left item",
|
|
"answer": "aon",
|
|
"gt_answers": [
|
|
"AON"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_8517.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which is the number on the back?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"75"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000022482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which is player's name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Rodriguez"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000022482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the caption in this photo say?",
|
|
"answer": "diana whitney photography",
|
|
"gt_answers": [
|
|
"Diana Whyte Photography"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7875.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370519.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the total sale amount?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"15.02"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03425413_14244.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the first sign?",
|
|
"answer": "brighton",
|
|
"gt_answers": [
|
|
"Turquoise ave"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361840.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the second plate?",
|
|
"answer": "brighton",
|
|
"gt_answers": [
|
|
"Brighton ave"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361840.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the third plate?",
|
|
"answer": "brighton",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361840.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS INDICATED IN THIS SIGN BOARD?",
|
|
"answer": "eating animals",
|
|
"gt_answers": [
|
|
"STOP EATING ANIMALS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000101753.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the armband?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"18"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02088364_8619.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this?",
|
|
"answer": "kraft",
|
|
"gt_answers": [
|
|
"Knorr"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013383.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What label is visible?",
|
|
"answer": "knorr",
|
|
"gt_answers": [
|
|
"NUTRITION"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013383.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the shirt of the person travelling on bike?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"POLICE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293647.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "no smoking",
|
|
"gt_answers": [
|
|
"4FINGERS"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the big blue word on the screen?",
|
|
"answer": "read life",
|
|
"gt_answers": [
|
|
"READ"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the big yellow word on the screen?",
|
|
"answer": "read life",
|
|
"gt_answers": [
|
|
"LIFE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the blue phrase say?",
|
|
"answer": "read life",
|
|
"gt_answers": [
|
|
"READ Every Day"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the third word in the book title starting with \"The Memory...\"",
|
|
"answer": "memory",
|
|
"gt_answers": [
|
|
"Cathedral"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000013770.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the author Crais?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Robert"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000013770.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of motorbike rider?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000262299.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the store's phone number?",
|
|
"answer": "770 - 777 - 8787",
|
|
"gt_answers": [
|
|
"726-1184"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359704.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the gold plaque?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TELL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098116.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the brown circle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"LION"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098116.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the the top street sign say?",
|
|
"answer": "elm street",
|
|
"gt_answers": [
|
|
"High St",
|
|
"HIGH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402203.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402203.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the license plate of the black car?",
|
|
"answer": "sc bkr",
|
|
"gt_answers": [
|
|
"SF5I MPX",
|
|
"SF51MPX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000209176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the umpire's sleeve?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000369078.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the white uppercase word on the object?",
|
|
"answer": "hockey",
|
|
"gt_answers": [
|
|
"Cup"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_41949.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the child's shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"West"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000578766.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the cap say?",
|
|
"answer": "strawberry soda",
|
|
"gt_answers": [
|
|
"Strawberry Soda"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02877765_15153.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company name is on the mudflap?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"YAMAHA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000452162.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number on the digital sign at the top front of the bus?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"51"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374374.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What event is advertised in large red letters?",
|
|
"answer": "fashion show",
|
|
"gt_answers": [
|
|
"SALE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_874.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT ROAD SIGN IS THERE",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373994.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the Ferry?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FERRY BRIGANTIA OF BRISTOL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000449508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "westjet",
|
|
"gt_answers": [
|
|
"Westjet"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401448.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the plane identification number?",
|
|
"answer": "msn 370",
|
|
"gt_answers": [
|
|
"C-GMWJ"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401448.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the copier?",
|
|
"answer": "toshiba",
|
|
"gt_answers": [
|
|
"TOSHIBA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03924679_11382.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the model number of the copier?",
|
|
"answer": "toshiba bizhub c200",
|
|
"gt_answers": [
|
|
"1560"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03924679_11382.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter of the artist's name at the bottom of the photo?",
|
|
"answer": "p",
|
|
"gt_answers": [
|
|
"f"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02111129_4495.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What web address is located at the bottom?",
|
|
"answer": "earthshots org",
|
|
"gt_answers": [
|
|
"EarthShots.org"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02487347_3680.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline does this aircraft belong to?",
|
|
"answer": "air france",
|
|
"gt_answers": [
|
|
"Airfrance"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326777.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the girl in the white jersey closest to the camera?",
|
|
"answer": "10",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364950.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the girl in the blue jersey?",
|
|
"answer": "10",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364950.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who does the picture belong to?",
|
|
"answer": "daniel",
|
|
"gt_answers": [
|
|
"Tom Boldt"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02115913_2390.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"405"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000547675.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign indicate?",
|
|
"answer": "railroad crossing",
|
|
"gt_answers": [
|
|
"RAILROAD CROSSING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000547675.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the phone number on the truck",
|
|
"answer": "the phone number is",
|
|
"gt_answers": [
|
|
"1-866-668-7666"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048747.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is the airplane from?",
|
|
"answer": "united airlines",
|
|
"gt_answers": [
|
|
"US Airways"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000196916.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the white text on the black sign on the fender?",
|
|
"answer": "harley davidson",
|
|
"gt_answers": [
|
|
"F8364"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/61584.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the black numbers on the white sign on the forks?",
|
|
"answer": "the year of manufacture",
|
|
"gt_answers": [
|
|
"343"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/61584.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the chrome letters on the handlebars?",
|
|
"answer": "harley davidson",
|
|
"gt_answers": [
|
|
"AA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/61584.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website address?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"WWW.WIMBLEDON.ORG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000359996.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed?",
|
|
"answer": "100 mph",
|
|
"gt_answers": [
|
|
"111",
|
|
"111 MPH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000359996.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written at the bottom of the silver container?",
|
|
"answer": "l o w",
|
|
"gt_answers": [
|
|
"Lisa's Retro Style"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03062245_6387.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"Auburn",
|
|
"AUBURN WAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375881.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building?",
|
|
"answer": "city hall",
|
|
"gt_answers": [
|
|
"Auburn Justice Center"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375881.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What should be do red?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"No Turn on Red",
|
|
"NO TURN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375881.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the importance of?",
|
|
"answer": "water",
|
|
"gt_answers": [
|
|
"Details"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369923.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the pills?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"300"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03937543_9745.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name on the sign?",
|
|
"answer": "common wealth",
|
|
"gt_answers": [
|
|
"Commonwealth Ave",
|
|
"COMMONWEALTH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326135.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the best by date on the container on the left?",
|
|
"answer": "the best by date on the container on the right",
|
|
"gt_answers": [
|
|
"Dec-26-2012"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first ingredient listed on the container on the left?",
|
|
"answer": "sugar",
|
|
"gt_answers": [
|
|
"Cranberries"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the third ingredient on the container on the left?",
|
|
"answer": "soy sauce",
|
|
"gt_answers": [
|
|
"Almonds"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the blue color portion of the bottle?",
|
|
"answer": "sprite",
|
|
"gt_answers": [
|
|
"flavorful spicy sea salt",
|
|
"Flavorful Dry sea salt"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355333.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the front of the stage say?",
|
|
"answer": "no smoking",
|
|
"gt_answers": [
|
|
"Worship"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03445924_28059.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the microwave on the left?",
|
|
"answer": "ge",
|
|
"gt_answers": [
|
|
"DAEWOO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000497698.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the toy banana's name?",
|
|
"answer": "mr banana",
|
|
"gt_answers": [
|
|
"twinkie the kid"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000497698.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the vehicle?",
|
|
"answer": "bmw",
|
|
"gt_answers": [
|
|
"BMW"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03459775_16367.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are on the blue sign?",
|
|
"answer": "unicef",
|
|
"gt_answers": [
|
|
"UNHCR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366545.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "In the picture on the right what is written on the red octagonal sign?",
|
|
"answer": "auckland city centre",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000561861.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "W90 must you give way to?",
|
|
"answer": "a pedestrian",
|
|
"gt_answers": [
|
|
"pedestrians"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000561861.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What must you give to pedestrians?",
|
|
"answer": "the right of way",
|
|
"gt_answers": [
|
|
"way"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000561861.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What five letter name is on the clock?.",
|
|
"answer": "amsterdam",
|
|
"gt_answers": [
|
|
"JOYCE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360828.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What room is labeled on the sign?",
|
|
"answer": "waiting room",
|
|
"gt_answers": [
|
|
"REFRESHMENT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360828.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number are the platforms?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360828.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the street sign say on the left?",
|
|
"answer": "morgan street",
|
|
"gt_answers": [
|
|
"SOLEY ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is the planner on the wall for?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"1984"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000213172.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are wearing wetsuits?",
|
|
"answer": "five",
|
|
"gt_answers": [
|
|
"fIVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the people standing on?",
|
|
"answer": "beach",
|
|
"gt_answers": [
|
|
"Sand"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is listed after a set of numbers on the left side?",
|
|
"answer": "vodka",
|
|
"gt_answers": [
|
|
"Poggio"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03871628_39494.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number comes after the 5 on the left side?",
|
|
"answer": "6",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03871628_39494.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the destination of the bus?",
|
|
"answer": "toronto",
|
|
"gt_answers": [
|
|
"Cromdale"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04487081_21488.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who makes this tool?",
|
|
"answer": "microsoft",
|
|
"gt_answers": [
|
|
"HP"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016912.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the tour company called?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"York Pullman City Tour"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000093070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the phone cost?",
|
|
"answer": "25 cents",
|
|
"gt_answers": [
|
|
"25 cents"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408147.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the tennis racket?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"W"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000528465.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number appears on his jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"13"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000578023.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"61"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000297585.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the Safety House?",
|
|
"answer": "58",
|
|
"gt_answers": [
|
|
"9"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375581.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "rodeo st",
|
|
"gt_answers": [
|
|
"RODEO Dr 200",
|
|
"BH RODEO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06794110_5457.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can be read at the bottom of the picture?",
|
|
"answer": "person and person",
|
|
"gt_answers": [
|
|
"HEATHER ABOUNADER PHOTOGRAPHY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000538099.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is letters are on the helmet?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"agv"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate on the middle motorcycle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"VSU996"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what product is being sold in the vending machine?",
|
|
"answer": "french fries",
|
|
"gt_answers": [
|
|
"French fries",
|
|
"REAL FRENCH FRIES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04525305_6764.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is wirtten on the airplane?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342238.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is written at the bottom of the picture?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342238.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What month abbreviation appears at the bottom of the picture?",
|
|
"answer": "feb",
|
|
"gt_answers": [
|
|
"Feb"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342238.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are on the side of the bus?",
|
|
"answer": "it",
|
|
"gt_answers": [
|
|
"SPTC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325975.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "all way",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361115.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color are the letters and border?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361115.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is written on the large case?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"1853"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000520569.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the art gallery?",
|
|
"answer": "sisko gallery",
|
|
"gt_answers": [
|
|
"Sisko Gallery"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the teddy bear?",
|
|
"answer": "person's name",
|
|
"gt_answers": [
|
|
"A BEAR HUG WOULD DO YOU GOOD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000351397.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus route?",
|
|
"answer": "zone 1",
|
|
"gt_answers": [
|
|
"i04"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000376668.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whose birthday is it?",
|
|
"answer": "it is marge's birthday",
|
|
"gt_answers": [
|
|
"MARCUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405217.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit is named on the top sign?",
|
|
"answer": "apples",
|
|
"gt_answers": [
|
|
"Apples"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364446.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit is named on the middle sign?",
|
|
"answer": "apples",
|
|
"gt_answers": [
|
|
"Pears"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364446.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit is named on the bottom sign?",
|
|
"answer": "apples",
|
|
"gt_answers": [
|
|
"Plums"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364446.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the bus?",
|
|
"answer": "san diego",
|
|
"gt_answers": [
|
|
"Via San Antonio"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372669.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the beer bottle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Miller Lite"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000134144.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the green airplane?",
|
|
"answer": "ewa air cargo",
|
|
"gt_answers": [
|
|
"EVA AIR Cargo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404908.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the truck?",
|
|
"answer": "91",
|
|
"gt_answers": [
|
|
"781"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the English term for arret?",
|
|
"answer": "tower",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364218.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of work does the company who owns this photograph do?",
|
|
"answer": "financial services",
|
|
"gt_answers": [
|
|
"Photography"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406356.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top destintion on this sign?",
|
|
"answer": "singapore",
|
|
"gt_answers": [
|
|
"Club House"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_256.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which team does the red team represent?",
|
|
"answer": "the united states",
|
|
"gt_answers": [
|
|
"Canada"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1160011.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the lighted sign say?",
|
|
"answer": "aoc",
|
|
"gt_answers": [
|
|
"Hom Yoga"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_997.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what kind of court is it?",
|
|
"answer": "basketball court",
|
|
"gt_answers": [
|
|
"Food Court"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_640.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is currently batting?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ethier"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000184877.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of player 16?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ethier"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000184877.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the player whose team is currently up to bat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ethier"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000184877.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE TEXT ON THE LENS COVER?",
|
|
"answer": "canon",
|
|
"gt_answers": [
|
|
"CANON"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_20227.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "hamburg",
|
|
"gt_answers": [
|
|
"Mundoora.",
|
|
"MUNDOORA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335202.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last four digits of the licence plate?",
|
|
"answer": "the last four digits of the licence plate are 0",
|
|
"gt_answers": [
|
|
"5UGN"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02787622_6868.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first website printed on the green wall?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"oaklandathletics.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000153224.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the year on the cup?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"1926"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357267.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the blades described as?",
|
|
"answer": "chalk line",
|
|
"gt_answers": [
|
|
"Hooked"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03481172_8879.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the back wall?",
|
|
"answer": "number",
|
|
"gt_answers": [
|
|
"330"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000360813.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What make is the truck?",
|
|
"answer": "daf",
|
|
"gt_answers": [
|
|
"Scania"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325486.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the silver car?",
|
|
"answer": "tah",
|
|
"gt_answers": [
|
|
"NG05BSZ",
|
|
"NGO5 BSZ"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325486.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the street sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"great southern overland stage ROUTE of 1849",
|
|
"great southern overland"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000548690.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the car?",
|
|
"answer": "hot rod",
|
|
"gt_answers": [
|
|
"Red Voo Doo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159915.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who owns this car?",
|
|
"answer": "a man",
|
|
"gt_answers": [
|
|
"Aaron & Misa Hagar",
|
|
"Aaron & Misha Hagar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159915.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the garage that built this car?",
|
|
"answer": "the garage",
|
|
"gt_answers": [
|
|
"Rat Runners Garage"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159915.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the head piece?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"earth"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last 3 words on the headpiece?",
|
|
"answer": "i am person",
|
|
"gt_answers": [
|
|
"show on earth"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word begins with 'S' in this picture?",
|
|
"answer": "street",
|
|
"gt_answers": [
|
|
"show"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country are they in?",
|
|
"answer": "united states",
|
|
"gt_answers": [
|
|
"USA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318556.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is printed under the windshield of the yellow and orange van?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"reading"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000183123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand advertised?",
|
|
"answer": "rabinowitz",
|
|
"gt_answers": [
|
|
"Railrunners"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_11899.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is Railrunners based?",
|
|
"answer": "san diego",
|
|
"gt_answers": [
|
|
"Groningen"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_11899.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is Railrunners website?",
|
|
"answer": "railrunners website",
|
|
"gt_answers": [
|
|
"mscrailrunners.nl"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_11899.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the K word at the top of the sign?",
|
|
"answer": "king",
|
|
"gt_answers": [
|
|
"Kenko"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the top two words on the white sign?",
|
|
"answer": "fire hydrant",
|
|
"gt_answers": [
|
|
"Keep Out"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351191.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which year was this picture taken according to the caption below?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000305076.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first line on the cake?",
|
|
"answer": "happy birthday",
|
|
"gt_answers": [
|
|
"WELCOME"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335036.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is pictured on the right?",
|
|
"answer": "dillards",
|
|
"gt_answers": [
|
|
"L'Occitane"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_128.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the heading read inside the L'Occitane store?",
|
|
"answer": "l'occitane",
|
|
"gt_answers": [
|
|
"Skincare"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_128.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the back of the batter?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"25"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000008519.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word in white on the blue box on the shelf?",
|
|
"answer": "cat",
|
|
"gt_answers": [
|
|
"Four"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000223942.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the place of the pizza place?",
|
|
"answer": "north beach",
|
|
"gt_answers": [
|
|
"NORTH BEACH PIZZA",
|
|
"NORTH BEACH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000564781.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date of the flag behind the man",
|
|
"answer": "1776",
|
|
"gt_answers": [
|
|
"July 4th 2009"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329469.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the pile consist of?",
|
|
"answer": "shoes",
|
|
"gt_answers": [
|
|
"Shoes"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04120489_545.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is the man allowed to skate here?",
|
|
"answer": "yes",
|
|
"gt_answers": [
|
|
"NO",
|
|
"NO SKATEBOARDING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000030779.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of peace is on the left?",
|
|
"answer": "hair peace",
|
|
"gt_answers": [
|
|
"Hair",
|
|
"HAIR PEACE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337477.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of peace is on the right?",
|
|
"answer": "hair peace",
|
|
"gt_answers": [
|
|
"Bed",
|
|
"BED PEACE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337477.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What must you do when a pedestrian is within the crosswalk?",
|
|
"answer": "yield to the pedestrian",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000118730.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the blue writing on the can say?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"COCONUT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368979.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the white writing on the can say?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"Yeo's"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368979.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top sign say?",
|
|
"answer": "cherry street",
|
|
"gt_answers": [
|
|
"Cherokee St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom sign say?",
|
|
"answer": "cherry street",
|
|
"gt_answers": [
|
|
"Cherry St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two street names are there?",
|
|
"answer": "cherry st",
|
|
"gt_answers": [
|
|
"Cherokee Cherry",
|
|
"CHEROKEE ST CHERRY ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top button say?",
|
|
"answer": "return",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04074963_19870.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the black sign?",
|
|
"answer": "beach",
|
|
"gt_answers": [
|
|
"Llandudno"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000435124.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHICH DEPARTMENT AIR CRAFT IS THIS?",
|
|
"answer": "navy",
|
|
"gt_answers": [
|
|
"U.S.NAVY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361915.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NUMBER SHOWN IN THE PICTURE ?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361915.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street display on the picture?",
|
|
"answer": "plymouth street",
|
|
"gt_answers": [
|
|
"PLYMPTON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000408515.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the big white word on the red octagon?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000408515.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last three letters before the exclamation mark in the stop sign?",
|
|
"answer": "ton",
|
|
"gt_answers": [
|
|
"ism"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000408515.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "aviva",
|
|
"gt_answers": [
|
|
"Armavia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498377.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What institute is being advertised?",
|
|
"answer": "uk institute of education",
|
|
"gt_answers": [
|
|
"UCE Birmingham Faculty of Education",
|
|
"UCE Birmingham"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_1051.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the product brand?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"jungle"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000317349.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of product is this?",
|
|
"answer": "poster",
|
|
"gt_answers": [
|
|
"cell- phone",
|
|
"CELL-PHONE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000317349.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word has blue inside one of the letters?",
|
|
"answer": "horse",
|
|
"gt_answers": [
|
|
"BIORANCH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375818.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the bottom right?",
|
|
"answer": "horse",
|
|
"gt_answers": [
|
|
"Horse"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375818.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first 4 letters of the top right word?",
|
|
"answer": "equestrian",
|
|
"gt_answers": [
|
|
"Prof"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375818.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What place is written in the painting?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FIJI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000383576.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Does this plane transport passengers?",
|
|
"answer": "yes",
|
|
"gt_answers": [
|
|
"Seaplanes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000036500.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this plane located?",
|
|
"answer": "on the beach",
|
|
"gt_answers": [
|
|
"Catalina"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000036500.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus company printed on the front of the bus?",
|
|
"answer": "intercity express",
|
|
"gt_answers": [
|
|
"citylink"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415352.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company name of the pizza in the drawer",
|
|
"answer": "pizza hut",
|
|
"gt_answers": [
|
|
"Papa Johns"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000092331.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the television screen?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"INSPIRED"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000092331.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of vegetable is organically grown?",
|
|
"answer": "broccoli",
|
|
"gt_answers": [
|
|
"Broccoli"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359749.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sale price of the broccoli?",
|
|
"answer": "ten dollars",
|
|
"gt_answers": [
|
|
"$1.00"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359749.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the screen of the jukebox?",
|
|
"answer": "cd player",
|
|
"gt_answers": [
|
|
"My music"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02988304_9338.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word of the street name found on the side of the building?",
|
|
"answer": "brook",
|
|
"gt_answers": [
|
|
"BRICK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352564.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word of the street name found on the building?",
|
|
"answer": "brook",
|
|
"gt_answers": [
|
|
"LANE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352564.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word of the street name found in the background?",
|
|
"answer": "st",
|
|
"gt_answers": [
|
|
"BUXT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352564.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what website does this photo belong to?",
|
|
"answer": "deutsche meister",
|
|
"gt_answers": [
|
|
"www.DerRittmeister.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03929855_334.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the tail of the plane?",
|
|
"answer": "v",
|
|
"gt_answers": [
|
|
"E"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365664.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the typewriter?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Corona"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04264628_21334.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the bottom?",
|
|
"answer": "saturday",
|
|
"gt_answers": [
|
|
"Saturday"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02834397_1257.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant?",
|
|
"answer": "tas",
|
|
"gt_answers": [
|
|
"TAS Restaurant",
|
|
"TAS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04081281_10634.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant?",
|
|
"answer": "tas",
|
|
"gt_answers": [
|
|
"TAS Restaurant",
|
|
"TAS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04081281_10634.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant/",
|
|
"answer": "tas",
|
|
"gt_answers": [
|
|
"TAS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04081281_10634.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the neon jacket?",
|
|
"answer": "i am person",
|
|
"gt_answers": [
|
|
"Metropolitan Police"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000173430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What insect is referenced in the passage?",
|
|
"answer": "bee",
|
|
"gt_answers": [
|
|
"dragonfly"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is another name for 'dragonfly' in some parts of the US?",
|
|
"answer": "mosquito",
|
|
"gt_answers": [
|
|
"snake feeder"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Ross Merritt Photography"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000319388.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store brand is shown?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"ZARA"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_749.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What car manufacturer is sponsoring this match?",
|
|
"answer": "audi",
|
|
"gt_answers": [
|
|
"PEUGEOT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/107946.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which concourse has the train stopped at?",
|
|
"answer": "concourse c",
|
|
"gt_answers": [
|
|
"f"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000130749.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the left-most label on the side of the laptop in the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CD-RW"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000025174.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THE FILE",
|
|
"answer": "glass flask",
|
|
"gt_answers": [
|
|
"LEVER FILE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02815834_3440.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "HOW MANY ML IS THE BIG FLASK",
|
|
"answer": "250ml",
|
|
"gt_answers": [
|
|
"500 ML",
|
|
"500"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02815834_3440.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the date of this picture?",
|
|
"answer": "the date is unknown",
|
|
"gt_answers": [
|
|
"6 5 2005"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2217.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"alitalia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000078825.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is on the logo in the top left of the photo?",
|
|
"answer": "topps",
|
|
"gt_answers": [
|
|
"Upper deck"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359501.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is at the bottom of the picture?",
|
|
"answer": "dave rutledge",
|
|
"gt_answers": [
|
|
"Dave Rutledge"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359501.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign on the silver pole?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"oNE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000327628.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the front of the bus say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"cumfybus optare eh06uny"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000177060.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to a pub",
|
|
"gt_answers": [
|
|
"VIA RUFFORD TARTLETON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000177060.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "x1",
|
|
"gt_answers": [
|
|
"202"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000177060.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the ski lift?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159996.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What it is title of this ski lift stop?",
|
|
"answer": "mountain top",
|
|
"gt_answers": [
|
|
"Mountain Top"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159996.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the label read on the bottle behind the kitten?",
|
|
"answer": "water bottle",
|
|
"gt_answers": [
|
|
"Easy to pour Easy to store"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000297979.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "quick groceries",
|
|
"gt_answers": [
|
|
"Quick Stop Groceries"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000064009.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Brand name?",
|
|
"answer": "rutland",
|
|
"gt_answers": [
|
|
"Rutland"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029309.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What item number is this product?",
|
|
"answer": "rutland",
|
|
"gt_answers": [
|
|
"84"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029309.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the flavoring of the product?",
|
|
"answer": "caramel peanut",
|
|
"gt_answers": [
|
|
"Caramel Peanut"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What month is the Bird Picture for?",
|
|
"answer": "january",
|
|
"gt_answers": [
|
|
"January"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029620.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the co-pilot?",
|
|
"answer": "dog",
|
|
"gt_answers": [
|
|
"Dog"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000282527.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the child's shirt say?",
|
|
"answer": "happy birthday",
|
|
"gt_answers": [
|
|
"viva"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07615774_10799.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is under the phone screen?",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"Cingular"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401225.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is listed on the wall?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Friedas"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2294.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the bus?",
|
|
"answer": "ts ts",
|
|
"gt_answers": [
|
|
"BT-LR-17"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHO IS SPONSORED FOR THIS GAME?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GE Money"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000113617.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of light is the head lamp?",
|
|
"answer": "led",
|
|
"gt_answers": [
|
|
"LED"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117034.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can be done on JobStreet.com?",
|
|
"answer": "apply for a job",
|
|
"gt_answers": [
|
|
"View jobs by salary matching."
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_236.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photo tag's first name?",
|
|
"answer": "sam",
|
|
"gt_answers": [
|
|
"Caitlin"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02389026_25614.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "it isn't fals",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"tru"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000290724.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Does the 4-way stop sign look bright enough?",
|
|
"answer": "yes, it does",
|
|
"gt_answers": [
|
|
"4-way"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254911.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Can you stop here in icy conditions?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254911.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Does the war on the sign make you feel uncomfortable?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"WAR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254911.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus license plate number?",
|
|
"answer": "nyc bus",
|
|
"gt_answers": [
|
|
"L-30523"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150434.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the image",
|
|
"answer": "the viaduct",
|
|
"gt_answers": [
|
|
"Quatrain Photo",
|
|
"\ufffd\ufffd QUATRAIN PHOTO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04532670_663.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is this product perfect for?",
|
|
"answer": "cooks",
|
|
"gt_answers": [
|
|
"Seniors & Visually Impaired"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000017164.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the main brand of water?",
|
|
"answer": "minaqua",
|
|
"gt_answers": [
|
|
"minaqua"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02747177_38232.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text on the store front?",
|
|
"answer": "santander",
|
|
"gt_answers": [
|
|
"Fruites Verdures"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1795.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What animal is the person feeding?",
|
|
"answer": "bird",
|
|
"gt_answers": [
|
|
"Bird"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000279476.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the website this image is on?",
|
|
"answer": "wordpress com",
|
|
"gt_answers": [
|
|
"http://nonnaluna.wordpress.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04033901_3076.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the food in the picture?",
|
|
"answer": "cottage pie",
|
|
"gt_answers": [
|
|
"Cottage Pie"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004046.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street listed on the left?",
|
|
"answer": "der linden",
|
|
"gt_answers": [
|
|
"Unter den Linden"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which fruit is this?",
|
|
"answer": "apples",
|
|
"gt_answers": [
|
|
"Apple"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404584.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four numbers are on the bottom right of the picture",
|
|
"answer": "1, 2, 3, 4",
|
|
"gt_answers": [
|
|
"2007"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02099601_2690.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the engine number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"3003"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000540769.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom of the sign pointing to the left say?",
|
|
"answer": "between here and there",
|
|
"gt_answers": [
|
|
"between here and there"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000168905.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What station is located to the left?",
|
|
"answer": "london bridge",
|
|
"gt_answers": [
|
|
"Parson Street Staton"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000168905.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website can this be found on?",
|
|
"answer": "nature",
|
|
"gt_answers": [
|
|
"www.naturfoto.cz"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02486261_7976.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand?",
|
|
"answer": "wahl",
|
|
"gt_answers": [
|
|
"Visiq"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03483316_49740.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who makes these donoughts?",
|
|
"answer": "buckeye donuts",
|
|
"gt_answers": [
|
|
"Busken"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411970.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is after have in the first line of the paper?",
|
|
"answer": "have",
|
|
"gt_answers": [
|
|
"arrived"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352831.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the top sign?",
|
|
"answer": "all",
|
|
"gt_answers": [
|
|
"All"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000558475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the bottom right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ROB BIXBY 2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000191350.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is displayed in the top left of the image?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"corbis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/897.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in blue on the train?",
|
|
"answer": "aachen",
|
|
"gt_answers": [
|
|
"Kusttram"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000474012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000535713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of dinosaur is mentioned on the screen?",
|
|
"answer": "stegosaurus",
|
|
"gt_answers": [
|
|
"RAPTORS?",
|
|
"RAPTORS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000174774.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What Airline is this plane for",
|
|
"answer": "air india",
|
|
"gt_answers": [
|
|
"Indigo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000687.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the top right say?",
|
|
"answer": "trade options",
|
|
"gt_answers": [
|
|
"Travel Options"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000892.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the marker?",
|
|
"answer": "pentel",
|
|
"gt_answers": [
|
|
"Sharpie"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06785654_4728.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photo listed in the middle of the image?",
|
|
"answer": "taytay dam",
|
|
"gt_answers": [
|
|
"HoangLong",
|
|
"HoangLong photo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/5541.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the player on the right?",
|
|
"answer": "sammy sosa",
|
|
"gt_answers": [
|
|
"SUZUKI"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320124.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the player on the left?",
|
|
"answer": "nine",
|
|
"gt_answers": [
|
|
"16"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320124.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers are written on the white street sign on the left?",
|
|
"answer": "no numbers",
|
|
"gt_answers": [
|
|
"3100"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406478.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the white street sign on right?",
|
|
"answer": "way",
|
|
"gt_answers": [
|
|
"FILBERT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406478.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where does the middle lane lead to?",
|
|
"answer": "valparaiso",
|
|
"gt_answers": [
|
|
"Chetumal"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4887.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the lowest number marked on the cup?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"50"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02815834_13293.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written in the 1st picture?",
|
|
"answer": "double free",
|
|
"gt_answers": [
|
|
"DOUBLE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360656.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written in the 2nd picture?",
|
|
"answer": "double free",
|
|
"gt_answers": [
|
|
"FAULT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360656.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the boat with green on it?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"vitoria"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000464075.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN GREEN CARPET?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"REGION AQUITAINE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000261788.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the number on the girl that is 3rd one from the left side of this picture?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"154"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which number is the bike that's 2nd in place?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"160",
|
|
"166"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the name brand on the side of the bikers shirts?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"JAMIS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN THE SIIDE",
|
|
"answer": "possi",
|
|
"gt_answers": [
|
|
"FOSSIL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03814906_10628.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the triangle on the sign?",
|
|
"answer": "lane",
|
|
"gt_answers": [
|
|
"PARE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370029.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team name is on the jerseys pictured?",
|
|
"answer": "orioles",
|
|
"gt_answers": [
|
|
"Orioles"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362858.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name printed on the jersey facing away from the viewer?",
|
|
"answer": "baltimore orioles",
|
|
"gt_answers": [
|
|
"Wieters"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362858.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the jersey for Wieters?",
|
|
"answer": "34",
|
|
"gt_answers": [
|
|
"32"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362858.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team does the standing player play for?",
|
|
"answer": "san francisco giants",
|
|
"gt_answers": [
|
|
"GIANTS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364612.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What credit card company logos are displayed?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Visa, Mastercard",
|
|
"VISA MASTERCARD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000241720.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the keyboard?",
|
|
"answer": "alpine",
|
|
"gt_answers": [
|
|
"ALIENWARE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336330.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the continent listed in the photo?",
|
|
"answer": "europe",
|
|
"gt_answers": [
|
|
"Central Europe"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335880.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What price is displayed?",
|
|
"answer": "$2",
|
|
"gt_answers": [
|
|
"$2"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001891.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many cats are there in this photo?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Three",
|
|
"ONE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373100.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What s the name of the restaurant?",
|
|
"answer": "burger king",
|
|
"gt_answers": [
|
|
"Hlollabatar",
|
|
"HLOLLABALAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363220.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What beverage name is displayed in the window of the restaurant?",
|
|
"answer": "coke",
|
|
"gt_answers": [
|
|
"Coca Cola",
|
|
"CocaCola"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363220.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is this?",
|
|
"answer": "emirates",
|
|
"gt_answers": [
|
|
"Emirates"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355476.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the right of the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"panasonic"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000211807.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does is the second word on the right of the photo?",
|
|
"answer": "airport",
|
|
"gt_answers": [
|
|
"ideas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000211807.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the fourth word on the right of the photo?",
|
|
"answer": "airport",
|
|
"gt_answers": [
|
|
"life"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000211807.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this event?",
|
|
"answer": "frisbee dog competition",
|
|
"gt_answers": [
|
|
"K9Frisbee"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000033444.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the road on the green sign?",
|
|
"answer": "las vegas blvd",
|
|
"gt_answers": [
|
|
"Bullhead Pkwy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357157.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is written on the rock?",
|
|
"answer": "los angeles",
|
|
"gt_answers": [
|
|
"BULLHEAD CITY, AZ."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357157.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text on the top of the image say?",
|
|
"answer": "make the bed every morning",
|
|
"gt_answers": [
|
|
"make the bed every morning"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348515.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter of the alphabet appears directly above this luggae?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"B"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000013868.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is in blue?",
|
|
"answer": "m",
|
|
"gt_answers": [
|
|
"B"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823750_15022.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the author of these books?",
|
|
"answer": "john steinbeck",
|
|
"gt_answers": [
|
|
"Kay Redfield Jamison"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409845.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letter at the top right of the image?",
|
|
"answer": "b",
|
|
"gt_answers": [
|
|
"c"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000404015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name at the top right of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"butt"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000404015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name at the top right of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"william"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000404015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353188.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the hat of the children?",
|
|
"answer": "c",
|
|
"gt_answers": [
|
|
"J"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713299.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the team the children play on?",
|
|
"answer": "little league",
|
|
"gt_answers": [
|
|
"Jets"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713299.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is displayed on the plaque?",
|
|
"answer": "1984",
|
|
"gt_answers": [
|
|
"1981"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713299.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three letters come before the word online?",
|
|
"answer": "c",
|
|
"gt_answers": [
|
|
"CRI"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02837789_7191.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the city on the box?",
|
|
"answer": "los angeles",
|
|
"gt_answers": [
|
|
"Gilroy"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03127925_4283.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year did the event take place?",
|
|
"answer": "2013",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1160075.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name at the top?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"tsonga"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000127098.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does vodafone mean?",
|
|
"answer": "voice over internet protocol",
|
|
"gt_answers": [
|
|
"vodafone"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000014152.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where can I buy a vodafone?",
|
|
"answer": "in the market",
|
|
"gt_answers": [
|
|
"vodafone"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000014152.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign board?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317909.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say in the top center of the image?",
|
|
"answer": "barber shop",
|
|
"gt_answers": [
|
|
"EXIT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02791270_514.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of business is the building on the left?",
|
|
"answer": "pharmacy",
|
|
"gt_answers": [
|
|
"Hotel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000476812.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what brand is the black phone?",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"nokia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373870.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of fuel does this car take?",
|
|
"answer": "unleaded gas",
|
|
"gt_answers": [
|
|
"Unleaded Fuel"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03841143_13177.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the way of mexico",
|
|
"answer": "to mexico",
|
|
"gt_answers": [
|
|
"Bridge 1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406290.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written on the back of the person's shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Laskar"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000060774.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is closed according to the sign?",
|
|
"answer": "road",
|
|
"gt_answers": [
|
|
"Road"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed above the plane windows?",
|
|
"answer": "japan airlines",
|
|
"gt_answers": [
|
|
"JAPAN AIR COMMUTER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000156895.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the jeans company in the background of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Muzu"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000392606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "This fire truck serves which area?",
|
|
"answer": "miami beach",
|
|
"gt_answers": [
|
|
"Coral Gables"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000501972.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are most brands to avoid plagiarism?",
|
|
"answer": "wind tunnel",
|
|
"gt_answers": [
|
|
"Patented"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000017231.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of books are indicated by the red sign in the top left of the image?",
|
|
"answer": "fiction",
|
|
"gt_answers": [
|
|
"NON-FICTION"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368062.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being cut?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"Pizza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355998.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS TIME?",
|
|
"answer": "time is the passage of events",
|
|
"gt_answers": [
|
|
"3:20"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346449.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What business name is on the top of the building?",
|
|
"answer": "life insurance",
|
|
"gt_answers": [
|
|
"MetLife"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1650.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bottom of the gas pump?",
|
|
"answer": "ohio",
|
|
"gt_answers": [
|
|
"GWAY"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03425413_22107.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does this sign say?",
|
|
"answer": "you've arrived",
|
|
"gt_answers": [
|
|
"you've arrived"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412562.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bus say above the door?",
|
|
"answer": "free nights",
|
|
"gt_answers": [
|
|
"Metroline"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3078.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the destination on the bus say?",
|
|
"answer": "free night bus",
|
|
"gt_answers": [
|
|
"205 via Kings Cross",
|
|
"Kings Cross"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3078.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the active ingredient?",
|
|
"answer": "oxybenzone",
|
|
"gt_answers": [
|
|
"aluminum zirconium trichlorohydrex Gly (20%) anhydrous",
|
|
"aluminum zirconium trichlorohydrex"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030844.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the phone number?",
|
|
"answer": "the phone number is 1-888-888-888",
|
|
"gt_answers": [
|
|
"1-800-964-1947"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030844.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number?",
|
|
"answer": "no train number",
|
|
"gt_answers": [
|
|
"055 05995"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373838.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is this on?",
|
|
"answer": "santa clara street",
|
|
"gt_answers": [
|
|
"Victoria St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the tail of the airplane say?",
|
|
"answer": "jetblue",
|
|
"gt_answers": [
|
|
"jetBlue"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000551214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit shape is the clock?",
|
|
"answer": "apple",
|
|
"gt_answers": [
|
|
"Apple"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335640.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the school days calendar for?",
|
|
"answer": "teachers",
|
|
"gt_answers": [
|
|
"LESSONS & CHALLENGES for Teachers"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335640.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What times are taped on the machine?",
|
|
"answer": "tokyo subway times",
|
|
"gt_answers": [
|
|
"8:00am-11:00am"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04525305_1740.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is written on the book?",
|
|
"answer": "japan",
|
|
"gt_answers": [
|
|
"JAPAN"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03179701_5399.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company name is on the Jumbo-Sized TV remote?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"INNOVAGE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259104.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "THe LITEON TV remote is located between which two remotes?",
|
|
"answer": "person and person",
|
|
"gt_answers": [
|
|
"PIONEER TOSHIBA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259104.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer who took this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Mani Babbar"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000466288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the world's third highest pass called?",
|
|
"answer": "khardung la",
|
|
"gt_answers": [
|
|
"Chang La"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000466288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter comes after the apostrophe?",
|
|
"answer": "z",
|
|
"gt_answers": [
|
|
"S"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03690938_2209.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What business is this person eating at?",
|
|
"answer": "donuts",
|
|
"gt_answers": [
|
|
"DUNKIN DONUTS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000010579.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of sandwich does the bag advertise?",
|
|
"answer": "donut",
|
|
"gt_answers": [
|
|
"CHICKEN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000010579.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What animal is this?",
|
|
"answer": "skunk",
|
|
"gt_answers": [
|
|
"Skunk"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02445715_144.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is this photo for?",
|
|
"answer": "subway",
|
|
"gt_answers": [
|
|
"Nathan's"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322772.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the big black words on the sign?",
|
|
"answer": "bus stop",
|
|
"gt_answers": [
|
|
"BUS STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000038435.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the ski helmet?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Uvex"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406445.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street name is pictured?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"Garfield Avenue"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403690.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What restaurant is advertised at the bottom of this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Taco Bell"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000532355.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of photography is Purple Martini?",
|
|
"answer": "wedding photography",
|
|
"gt_answers": [
|
|
"Wedding"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7684.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what date does the sign say?",
|
|
"answer": "june 15",
|
|
"gt_answers": [
|
|
"june 13-17 2006",
|
|
"June 13 - 17, 2006"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02100877_2338.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Red sign found on a intersection",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337559.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the man's shirt say?",
|
|
"answer": "cash",
|
|
"gt_answers": [
|
|
"Cash"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326418.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand appears to to be sponsored in the background?",
|
|
"answer": "volvo",
|
|
"gt_answers": [
|
|
"Fella"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03496892_10844.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the tractor?",
|
|
"answer": "fendt",
|
|
"gt_answers": [
|
|
"MCCORMICK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03496892_10844.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed limit for a left turn?",
|
|
"answer": "30 mph",
|
|
"gt_answers": [
|
|
"25 MPH",
|
|
"25"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353013.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last four letters of produce?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"duce"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462472.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which beer brand is being advertised above the two Coca-Cola advertisements?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"COORS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462472.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word at the top of the right most advertisement on the top row?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"lucky"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462472.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this shirt represent",
|
|
"answer": "abvi goodwill",
|
|
"gt_answers": [
|
|
"Goodwill"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000692.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does he need donations for?",
|
|
"answer": "he is homeless",
|
|
"gt_answers": [
|
|
"nice dinner"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375759.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which fruit is depicted in this picture?",
|
|
"answer": "banana",
|
|
"gt_answers": [
|
|
"Banana"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462736.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate on the bus?",
|
|
"answer": "dmc 005",
|
|
"gt_answers": [
|
|
"LT02 ZDR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the destination of the bus?",
|
|
"answer": "manchester",
|
|
"gt_answers": [
|
|
"Manchester"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number of the bus?",
|
|
"answer": "mta bus",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of gel is the Ice Cold product",
|
|
"answer": "epsom salt",
|
|
"gt_answers": [
|
|
"Analgesic"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03742115_6488.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Does this boat make you happy?",
|
|
"answer": "yes",
|
|
"gt_answers": [
|
|
"HAPPY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000400275.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is being played?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000201236.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the bus?",
|
|
"answer": "people",
|
|
"gt_answers": [
|
|
"Tata"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417372.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the name of the bridge?",
|
|
"answer": "hudson river bridge",
|
|
"gt_answers": [
|
|
"Lions Gate",
|
|
"Lions Gate Bridge"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336569.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the ping-pong table?",
|
|
"answer": "table tennis table",
|
|
"gt_answers": [
|
|
"sponeta"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000161028.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what kind of establishment is this?",
|
|
"answer": "restaurant",
|
|
"gt_answers": [
|
|
"restaurant"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372333.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the car's license plate?",
|
|
"answer": "mcfly",
|
|
"gt_answers": [
|
|
"3XHE872"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326312.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the middle school bus's number?",
|
|
"answer": "a yellow school bus",
|
|
"gt_answers": [
|
|
"566"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150399.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kink of bus is shown?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"School bus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150399.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this street?",
|
|
"answer": "cedar street",
|
|
"gt_answers": [
|
|
"Cedar Street",
|
|
"CEDAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406371.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on this green sign board?",
|
|
"answer": "ceara street",
|
|
"gt_answers": [
|
|
"Cedar Street"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406371.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this cereal?",
|
|
"answer": "kashi",
|
|
"gt_answers": [
|
|
"Special K"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016496.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store name is on the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"The Honesty Shop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000272218.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"30"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358556.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is found on the player wearing red",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000129722.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the player to the furthest left wearing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"27"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000129722.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the top left?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"+30"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000379159.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is center bottom?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"0"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000379159.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed at the bottom left of the image?",
|
|
"answer": "ski lift",
|
|
"gt_answers": [
|
|
"2012 Jerimiah Fulton",
|
|
"\ufffd\ufffd 2012 Jerimiah Fulton"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417658.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What tour bus is this?",
|
|
"answer": "official tour bus",
|
|
"gt_answers": [
|
|
"City Sightseeing"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415679.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say under the clock?",
|
|
"answer": "faceable",
|
|
"gt_answers": [
|
|
"Rolex"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325104.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which company is written on the watermark?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"PanArmenian Photo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360520.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF FUEL STATION?",
|
|
"answer": "shell",
|
|
"gt_answers": [
|
|
"INDIANOIL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000330572.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of guitar is in this image?",
|
|
"answer": "acoustic guitar",
|
|
"gt_answers": [
|
|
"ACOUSTIC"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011978.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the shirt of the man behind the fence?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FLYING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000502517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the red sign?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"257"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353917.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is after the R on the poster",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_269.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the Slimming lettering",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"white"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_269.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is spray painted on the red sign?",
|
|
"answer": "hammer time",
|
|
"gt_answers": [
|
|
"HAMMERTIME"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it says in the banner?",
|
|
"answer": "grand slam return",
|
|
"gt_answers": [
|
|
"Grand Sla return"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000465090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "delta airlines",
|
|
"gt_answers": [
|
|
"Delta"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373278.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number of the plane?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"N9488P"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325496.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the mountain on the red sticker?",
|
|
"answer": "mont blanc",
|
|
"gt_answers": [
|
|
"Mt. Fuji"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000211040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the year written on the tall boy's sweater?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"1975"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green sign say?",
|
|
"answer": "exit",
|
|
"gt_answers": [
|
|
"exit"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_210.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company owns this jet?",
|
|
"answer": "airline",
|
|
"gt_answers": [
|
|
"air canada"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056065.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the truck say?",
|
|
"answer": "eat plants",
|
|
"gt_answers": [
|
|
"GERANIUMS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337133.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license number?",
|
|
"answer": "tb tb",
|
|
"gt_answers": [
|
|
"ao-068-kh",
|
|
"A0 068 KH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000403263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is at the top of the sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"global ENGLISH SCHOOL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000011697.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bus company?",
|
|
"answer": "andrews tours",
|
|
"gt_answers": [
|
|
"Andesmar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321275.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "anderson",
|
|
"gt_answers": [
|
|
"259"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321275.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How can you contact this company?",
|
|
"answer": "they have a website",
|
|
"gt_answers": [
|
|
"www.andesmar.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321275.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the rider?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"G.MADEHO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000153634.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what name is on the jockys pants?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"G.MADEHO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000153634.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written on the birthday cake?",
|
|
"answer": "happy birthday",
|
|
"gt_answers": [
|
|
"Lizzie"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317178.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number on the building?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"33"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What was the street name written?",
|
|
"answer": "williams st",
|
|
"gt_answers": [
|
|
"WILLIS ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317750.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is indicated in the red sign board?",
|
|
"answer": "cable car",
|
|
"gt_answers": [
|
|
"CABLE CAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317750.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the wall besides sign board?",
|
|
"answer": "cable car",
|
|
"gt_answers": [
|
|
"WATCHES"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317750.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign protesting?",
|
|
"answer": "london congestion charge",
|
|
"gt_answers": [
|
|
"bike PARKING TAX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is listed in the sidecar windshield?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"IVAN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company name is shown ion the black background building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"T.M.LEW"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is straight ahead?",
|
|
"answer": "durum stockund",
|
|
"gt_answers": [
|
|
"DJURSHOLM STOCKSUND"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346766.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is the tea advertised on the bus?",
|
|
"answer": "50p",
|
|
"gt_answers": [
|
|
"99"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000459082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are wearing hats?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Two",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number can be seen on top?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"50"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03075370_3057.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street printed on the sign?",
|
|
"answer": "wooster street",
|
|
"gt_answers": [
|
|
"WOOSTER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319254.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the person listed at the bottom of the photo?",
|
|
"answer": "john",
|
|
"gt_answers": [
|
|
"Jim"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01608432_5084.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the umpires sleeve?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"58",
|
|
"52"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368873.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the bottom?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"paribas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000177957.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the blue bus number?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"9508"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368476.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is it day or night?",
|
|
"answer": "night",
|
|
"gt_answers": [
|
|
"Night"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/16.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number below the windshield on the bus closest to you in the image.",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"RB-4",
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327747.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name under books on the wall?",
|
|
"answer": "a bookstore",
|
|
"gt_answers": [
|
|
"Kinokuniya"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_44.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "kobo",
|
|
"gt_answers": [
|
|
"Kinokuniya"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_44.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who is this a memorial too?",
|
|
"answer": "victoria wash",
|
|
"gt_answers": [
|
|
"hiram bingham"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892201_14020.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date of this event?",
|
|
"answer": "4 4",
|
|
"gt_answers": [
|
|
"JULY 4TH 2009"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000345417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the blender?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Blendtec home"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000498409.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the lable to the left say?",
|
|
"answer": "ovens",
|
|
"gt_answers": [
|
|
"The Wu"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376878.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the cinema?",
|
|
"answer": "wilshire",
|
|
"gt_answers": [
|
|
"Wilshire"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_28442.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top of the sign?",
|
|
"answer": "wilshire",
|
|
"gt_answers": [
|
|
"RIGHT AT YOUR DOOR"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_28442.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bottom of the sign?",
|
|
"answer": "death at the door",
|
|
"gt_answers": [
|
|
"DEATH AT A FUNERAL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_28442.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the full word seen in the background on the photo to the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"owls"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000335660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the full word seen in the background on the photo to the right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"owls"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000335660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What drink is in the mug?",
|
|
"answer": "coffee",
|
|
"gt_answers": [
|
|
"Coffee"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03085013_19707.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is flying towards the man?",
|
|
"answer": "a tennis ball",
|
|
"gt_answers": [
|
|
"Tennis ball"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the T-shirt say?",
|
|
"answer": "lego party",
|
|
"gt_answers": [
|
|
"Jr's Garbage"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the shirt?",
|
|
"answer": "happy",
|
|
"gt_answers": [
|
|
"Jr's"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Mention any one name written on the sticks",
|
|
"answer": "the name of a baseball player",
|
|
"gt_answers": [
|
|
"Wilson"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362657.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the mobile phone?",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"NOKIA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03777754_2068.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the capital letter on the second word of the store's name?",
|
|
"answer": "saloon",
|
|
"gt_answers": [
|
|
"S"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367520.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What part of the military does this plane belong to?",
|
|
"answer": "navy",
|
|
"gt_answers": [
|
|
"NAVY"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04552348_1274.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"Stop testan"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323335.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What bank is advertised in the dug out?",
|
|
"answer": "bank of america",
|
|
"gt_answers": [
|
|
"Bank of America"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000500962.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the t-shirt say?",
|
|
"answer": "warning",
|
|
"gt_answers": [
|
|
"Warning",
|
|
"warning !"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016502.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many players are there whose feet are completely off the ground?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"two"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337457.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green sign say?",
|
|
"answer": "exit",
|
|
"gt_answers": [
|
|
"Exit"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_131.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the train?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"31233"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000135086.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this bus number?",
|
|
"answer": "x1",
|
|
"gt_answers": [
|
|
"38"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350689.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"Victoria"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350689.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the bus?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"LJ59GUA",
|
|
"LJ59 GUA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350689.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white vase say?",
|
|
"answer": "love",
|
|
"gt_answers": [
|
|
"Love"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What signis posted on the freezer door?",
|
|
"answer": "private property",
|
|
"gt_answers": [
|
|
"Private Property No Trespassing"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353516.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the lens caps?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Canon"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000523684.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word written on the top of the remote control?",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"Power"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333488.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the dirtbike?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000217094.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the oven?",
|
|
"answer": "an oven",
|
|
"gt_answers": [
|
|
"Vulcan"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374883.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the upper left corner of this photo?",
|
|
"answer": "skateboarding",
|
|
"gt_answers": [
|
|
"MyProfe"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323685.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text on the plane say?",
|
|
"answer": "indigo",
|
|
"gt_answers": [
|
|
"indigo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000680.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the green sign in the upper left?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402386.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the jersey number?",
|
|
"answer": "7",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n09835506_19202.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the crosswalk sign?",
|
|
"answer": "crosswalk",
|
|
"gt_answers": [
|
|
"DONT WALK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319154.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the website that this photo is from according to the bottom corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"stpaulphotos.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000517510.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website?",
|
|
"answer": "the website",
|
|
"gt_answers": [
|
|
"www.shutterstock.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07714571_13102.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are located on the umbrella?",
|
|
"answer": "bar a ristorante",
|
|
"gt_answers": [
|
|
"BAR & RISTORANTE"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1243.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first character on the bus's licence plate?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"H"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000529470.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are printed on the poster?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"rom project"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000387517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on Green board",
|
|
"answer": "welcome to motel",
|
|
"gt_answers": [
|
|
"WESTWARD HO MOTEL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351754.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"335"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353849.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What hotel is being shown",
|
|
"answer": "marriott marriott marriott",
|
|
"gt_answers": [
|
|
"Marriott"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000726.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does that blue sign say?",
|
|
"answer": "a tennis court",
|
|
"gt_answers": [
|
|
"POLO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373707.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What emergency service is the van used for?",
|
|
"answer": "police",
|
|
"gt_answers": [
|
|
"Police"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03977966_5355.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline written on the plane?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Jetblue."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000186740.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What game system is the box for?",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"Wii"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000459819.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What game is included in the box?",
|
|
"answer": "wii sports",
|
|
"gt_answers": [
|
|
"Wii Sports"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000459819.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does Perry's Place serve?",
|
|
"answer": "food and soup",
|
|
"gt_answers": [
|
|
"FOOD & SOUP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363173.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red street sign instructing you to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363173.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is up at bat?",
|
|
"answer": "a baseball player",
|
|
"gt_answers": [
|
|
"Paulino"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04149813_5234.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team is in the field?",
|
|
"answer": "the texans",
|
|
"gt_answers": [
|
|
"Mariners"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04149813_5234.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team is batting?",
|
|
"answer": "the phillies",
|
|
"gt_answers": [
|
|
"Pirates"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04149813_5234.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the closest pizza box in green letters?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"HOME OF POKEY STIX!!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company in the building?",
|
|
"answer": "steelcase",
|
|
"gt_answers": [
|
|
"Chicago Sun-Times"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000356615.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is Jennifer in 2011?",
|
|
"answer": "in japan",
|
|
"gt_answers": [
|
|
"Kusatsu"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000222913.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the brown sign on the left pointing to?",
|
|
"answer": "smith's market inc",
|
|
"gt_answers": [
|
|
"Dr Eugene Clark Library"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713539.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who's market is being advertised here?",
|
|
"answer": "smith's market",
|
|
"gt_answers": [
|
|
"Smitty's"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713539.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of food in Smitty's is Fresh choice?",
|
|
"answer": "fast food",
|
|
"gt_answers": [
|
|
"Meat"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713539.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is this airplane from?",
|
|
"answer": "klm airlines",
|
|
"gt_answers": [
|
|
"KLM Asia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the front of the bus below the windshield?",
|
|
"answer": "77",
|
|
"gt_answers": [
|
|
"2178"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318718.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number in orange is above the windshield?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"19"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318718.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to work",
|
|
"gt_answers": [
|
|
"METROTOWN STN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318718.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time of day - AM or PM - is next to the barcode?",
|
|
"answer": "am",
|
|
"gt_answers": [
|
|
"AM"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013428.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the likely full text of the large text in the reddish orange area below?",
|
|
"answer": "value great savings pack",
|
|
"gt_answers": [
|
|
"Valu Great Savin Pac"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013428.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of cell phone is on the table?",
|
|
"answer": "blackberry",
|
|
"gt_answers": [
|
|
"SAMSUNG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000081031.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"SPRING ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285738.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the venue?",
|
|
"answer": "reel big fish",
|
|
"gt_answers": [
|
|
"Theater Living Arts"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_70433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What band is playing thursday?",
|
|
"answer": "the big fish",
|
|
"gt_answers": [
|
|
"REEL BIG FISH"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_70433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are colours of device?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"black",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000264406.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the phone?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Samsung"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000264406.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company owns the bus?",
|
|
"answer": "taipei metro",
|
|
"gt_answers": [
|
|
"Turis Tour",
|
|
"TurisTour"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362511.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit is being advertised?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"Banana",
|
|
"bananas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000422705.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What watermark is displayed at the top?",
|
|
"answer": "istockphoto",
|
|
"gt_answers": [
|
|
"gettyimages"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03787032_344.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is on the cover of the magazine?",
|
|
"answer": "superman",
|
|
"gt_answers": [
|
|
"Superman"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_1137.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the magazine publisher?",
|
|
"answer": "time inc",
|
|
"gt_answers": [
|
|
"Time"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_1137.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in blue board",
|
|
"answer": "it is a parade",
|
|
"gt_answers": [
|
|
"PLAZA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330939.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the time limit?",
|
|
"answer": "60 minutes",
|
|
"gt_answers": [
|
|
"TWO HOUR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000057308.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this picture taken?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337780.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are displayed inside the green sign at the top right of the image?",
|
|
"answer": "grizzly bear",
|
|
"gt_answers": [
|
|
"Bear and Wolf Museum"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406383.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the sculpture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Ecalite"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000396977.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what country is this police station in?",
|
|
"answer": "india",
|
|
"gt_answers": [
|
|
"MUMB"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000938.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the baseball cap?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"C"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000464183.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the net weight",
|
|
"answer": "0 oz net weight",
|
|
"gt_answers": [
|
|
"32 oz",
|
|
"32 OZ (2LB) 907 G"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019388.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the copyrighted name displayed in the bottom left?",
|
|
"answer": "elizabeth taylor",
|
|
"gt_answers": [
|
|
"Dan Kaiser"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02013706_991.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What food is in the can?",
|
|
"answer": "chick peas",
|
|
"gt_answers": [
|
|
"Chick peas"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001870.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the writing on the side of the wagon say?",
|
|
"answer": "horse drawn carriage",
|
|
"gt_answers": [
|
|
"LMS Railway Co 22465",
|
|
"L M S RAILWAY CO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03538406_9872.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made the laptop in this photograph?",
|
|
"answer": "dell",
|
|
"gt_answers": [
|
|
"DELL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315396.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is presenting the dinner?",
|
|
"answer": "hublot",
|
|
"gt_answers": [
|
|
"Hublot"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292923.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What car brand is the dinner for?",
|
|
"answer": "hublot",
|
|
"gt_answers": [
|
|
"Ferrari"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292923.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the dinner for?",
|
|
"answer": "the hublot fc members",
|
|
"gt_answers": [
|
|
"FERRARI OWNERS CLUB"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292923.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year other than 2005 is listed on the boy's basketball shirt?",
|
|
"answer": "2006",
|
|
"gt_answers": [
|
|
"2006"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000478582.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the pedestrian sign say?",
|
|
"answer": "crosswalk",
|
|
"gt_answers": [
|
|
"Walk"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361282.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus schedule number?",
|
|
"answer": "the bus schedule number is",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361282.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "a city",
|
|
"gt_answers": [
|
|
"Jeffery Exp"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361282.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the signs say is free?",
|
|
"answer": "free food",
|
|
"gt_answers": [
|
|
"Blood & Landshark"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371549.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the building say?",
|
|
"answer": "amusement park",
|
|
"gt_answers": [
|
|
"Savoy"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02966193_17538.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where did this photo come from?",
|
|
"answer": "germany",
|
|
"gt_answers": [
|
|
"www.pjjphotography.co.uk"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02966193_17538.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the bottom right?",
|
|
"answer": "arrow",
|
|
"gt_answers": [
|
|
"cnsphoto"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02879718_5117.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how much are the bananas being sold for?",
|
|
"answer": "$1.50 each",
|
|
"gt_answers": [
|
|
"1,50"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358903.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how much are the ranges being sold for?",
|
|
"answer": "they are being sold for a dollar",
|
|
"gt_answers": [
|
|
"1,50"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358903.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what country are the bananas from?",
|
|
"answer": "south america",
|
|
"gt_answers": [
|
|
"Equador"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358903.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the address for the website on this image?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"2dehands.be"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04026417_12231.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text found in the middle of the watch on the necktie?",
|
|
"answer": "the time is now",
|
|
"gt_answers": [
|
|
"The New Era U.S.A."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000088871.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the green skateboard?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Atwater"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000433715.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which of the companies on the wall are a camera manufacturer",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Canon"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000050470.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the vehicle?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"GREEN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175188.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word on the vehicle?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"MACHINES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175188.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word below the name on the building?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"salons"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175188.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the male's grey, hooded jersey?",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"PHILADELPHIA 76ERS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four words are written in white, on the black sign in the background to the left?",
|
|
"answer": "expo center",
|
|
"gt_answers": [
|
|
"MULTIPLY YOUR DIGITAL LIFE.",
|
|
"MULTIPLY YOUR DIGITAL LIFE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is the photo from?",
|
|
"answer": "china mainland",
|
|
"gt_answers": [
|
|
"www.ehsy.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02730930_36121.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is shown in front of the woman?",
|
|
"answer": "china mainland",
|
|
"gt_answers": [
|
|
"www.ehsy.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02730930_36121.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the large letters on the black apron that is hanging near a cupboard?",
|
|
"answer": "baker",
|
|
"gt_answers": [
|
|
"UPE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333489.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company on the building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Evergreen Textile UK Limited"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000033329.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the pole of the stoplight?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000033329.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Q: What is written on the cooks shirt? A: Pact",
|
|
"answer": "a pact a pact is a pact made between a person and a person",
|
|
"gt_answers": [
|
|
"PACT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000055135.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Q: What brand of charcoal is being used? A: Kingsford",
|
|
"answer": "a kingsford charcoal grill",
|
|
"gt_answers": [
|
|
"KINGSFORD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000055135.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is a white dog holding in his mouth?",
|
|
"answer": "a green frisbee",
|
|
"gt_answers": [
|
|
"frisbee"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327581.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the napkin?",
|
|
"answer": " ",
|
|
"gt_answers": [
|
|
"Passover Haggadah"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the back of the truck?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Frontier"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000162046.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say is being sold there?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"Bananas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319700.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"Bananas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319700.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written there in yellow letters?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"Bananas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319700.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of board reference?",
|
|
"answer": "the sign of board reference",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317410.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What place is labeled on the packaged napkin?",
|
|
"answer": "mr green",
|
|
"gt_answers": [
|
|
"TULLY COFEE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000276037.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which letters are shown on the side of the plastic cup?",
|
|
"answer": "m",
|
|
"gt_answers": [
|
|
"TULLY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000276037.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "To which mountaintop does the sign point, in English?",
|
|
"answer": "the sign points to a mountaintop",
|
|
"gt_answers": [
|
|
"Mushroom Peak"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/8549.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many meters must be traveled to the mountaintop?",
|
|
"answer": "900 meters",
|
|
"gt_answers": [
|
|
"3200 m",
|
|
"3200"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/8549.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "mc pig",
|
|
"gt_answers": [
|
|
"258-ZNF"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3965.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign in front of the vehicle?",
|
|
"answer": "pig bus",
|
|
"gt_answers": [
|
|
"Mr. Porkchop"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3965.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "flinders street",
|
|
"gt_answers": [
|
|
"church"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000188165.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "According to the signage, what is straight ahead?",
|
|
"answer": "truth or consequences",
|
|
"gt_answers": [
|
|
"CITY OF ELEPHANT BUTTE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367275.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cycle name?",
|
|
"answer": "tandem bicycle",
|
|
"gt_answers": [
|
|
"CO-MOTION CYCLE",
|
|
"CO-MOTION"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02835271_976.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company on the cup?",
|
|
"answer": "pepsi",
|
|
"gt_answers": [
|
|
"Pepsi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404234.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who owns the copyright ?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Vicor Goncalves"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02002556_1234.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What county is the bus transportation for?",
|
|
"answer": "county of los angeles",
|
|
"gt_answers": [
|
|
"ORANGE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000111604.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Robert McConnell"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000111604.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the large number written on the paper?",
|
|
"answer": "6",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000217959.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What unit of measurement does this pump use?",
|
|
"answer": "gallons",
|
|
"gt_answers": [
|
|
"Gallons"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03425413_19060.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the Side of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Scottish"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000419815.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of lens cap is that?",
|
|
"answer": "nikon lens cap",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_8268.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which box contains the word \"library\"?",
|
|
"answer": "tv in library",
|
|
"gt_answers": [
|
|
"Library",
|
|
"MEDIA LIBRARY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000049713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which box contains the phrase \"MythTv\"?",
|
|
"answer": "mythtv box",
|
|
"gt_answers": [
|
|
"mythTV"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000049713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which box contains the word \"recordings\"?",
|
|
"answer": "tv remote",
|
|
"gt_answers": [
|
|
"Recordings",
|
|
"MANAGE RECORDINGS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000049713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Oriana Bathroom?",
|
|
"answer": "an interactive bathroom",
|
|
"gt_answers": [
|
|
"Interactive"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04493381_50366.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the words in the left corner?",
|
|
"answer": "the oriana interactive bathroom",
|
|
"gt_answers": [
|
|
"THE LOFT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04493381_50366.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the middle sign on the building say?",
|
|
"answer": "west side bem",
|
|
"gt_answers": [
|
|
"E Barato"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2426.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the highest sign on the building say?",
|
|
"answer": "west side bem",
|
|
"gt_answers": [
|
|
"Vestir Bem"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2426.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which city is in the name of the hot pot shot?",
|
|
"answer": "hong kong",
|
|
"gt_answers": [
|
|
"SEOUL"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_469.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are legible to the left of the tennis player?",
|
|
"answer": "qatar",
|
|
"gt_answers": [
|
|
"OSBA ADER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000578119.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbered street is shown on the street signs?",
|
|
"answer": "2nd street",
|
|
"gt_answers": [
|
|
"21st St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last 4 numbers of the phone number on the sign?",
|
|
"answer": "212",
|
|
"gt_answers": [
|
|
"5900"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117796.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year what the photo taken",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000058926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer listed in the bottom right corner",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Nengah Januartha"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000058926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is being passed?",
|
|
"answer": "main street",
|
|
"gt_answers": [
|
|
"9TH STREET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000218455.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "In which city is the guy working?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"City of SF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355540.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the guys shop?",
|
|
"answer": "he is a taxi driver",
|
|
"gt_answers": [
|
|
"Parking Meter Repair"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355540.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Name the first number on the license plate?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355540.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the guy with the red shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000333595.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which number is written on the green cartel in the background?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"400"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000333595.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is funding the project?",
|
|
"answer": "the city",
|
|
"gt_answers": [
|
|
"AMERICAN RECOVERY AND REINVESTMENT ACT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the slogan of the project?",
|
|
"answer": "a better way",
|
|
"gt_answers": [
|
|
"Putting America to Work"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the green building?",
|
|
"answer": "street",
|
|
"gt_answers": [
|
|
"EASTERN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316446.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are written on the sign hanging from the traffic light?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"PED XING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316446.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written is red writing on the above banner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Silent Zone"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1154.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the hospital named after?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"San Ignacio"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1154.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the street sign on the bottom pointing to?",
|
|
"answer": "hospital",
|
|
"gt_answers": [
|
|
"Hecopab"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1154.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on front of truck",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"over size"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000548183.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bus?",
|
|
"answer": "the number on the bus is 40",
|
|
"gt_answers": [
|
|
"1552"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000378906.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the remote?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Humax"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000137096.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the plane?",
|
|
"answer": "vw",
|
|
"gt_answers": [
|
|
"D-Misy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353629.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the laptop in the picture?",
|
|
"answer": "acer",
|
|
"gt_answers": [
|
|
"acer"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359496.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which search engine is powering this phone?",
|
|
"answer": "yahoo",
|
|
"gt_answers": [
|
|
"Yahoo"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018379.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the brown and white bag?",
|
|
"answer": "guess",
|
|
"gt_answers": [
|
|
"GUESS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04026417_2867.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who is the player",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"braun"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056938.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the players number",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056938.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what company logo is behind player",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"us bank"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056938.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the uniform number of the catcher?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"19"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346930.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the uniform number of the sliding player?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346930.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is this plane from?",
|
|
"answer": "avance",
|
|
"gt_answers": [
|
|
"Avianca"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409501.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the plane have written on it?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"U.S. Air force"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293880.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city are these signs in?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"London"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339378.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the complete name of this product?",
|
|
"answer": "coca cola classic",
|
|
"gt_answers": [
|
|
"Coca-Cola Classic",
|
|
"COCA-COLA"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the innitals on the helment?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"AC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000309461.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on board",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362278.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the ball?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"Gilbert"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04118538_3212.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "united airlines",
|
|
"gt_answers": [
|
|
"United"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322205.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the license plate say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"NDK-611",
|
|
"NDK 611"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000226350.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the link shown in the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"WWW.WESTHOEK.BE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04523525_10714.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the tag?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"620"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000391632.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the clock?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Royal Arcade"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000202270.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "berlin germany",
|
|
"gt_answers": [
|
|
"Solingen Hbf"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000233341.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color shirt is the person wearing?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407473.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the boat?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"683311"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416761.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the license plate say?",
|
|
"answer": "elf",
|
|
"gt_answers": [
|
|
"BPE N2H",
|
|
"BPE 112H"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top of the bus say?",
|
|
"answer": "elf",
|
|
"gt_answers": [
|
|
"Elf Team Tyrrell"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT BRANCH OF THE SERVICE ARE THESE MEN FROM?",
|
|
"answer": "army",
|
|
"gt_answers": [
|
|
"air national guard AIR NATIONAL GUARD",
|
|
"AIR NATIONAL GUARD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000193654.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "jhb 005",
|
|
"gt_answers": [
|
|
"KNL 665"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000536280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus manufacture company?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"vanHool"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000536280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company that own the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"scotline tours"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000536280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this mouse?",
|
|
"answer": "microsoft",
|
|
"gt_answers": [
|
|
"DELL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000139914.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who is the player number 3?",
|
|
"answer": "catcher",
|
|
"gt_answers": [
|
|
"stoiffer"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402776.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the wall?",
|
|
"answer": "revolution",
|
|
"gt_answers": [
|
|
"power of revoluti"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on this image?",
|
|
"answer": "fire hydrant",
|
|
"gt_answers": [
|
|
"Dustin Diaz"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341952.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"RUELLE ELVIS LIVES LANE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000333841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "a motorcycle store",
|
|
"gt_answers": [
|
|
"Guess"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355126.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the bathroom cleaner?",
|
|
"answer": "ariel",
|
|
"gt_answers": [
|
|
"Flash"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000417105.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the avenue?",
|
|
"answer": "wilson avenue",
|
|
"gt_answers": [
|
|
"McGill College"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319828.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boulevard?",
|
|
"answer": "wilshire boulevard",
|
|
"gt_answers": [
|
|
"MAISONNEUVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319828.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of this lock?",
|
|
"answer": "master",
|
|
"gt_answers": [
|
|
"Master"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03075370_4982.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "pet rest area",
|
|
"gt_answers": [
|
|
"PET REST AREA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000454068.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white?",
|
|
"answer": "pet rest area",
|
|
"gt_answers": [
|
|
"PET REST AREA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000454068.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whose rest area is this?",
|
|
"answer": "pet rest area",
|
|
"gt_answers": [
|
|
"PET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000454068.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red word on the circle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"York"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000347055.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of a cheese calzone?",
|
|
"answer": "$11 at person's pizza",
|
|
"gt_answers": [
|
|
"28"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000091725.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does a square pie come with?",
|
|
"answer": "a side salad",
|
|
"gt_answers": [
|
|
"Specialty Topping"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000091725.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What costs 32.00?",
|
|
"answer": "a large pizza",
|
|
"gt_answers": [
|
|
"Cheese calzone w/ 1 topping",
|
|
"CHEESE W/ 1 TOPPING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000091725.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of a single room?",
|
|
"answer": "the price of a single room ranges from $80 to $100",
|
|
"gt_answers": [
|
|
"$19.95",
|
|
"19.95"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000815.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the motel?",
|
|
"answer": "all star",
|
|
"gt_answers": [
|
|
"Blue Swallow Motel",
|
|
"BLUE SWALLOW"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000815.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters and/or numbers on the license plate?",
|
|
"answer": "a b c",
|
|
"gt_answers": [
|
|
"7848WI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000218870.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red traffic sign read?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323999.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who took this picture?",
|
|
"answer": "weather k",
|
|
"gt_answers": [
|
|
"Wernher Krutein"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02640242_45953.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "torrie way",
|
|
"gt_answers": [
|
|
"TORRIE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06794110_5582.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word on the blue shirt?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Does"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329675.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the wall?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"13"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329675.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year did the bobsled run take place?",
|
|
"answer": "1980",
|
|
"gt_answers": [
|
|
"1980"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02860847_20726.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the bus?",
|
|
"answer": "tlh",
|
|
"gt_answers": [
|
|
"DRD130"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04487081_4491.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the date specified on this image?",
|
|
"answer": "1 july 2009",
|
|
"gt_answers": [
|
|
"20.3.2007"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254615.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the hand shaped sign?",
|
|
"answer": "no skateboarding",
|
|
"gt_answers": [
|
|
"Go Ord"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the Pitcher's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"39"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056859.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three digit number is on the back fence?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"395"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056859.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are visible on the top of the sign, to the right of the pole?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ticians"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000562517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made the policy car in the photograph?",
|
|
"answer": "ford",
|
|
"gt_answers": [
|
|
"Ford"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326583.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the telephone number for the police?",
|
|
"answer": "911",
|
|
"gt_answers": [
|
|
"1-877-4FPS-411"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326583.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of beer are they drinking?",
|
|
"answer": "bud light",
|
|
"gt_answers": [
|
|
"RED",
|
|
"red eye"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000474637.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the textbook on the top of the stack?",
|
|
"answer": "physics for dummies",
|
|
"gt_answers": [
|
|
"sign SOLUTIONS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000474637.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the camera?",
|
|
"answer": "polaroid corporation",
|
|
"gt_answers": [
|
|
"Polaroid"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03976467_1694.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the side of the box?",
|
|
"answer": "660",
|
|
"gt_answers": [
|
|
"690"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03976467_1694.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on this building?",
|
|
"answer": "city hall",
|
|
"gt_answers": [
|
|
"investments."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319405.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first three letters after the word SUPER?",
|
|
"answer": "su",
|
|
"gt_answers": [
|
|
"HER"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the store on the right called?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"Calvin Klein"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_764.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the store on the right selling?",
|
|
"answer": "watches",
|
|
"gt_answers": [
|
|
"Underwear"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_764.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who created this image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Jason Mayes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000238568.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats is the vehicle number plate",
|
|
"answer": "the vehicle number plate is tbc",
|
|
"gt_answers": [
|
|
"PT-15331",
|
|
"PT - 15331"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410177.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company's brand logo is in the picture?",
|
|
"answer": "american original instruments",
|
|
"gt_answers": [
|
|
"Gibson"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02787622_5669.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many calories per serving?",
|
|
"answer": "260 calories per serving",
|
|
"gt_answers": [
|
|
"180"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000894.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much dietary fiber per serving?",
|
|
"answer": "4 g",
|
|
"gt_answers": [
|
|
"5g"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000894.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many grams of trans fat per serving?",
|
|
"answer": "2 grams",
|
|
"gt_answers": [
|
|
"0"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000894.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of Jean?",
|
|
"answer": "levi straits co",
|
|
"gt_answers": [
|
|
"Levi Strauss & CO."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the brand located?",
|
|
"answer": "denver, colorado",
|
|
"gt_answers": [
|
|
"San Francisco Cal."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign asking you to do?",
|
|
"answer": "buy tickets",
|
|
"gt_answers": [
|
|
"Purchase Tickets"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1258.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What cross street is in front of the camera person?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SULPHUR AVE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000524676.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What toy brand made the green bear?",
|
|
"answer": "beanie babies",
|
|
"gt_answers": [
|
|
"ty"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000137999.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the fridge?",
|
|
"answer": "love",
|
|
"gt_answers": [
|
|
"Am"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409281.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of cake is on the table?",
|
|
"answer": "red velvet cake",
|
|
"gt_answers": [
|
|
"red velvet layer cake"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000387102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What web site can I go for more info?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"kittyscuquis.wordpress.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000387102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of pride?",
|
|
"answer": "usc pride",
|
|
"gt_answers": [
|
|
"USC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364298.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percent of Vitamin A is there?",
|
|
"answer": "vitamin a",
|
|
"gt_answers": [
|
|
"0%"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003550.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percent of Vitamin C is there?",
|
|
"answer": "5%",
|
|
"gt_answers": [
|
|
"6%"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003550.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percent of Calcium is there?",
|
|
"answer": "5%",
|
|
"gt_answers": [
|
|
"0%"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003550.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is being played?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Baseball"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356681.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the logo in the background",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"Live hard drive fast"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348203.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the wall in white?",
|
|
"answer": "baseball game",
|
|
"gt_answers": [
|
|
"PNCBANK",
|
|
"PNC BANK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407549.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number written on the woman's thigh?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"12"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160294.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the 3-letter text written in yellow on the woman's sticks?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"LEK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160294.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this picture taken?",
|
|
"answer": "2013",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who gave permission to reproduce this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"becky moody"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of trips are to the left?",
|
|
"answer": "tours",
|
|
"gt_answers": [
|
|
"Boat",
|
|
"CANAL BOAT TRIPS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377279.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this product?",
|
|
"answer": "coffee",
|
|
"gt_answers": [
|
|
"Timothy's World Coffee"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016344.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many K-cups?",
|
|
"answer": "12",
|
|
"gt_answers": [
|
|
"24"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016344.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What flavor is it?",
|
|
"answer": "caramel apple",
|
|
"gt_answers": [
|
|
"Cinnamon"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016344.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many servings are in this container?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"About 5",
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016650.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many calories are in one serving?",
|
|
"answer": "240 calories",
|
|
"gt_answers": [
|
|
"170"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016650.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much sodium is in one serving?",
|
|
"answer": "480 mg sodium",
|
|
"gt_answers": [
|
|
"550mg"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016650.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the golf ball say?",
|
|
"answer": "david 60",
|
|
"gt_answers": [
|
|
"David 60"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03445777_838.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the grass say?",
|
|
"answer": "david 60",
|
|
"gt_answers": [
|
|
"Happy Birthday"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03445777_838.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the sign on the left?",
|
|
"answer": "u s open tennis",
|
|
"gt_answers": [
|
|
"Vittel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000370120.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What photographer took this photo?",
|
|
"answer": "michael scott",
|
|
"gt_answers": [
|
|
"DBG Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346253.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the orange poster say?",
|
|
"answer": "no smoking",
|
|
"gt_answers": [
|
|
"because you will be inspired"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_88.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which colors are painted on the fire hydrant?",
|
|
"answer": "red and yellow",
|
|
"gt_answers": [
|
|
"Yellow, red, green",
|
|
"Yellow, green and red"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334644.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOR OF THE AEROPLANE?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"WHITE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357500.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the advertiser behind the player?",
|
|
"answer": "dew",
|
|
"gt_answers": [
|
|
"Hilton"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404875.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store behind the mini van?",
|
|
"answer": "rose",
|
|
"gt_answers": [
|
|
"Rose Metal"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339698.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name on the tag?",
|
|
"answer": "ferret",
|
|
"gt_answers": [
|
|
"Angela"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02441942_21631.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the sign with the swan?",
|
|
"answer": "swatch",
|
|
"gt_answers": [
|
|
"swarovski"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363419.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the train on the left?",
|
|
"answer": "the train on the right",
|
|
"gt_answers": [
|
|
"115012"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373563.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of both trains?",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"SNCF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373563.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is in the clocks?",
|
|
"answer": "the clocks are showing different times",
|
|
"gt_answers": [
|
|
"12:38:56"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03197337_3129.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the product?",
|
|
"answer": "oil filter",
|
|
"gt_answers": [
|
|
"Oil Filter"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03843555_7987.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which is the brand?",
|
|
"answer": "toyota",
|
|
"gt_answers": [
|
|
"Toyota"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03843555_7987.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many doughnuts are being eaten by women?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"one"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370664.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many trees are in this picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327949.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many people are in mid-air?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"1",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327949.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Wht is the number listed on the sign on the right?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"1432"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410543.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the orange cube?",
|
|
"answer": "b",
|
|
"gt_answers": [
|
|
"B"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000140563.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this tool?",
|
|
"answer": "dewalt",
|
|
"gt_answers": [
|
|
"Oupu"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03995372_9600.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the fourth large letter written on the side of the truck?",
|
|
"answer": "c",
|
|
"gt_answers": [
|
|
"c"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414764.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is above the screen?",
|
|
"answer": "a clock",
|
|
"gt_answers": [
|
|
"SEnli"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403765.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of information is being logged on the computer?",
|
|
"answer": "e mail",
|
|
"gt_answers": [
|
|
"FIELD notes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000078394.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of documents are on top of the computer?",
|
|
"answer": "a laptop, a phone, a fax machine, and a printer",
|
|
"gt_answers": [
|
|
"FIELD notes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000078394.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What animal is in the picture?",
|
|
"answer": "tiger",
|
|
"gt_answers": [
|
|
"Tiger"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the statue holding in his hand?",
|
|
"answer": "umbrella",
|
|
"gt_answers": [
|
|
"Umbrella"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the fence?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"Black"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is appeared in the red heart appeared in wall/",
|
|
"answer": "graffiti",
|
|
"gt_answers": [
|
|
"pain"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where are the oranges from?",
|
|
"answer": "the store",
|
|
"gt_answers": [
|
|
"California"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How are the oranges described on this box?",
|
|
"answer": "oranges",
|
|
"gt_answers": [
|
|
"Fresh & Juicy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the side of the boat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"1646"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000529956.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this place help you grow?",
|
|
"answer": "a library is a great place to study",
|
|
"gt_answers": [
|
|
"business",
|
|
"YOUR OWN BUSINESS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000338030.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phrase is written on the black and white signs?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377166.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the main text on the sticker",
|
|
"answer": "paris paris",
|
|
"gt_answers": [
|
|
"Dessous"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2274.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the sign say?",
|
|
"answer": "alto",
|
|
"gt_answers": [
|
|
"alto"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000025797.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the green,white, and red sticker say?",
|
|
"answer": "alto",
|
|
"gt_answers": [
|
|
"MADRAZO ES el BUENO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000025797.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS PRINTED ON GREEN JERSEY?",
|
|
"answer": "team name",
|
|
"gt_answers": [
|
|
"FMI"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355772.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS PRINTED ON BLUE JERSEY?",
|
|
"answer": "team name",
|
|
"gt_answers": [
|
|
"M LINE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355772.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHO IS COPY RIGHT AUTHORITY?",
|
|
"answer": "the united states copyright office",
|
|
"gt_answers": [
|
|
"WALTHER.SIKSMA.NL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355772.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the white bus ?",
|
|
"answer": "volvo",
|
|
"gt_answers": [
|
|
"Newport"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When does the store in the image open?",
|
|
"answer": "7 am",
|
|
"gt_answers": [
|
|
"7.30AM"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_144.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the plate number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"749 248-1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000323252.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which sale is being advertised?",
|
|
"answer": "50% off",
|
|
"gt_answers": [
|
|
"End of season sale",
|
|
"END OF SEASON"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_369.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Products in this sale are up to how much off?",
|
|
"answer": "50%",
|
|
"gt_answers": [
|
|
"50 PERCENT"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_369.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand listed on the object?",
|
|
"answer": "palladium",
|
|
"gt_answers": [
|
|
"PALLADIO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03908714_3808.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the sandwich wrapper?",
|
|
"answer": "italy",
|
|
"gt_answers": [
|
|
"MILANO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411096.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can't be stopped?",
|
|
"answer": "dancing",
|
|
"gt_answers": [
|
|
"Dancin'"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346088.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what sign is written on the stone?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company does the lady wearing a silver watch work for?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ZB"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000515053.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How is the event being broadcast?",
|
|
"answer": "live stream",
|
|
"gt_answers": [
|
|
"LIVE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000515053.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is under the leaf icon?",
|
|
"answer": "new",
|
|
"gt_answers": [
|
|
"new"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000515053.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the desktop computer?",
|
|
"answer": "dell",
|
|
"gt_answers": [
|
|
"DELL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000134586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red neon sign?",
|
|
"answer": "tavern",
|
|
"gt_answers": [
|
|
"Restrooms"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363711.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the word on the red signs?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359490.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street sign is shown?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"Pelican"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404965.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what dose the orange sticker say",
|
|
"answer": "\"do not touch\"",
|
|
"gt_answers": [
|
|
"biohazard waste"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340909.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the year on the bus?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2018"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324886.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written after PROP",
|
|
"answer": "8",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403218.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the store?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Honda"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000597.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the only letter on show?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000532779.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airplane?",
|
|
"answer": "emirates airlines",
|
|
"gt_answers": [
|
|
"Emirates"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349375.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word to the left of the lady in blue?",
|
|
"answer": "card",
|
|
"gt_answers": [
|
|
"Card"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416048.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many times does the avarage adult heart beats per minute?",
|
|
"answer": "60 to 100",
|
|
"gt_answers": [
|
|
"72 times",
|
|
"72"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_39.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the commercial display?",
|
|
"answer": "ipod",
|
|
"gt_answers": [
|
|
"Human Body Experience"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_39.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country name is written on the side of the aircraft?",
|
|
"answer": "usa",
|
|
"gt_answers": [
|
|
"United States"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04266014_1118.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of beer is on the table?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"KIRIN ICHIBAN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000151103.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the big word on the red sign?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"Closed"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is in the white light on the left at the top of the window?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"10708"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the red sign say?",
|
|
"answer": "burger king",
|
|
"gt_answers": [
|
|
"closed"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the address in the window?",
|
|
"answer": "the address in the window",
|
|
"gt_answers": [
|
|
"10708"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of wine is on the left?",
|
|
"answer": "port",
|
|
"gt_answers": [
|
|
"Shiraz"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000149221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of wine is on the right?",
|
|
"answer": "port",
|
|
"gt_answers": [
|
|
"Chardonnay"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000149221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the size of the container on the right?",
|
|
"answer": "750ml",
|
|
"gt_answers": [
|
|
"750ml",
|
|
"750 ML"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000149221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the mans jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"21"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000570440.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the image tag?",
|
|
"answer": "hdr",
|
|
"gt_answers": [
|
|
"Richard"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03065424_54356.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time was this photo taken?",
|
|
"answer": "08:00",
|
|
"gt_answers": [
|
|
"4:39 AM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363429.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What date was this photo taken?",
|
|
"answer": "october 1",
|
|
"gt_answers": [
|
|
"09/02/08"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363429.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the tail of the plane?",
|
|
"answer": "eagle",
|
|
"gt_answers": [
|
|
"Skyline"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374103.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is highlighted in Green?",
|
|
"answer": "speedometer",
|
|
"gt_answers": [
|
|
"N"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03841143_897.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is the skateboard?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"black",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414825.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the cross street?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"MAIN ST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000526362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What building do you see?",
|
|
"answer": "a building",
|
|
"gt_answers": [
|
|
"PERMANENT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361060.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the closest store shown?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Boutique"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_659.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the word on the binocholars?",
|
|
"answer": "triplet",
|
|
"gt_answers": [
|
|
"Triplet"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_10281.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the registration number of the bus?",
|
|
"answer": "the bus's registration number is person",
|
|
"gt_answers": [
|
|
"14-36",
|
|
"1436"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000280339.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number code written at the bottom of the image?",
|
|
"answer": "mf-mf-mf",
|
|
"gt_answers": [
|
|
"021-51697948"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04505470_2816.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what type of drink is on the floor?",
|
|
"answer": "water",
|
|
"gt_answers": [
|
|
"water"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04557648_3789.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on this train's wagon?",
|
|
"answer": "sb ffs",
|
|
"gt_answers": [
|
|
"SBB CFF FFS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362688.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the restaurant?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"Ramen Play"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_424.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is displayed in red?",
|
|
"answer": "people",
|
|
"gt_answers": [
|
|
"Ramen Play"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_424.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Pro Kennex"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000513943.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time does the clock say?",
|
|
"answer": "it says 0",
|
|
"gt_answers": [
|
|
"PM 7:35"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000874.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bottom oven screen?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"2306"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000203035.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus schedule number?",
|
|
"answer": "the bus schedule number is",
|
|
"gt_answers": [
|
|
"335"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355773.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are written in red?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"Bata"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000171.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is being played?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357762.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the player's shirt?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357762.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Street number address of this building?",
|
|
"answer": "xerox building - xerox corporation",
|
|
"gt_answers": [
|
|
"100 S. Clinton Ave",
|
|
"100 S CLINTON AVENUE"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the street sign?",
|
|
"answer": "bedford street",
|
|
"gt_answers": [
|
|
"Bedford St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red sign with green writing advertising?",
|
|
"answer": "a restaurant",
|
|
"gt_answers": [
|
|
"Hoi Loon Hotel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000220992.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which gas station sign is on the left of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"7 Eleven"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000220992.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the player's shirt say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"LONEY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the plane?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117336.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this building used for according to the sign above it?",
|
|
"answer": "post office",
|
|
"gt_answers": [
|
|
"Post Office"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000946.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is advertised on the purple sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Fila"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000147051.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of chips are featured?",
|
|
"answer": "tortilla chips",
|
|
"gt_answers": [
|
|
"Bar B Que"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000574635.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on tee-shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FEAR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000042312.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is \"queer\"?",
|
|
"answer": "queer",
|
|
"gt_answers": [
|
|
"The 318 Roadswitcher"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03393912_5876.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is pictured?",
|
|
"answer": "23",
|
|
"gt_answers": [
|
|
"218300 67700"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03393912_5876.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What band is the poster advertising?",
|
|
"answer": "the blues brothers",
|
|
"gt_answers": [
|
|
"THE BLUES BROTHERS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352789.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign at the top of the bus read?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"School Bus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000039733.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store on the far right?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"Watsons"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324830.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is it called when you cease all movement?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117922.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do you drive that has four wheels and a motor?",
|
|
"answer": "a vehicle",
|
|
"gt_answers": [
|
|
"vehicle"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000117922.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name displayed at the bottom?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"STACY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000012228.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bus?",
|
|
"answer": "mta bus",
|
|
"gt_answers": [
|
|
"889"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326832.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who made the scooter?",
|
|
"answer": "vespa",
|
|
"gt_answers": [
|
|
"Ferrari"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592331.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kid of shop is on the red background?",
|
|
"answer": "a motorcycle shop",
|
|
"gt_answers": [
|
|
"BOOKSHOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592331.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the batter's number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"55"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000004201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is sponsoring the game?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"modell's sporting goods"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000004201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of tricycle is it?",
|
|
"answer": "super trike",
|
|
"gt_answers": [
|
|
"Super Stock"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04482393_14369.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the red sign?",
|
|
"answer": "coke",
|
|
"gt_answers": [
|
|
"Colgate"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327119.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color are the M&M's in the first tube?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"Aqua Green",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03089624_97784.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is this picture from?",
|
|
"answer": "123rf",
|
|
"gt_answers": [
|
|
"www.shutterstock.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07718472_29620.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text in the middle say?",
|
|
"answer": "cucumbers on white background",
|
|
"gt_answers": [
|
|
"SHUTTER STOCK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07718472_29620.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text at the bottom say?",
|
|
"answer": "cucumbers on white background",
|
|
"gt_answers": [
|
|
"www.shutterstock.com 30031780"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07718472_29620.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of fruit is this?",
|
|
"answer": "orange",
|
|
"gt_answers": [
|
|
"Orange"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351930.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341267.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the slogan on the plane?",
|
|
"answer": "the city better life",
|
|
"gt_answers": [
|
|
"Better City, Better Life"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000277319.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the bike in the background?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"88"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000458505.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What event is it?",
|
|
"answer": "flood in paris",
|
|
"gt_answers": [
|
|
"Crue de la Seine"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000150161.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which city is it?",
|
|
"answer": "paris",
|
|
"gt_answers": [
|
|
"Paris"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000150161.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time does the Rolex sign say it is?",
|
|
"answer": "it reads 01:00",
|
|
"gt_answers": [
|
|
"2:28"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324881.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is written on the tire?",
|
|
"answer": "1984",
|
|
"gt_answers": [
|
|
"2001"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02974003_1454.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the toy?",
|
|
"answer": "lego",
|
|
"gt_answers": [
|
|
"LEGO Group"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02974003_1454.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what sore is shown on the leftt",
|
|
"answer": "mall",
|
|
"gt_answers": [
|
|
"Topman"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_158.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the destination of the bus?",
|
|
"answer": "montreal",
|
|
"gt_answers": [
|
|
"Vulcan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000083219.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is above the headlight on the bus?",
|
|
"answer": "the bus number",
|
|
"gt_answers": [
|
|
"2405"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000083219.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is dangerous about the floor?",
|
|
"answer": "slippery",
|
|
"gt_answers": [
|
|
"Wet Floor"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_573.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the players helmet?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"w"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000094155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first 3 letters on the players jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BRO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000094155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the last letter on the players jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"N"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000094155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "detour ahead",
|
|
"gt_answers": [
|
|
"Detour ahead"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326260.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the small bridge say?",
|
|
"answer": "no wake zone",
|
|
"gt_answers": [
|
|
"Open"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03947888_6703.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first 4 letters of the licence plate?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BJFG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000445006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the orange sign in white letters?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"rexall"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499307.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is being advertised on the yellow sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Cowan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499307.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top word in the bottom right corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Fotografia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553896.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the middle word in the bottom right corner",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"mpergon"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553896.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two websites appear on the bottom right corner?",
|
|
"answer": "person and person",
|
|
"gt_answers": [
|
|
"Flickr & Facebook"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553896.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written in red?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000049551.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the tennis match being held?",
|
|
"answer": "in a stadium",
|
|
"gt_answers": [
|
|
"wimbledon"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000231748.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the skateboarder\ufffd\ufffd\ufffds shirt?",
|
|
"answer": "skate",
|
|
"gt_answers": [
|
|
"CIRCA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159579.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what hotel is on the magazine?",
|
|
"answer": "holiday inn express hotel &amp suites",
|
|
"gt_answers": [
|
|
"Holiday Inn"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000537862.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store in this photograph?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"7 Eleven"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360301.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of this video?",
|
|
"answer": "signal jam",
|
|
"gt_answers": [
|
|
"Signal Jam",
|
|
"SIGNAL_JAM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317421.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the advertisement on the right of the tennis player?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Spa"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334339.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the item the mouse is in?",
|
|
"answer": "wheel",
|
|
"gt_answers": [
|
|
"Wodent wheel"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02342885_3448.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What flavor is this Mustard?",
|
|
"answer": "spicy brown",
|
|
"gt_answers": [
|
|
"Spicy Brown"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013125.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who makes this Mustard?",
|
|
"answer": "france",
|
|
"gt_answers": [
|
|
"French's"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013125.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How natural is the Mustard?",
|
|
"answer": "it is made with natural ingredients",
|
|
"gt_answers": [
|
|
"100% natural",
|
|
"100%"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013125.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what discount is on offer here?",
|
|
"answer": "20% off",
|
|
"gt_answers": [
|
|
"20% off"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_326.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is on the right?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"H&M"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_113.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "aprovechar la violencia",
|
|
"gt_answers": [
|
|
"Metro"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326650.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What gaming system are these people using?",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"wii"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000222472.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "an antique store",
|
|
"gt_answers": [
|
|
"Pucket's Grocery",
|
|
"PUCKETS GROCERY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414407.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the magazine in the picture?",
|
|
"answer": "domino",
|
|
"gt_answers": [
|
|
"domino"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317136.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What beer is advertised?",
|
|
"answer": "bud light",
|
|
"gt_answers": [
|
|
"COORS LIGHT"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/510.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is name of business?",
|
|
"answer": "pizza hut",
|
|
"gt_answers": [
|
|
"ZAFFIRO'S"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/510.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was business started?",
|
|
"answer": "1939",
|
|
"gt_answers": [
|
|
"1954"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/510.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company's headquarters are these?",
|
|
"answer": "sony corporation",
|
|
"gt_answers": [
|
|
"Sony"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001173.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of the van?",
|
|
"answer": "police",
|
|
"gt_answers": [
|
|
"police"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last three digits on the car's license plate?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"007"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000077806.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the player?",
|
|
"answer": "tow",
|
|
"gt_answers": [
|
|
"TOVAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411747.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the player?",
|
|
"answer": "32",
|
|
"gt_answers": [
|
|
"83"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411747.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is in the jar?",
|
|
"answer": "chili sauce",
|
|
"gt_answers": [
|
|
"Butterscotch"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006020.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photograph?",
|
|
"answer": "daniel schmid",
|
|
"gt_answers": [
|
|
"Georg Peter Landsiedel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325539.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top line of the statement on the boy's shirt?",
|
|
"answer": "i love baseball",
|
|
"gt_answers": [
|
|
"WHAT'S ON MY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000364815.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the maximum fine?",
|
|
"answer": "&ampamp &amp &amp &amp &amp",
|
|
"gt_answers": [
|
|
"$10,000"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000173607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What will happen to offenders?",
|
|
"answer": "they will be fined",
|
|
"gt_answers": [
|
|
"Prosecuted"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000173607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "spadina avenue",
|
|
"gt_answers": [
|
|
"Spadina Av"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000173607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was the copyright issued?",
|
|
"answer": "1923",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375396.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the statement of warning on the door?",
|
|
"answer": "bear in the car",
|
|
"gt_answers": [
|
|
"This could happen to your vehicle"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361799.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last letter on the train car?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"r"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365759.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter on the train car?",
|
|
"answer": "c",
|
|
"gt_answers": [
|
|
"g"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365759.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter appears in the middle of the train car?",
|
|
"answer": "c",
|
|
"gt_answers": [
|
|
"w"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365759.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the lowest number written on a drawer?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000571431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word written below the fruit?",
|
|
"answer": "vitamins",
|
|
"gt_answers": [
|
|
"Vitamins!",
|
|
"VITAMINS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320687.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does white board attached to the pole displayed",
|
|
"answer": "caution pedestrians crossing",
|
|
"gt_answers": [
|
|
"CCTV"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06874185_1436.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the red board displayed",
|
|
"answer": "caution pedestrians crossing",
|
|
"gt_answers": [
|
|
"CAUTION PEDESTRIANS CROSSING"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06874185_1436.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of station is this?",
|
|
"answer": "police station",
|
|
"gt_answers": [
|
|
"Police"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last three letters on the truck license plate?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MCO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000379143.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name shown in the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Rachel"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/626.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Main scent of this product?",
|
|
"answer": "lime",
|
|
"gt_answers": [
|
|
"Cucumber"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016907.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made this product?",
|
|
"answer": "fuji apple",
|
|
"gt_answers": [
|
|
"Fuji Apple"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02971356_2288.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is the pink umbrella from?",
|
|
"answer": "samsonite",
|
|
"gt_answers": [
|
|
"Avon"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359894.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the bus?",
|
|
"answer": "taiwan",
|
|
"gt_answers": [
|
|
"A4734"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343338.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign next to the stop sign read?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"Court"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369960.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the octogonal red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369960.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the product weigh?",
|
|
"answer": "750 ml",
|
|
"gt_answers": [
|
|
"432 G"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014642.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What vegetable is in the can?",
|
|
"answer": "cauliflower",
|
|
"gt_answers": [
|
|
"Corn.",
|
|
"COR"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014642.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the green sign to the right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Phone"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000398938.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the brand of the tennis racket?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"wilson"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000101218.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04311174_7512.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many men are in this picture?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"three"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color shirt is the man in the middle wearing?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"green"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street posted?",
|
|
"answer": "washington dc",
|
|
"gt_answers": [
|
|
"GEORGE WASHINGTON Blvd."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336387.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the jersey of the player to the right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373575.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the type of gecko in the image?",
|
|
"answer": "leopard gecko",
|
|
"gt_answers": [
|
|
"Golden",
|
|
"golden gate"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01675722_11054.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is written on this building?",
|
|
"answer": "marriott",
|
|
"gt_answers": [
|
|
"Marriott"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000738.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name for the player number 32?",
|
|
"answer": "scott nogelsen",
|
|
"gt_answers": [
|
|
"Vogelsong"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324879.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What event is being celebrated (one word)?",
|
|
"answer": "birthday",
|
|
"gt_answers": [
|
|
"BIRTHDAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000250424.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What expression is on the dog's face?",
|
|
"answer": "happy",
|
|
"gt_answers": [
|
|
"happy"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000250424.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the certificate of?",
|
|
"answer": "a certificate of authenticity",
|
|
"gt_answers": [
|
|
"Authenticity"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03272010_3475.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "road work ahead",
|
|
"gt_answers": [
|
|
"Road work ahead"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000136652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "According to the sign what is reserved?",
|
|
"answer": "parking lot",
|
|
"gt_answers": [
|
|
"PARKING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red octagon instructing you to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What Cardinal is one the street sign?",
|
|
"answer": "red cardinal",
|
|
"gt_answers": [
|
|
"CARDINAL MEDEIROS AVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is in front of the bus?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Marshalls"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367038.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red sign in front of the bus?",
|
|
"answer": "bus stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367038.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first two words of the black lettering?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"All Rights"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/809.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the tin box?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"altoids"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000280926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street on the top sign?",
|
|
"answer": "pete rose",
|
|
"gt_answers": [
|
|
"Pete Rose Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street on the lower sign?",
|
|
"answer": "pete rose",
|
|
"gt_answers": [
|
|
"Broadway"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is next to Broadway?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"199"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is it?",
|
|
"answer": "in the car",
|
|
"gt_answers": [
|
|
"4:15"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000057264.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the arrow on the street sign?",
|
|
"answer": "nassau street",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371883.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "nassau street",
|
|
"gt_answers": [
|
|
"MOSCO ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371883.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371883.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are on the green part of the sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SCT"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4889.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the image",
|
|
"answer": "butterfly on a leaf",
|
|
"gt_answers": [
|
|
"lanjenfong"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02281787_2337.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the Travel Information sign?",
|
|
"answer": "travel information",
|
|
"gt_answers": [
|
|
"Save money, shed pounds, bike to work",
|
|
"SAVE MONEY SHED POUNDS BIKE TO WORK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2000",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04118538_7470.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the road?",
|
|
"answer": "rd de la rochelle",
|
|
"gt_answers": [
|
|
"RUE PETIT CULOT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000016080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which word means little in French?",
|
|
"answer": "petit",
|
|
"gt_answers": [
|
|
"PETIT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000016080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the camera lens cover in the picture?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000575486.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter of the word listed on the side of the beam?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"A"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02777292_2163.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is one word displayed on this picture?",
|
|
"answer": "canon",
|
|
"gt_answers": [
|
|
"Canon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_15161.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the destination of the bus",
|
|
"answer": "trafalgar square",
|
|
"gt_answers": [
|
|
"HAMPSTEAD HEATH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000345625.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of plane is this?",
|
|
"answer": "a wwii fighter plane",
|
|
"gt_answers": [
|
|
"NAVION L-17"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000000247.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the propeller say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"NAVION L-17"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000000247.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written after NAVION?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"L-17"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000000247.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What product is on the shelves of the stand?",
|
|
"answer": "books",
|
|
"gt_answers": [
|
|
"books"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02871525_29077.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the make of the monitor?",
|
|
"answer": "acer",
|
|
"gt_answers": [
|
|
"Dell"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/914.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was the picture taken?",
|
|
"answer": "june 2012",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000575252.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the yellow sign say?",
|
|
"answer": "rough road",
|
|
"gt_answers": [
|
|
"Rough Road"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4394.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter on the white sign?",
|
|
"answer": "r",
|
|
"gt_answers": [
|
|
"S"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4394.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of her neck brace?",
|
|
"answer": "the vertebral pillow",
|
|
"gt_answers": [
|
|
"Aspen"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03814639_4082.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which number is on the PRICE jersey in the top left of the photo?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"31"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the photo taken?",
|
|
"answer": "pakistan",
|
|
"gt_answers": [
|
|
"Rakaposhi, Pakistan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000013497.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is in this photo?",
|
|
"answer": "pakistan",
|
|
"gt_answers": [
|
|
"Pakistan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000013497.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is worded on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592376.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of protection does the drywall offer?",
|
|
"answer": "fire protection",
|
|
"gt_answers": [
|
|
"Superior moisture and mold protection"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342676.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the dog's vest?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02091831_680.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the dog's vest?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02091831_680.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the dog?",
|
|
"answer": "white and brown",
|
|
"gt_answers": [
|
|
"Tan and white",
|
|
"WHITE, GREY"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02091831_680.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT WORD IS ON THE BLUE SIGN IN THE TOP LEFT CORNER?",
|
|
"answer": "cow",
|
|
"gt_answers": [
|
|
"CUSHIONS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT WORD IS ABOVE READYMADES?",
|
|
"answer": "cow",
|
|
"gt_answers": [
|
|
"TEXTILES"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What day was the photo taken?",
|
|
"answer": "sunday",
|
|
"gt_answers": [
|
|
"23"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331408.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331408.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of food is in the box?",
|
|
"answer": "donuts",
|
|
"gt_answers": [
|
|
"donuts"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415437.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in green letters at the top?",
|
|
"answer": "mass transit",
|
|
"gt_answers": [
|
|
"mass transit"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000291855.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"4080"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000291855.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of transit is this?",
|
|
"answer": "mass transit",
|
|
"gt_answers": [
|
|
"mass"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000291855.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the guy's shirt say?",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"Cat"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342372.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What make is the eraser?",
|
|
"answer": "sanford",
|
|
"gt_answers": [
|
|
"Impega"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03908618_16819.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What make is the pencil?",
|
|
"answer": "unbranded",
|
|
"gt_answers": [
|
|
"Dixon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03908618_16819.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took the image?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Angela Kraft"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02441942_20126.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the skier?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365924.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is displayed on the front and side of the vending machine?",
|
|
"answer": "mountain dew",
|
|
"gt_answers": [
|
|
"Mountain Dew"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04525305_11403.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the truck?",
|
|
"answer": "mountain dew",
|
|
"gt_answers": [
|
|
"Dodge"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04525305_11403.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"JASKIRAT SINGH BAWA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000276909.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number (4 digits)?",
|
|
"answer": "royal enfield classic 350",
|
|
"gt_answers": [
|
|
"9500"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000276909.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the green banner?",
|
|
"answer": "paris",
|
|
"gt_answers": [
|
|
"PARIBAS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372419.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the street sign?",
|
|
"answer": "cherry ave",
|
|
"gt_answers": [
|
|
"Cherry Av"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kid of place is the supermarket?",
|
|
"answer": "grocery store",
|
|
"gt_answers": [
|
|
"Food Plus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word under the word food?",
|
|
"answer": "cherry",
|
|
"gt_answers": [
|
|
"GROCERY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "par",
|
|
"gt_answers": [
|
|
"pare"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409030.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of the Kitchen?",
|
|
"answer": "it is a ikea kitchen",
|
|
"gt_answers": [
|
|
"2490"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000153064.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letter located on the skier's thigh?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"b"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000395040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what make is the chair?",
|
|
"answer": "schroth",
|
|
"gt_answers": [
|
|
"sparco"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04037443_9119.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters underneath the snow board?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FORUM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499396.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is shown next to the sun/bird/waves image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number bus are these passengers riding?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"2303"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "A passenger just requested a:",
|
|
"answer": "what is the name of person's dog",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was the golf course found?",
|
|
"answer": "the golf course was found in 1895",
|
|
"gt_answers": [
|
|
"1982"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417940.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the total telethon pledge so far?",
|
|
"answer": "over a billion dollars",
|
|
"gt_answers": [
|
|
"$1,337"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/634.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the police department?",
|
|
"answer": "new york city police department",
|
|
"gt_answers": [
|
|
"NEW YORK POLICE DEPT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/634.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company sign is lit up in pink?",
|
|
"answer": "yahoo",
|
|
"gt_answers": [
|
|
"Yahoo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/634.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the word in big letters on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367039.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the yellow sign say?",
|
|
"answer": "yellow brick rd",
|
|
"gt_answers": [
|
|
"Yellow Brick Rd"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417211.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom sign say?",
|
|
"answer": "yellow brick rd",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417211.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the black sign say?",
|
|
"answer": "yellow brick rd",
|
|
"gt_answers": [
|
|
"Pancake Blvd",
|
|
"PANCAKE BLVD."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417211.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much vitamin C is in this product?",
|
|
"answer": "0 mg vitamin c",
|
|
"gt_answers": [
|
|
"4%"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011029.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website address?",
|
|
"answer": "www.starbulldogs.com",
|
|
"gt_answers": [
|
|
"www.stafbul.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02093256_10439.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is straight ahead?",
|
|
"answer": "roma",
|
|
"gt_answers": [
|
|
"Roma"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the cup?",
|
|
"answer": "onion rings",
|
|
"gt_answers": [
|
|
"Wind mill"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404539.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus headed?",
|
|
"answer": "downtown",
|
|
"gt_answers": [
|
|
"Mt Airy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372803.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the entrance on the left",
|
|
"answer": "toilets",
|
|
"gt_answers": [
|
|
"SHIELA'S"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the entrance on the right",
|
|
"answer": "toilets",
|
|
"gt_answers": [
|
|
"BLOKE'S"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the place name written at the top on the person's shirt?",
|
|
"answer": "red wings",
|
|
"gt_answers": [
|
|
"Detroit"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000370727.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the station?",
|
|
"answer": "train station",
|
|
"gt_answers": [
|
|
"jennings",
|
|
"JENNINGS STATION"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03220513_9458.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being signed up today?",
|
|
"answer": "a street sign",
|
|
"gt_answers": [
|
|
"Northwest Seattle Little League"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360554.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the player that has the Frisbee?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000409628.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bag say?",
|
|
"answer": "construction",
|
|
"gt_answers": [
|
|
"Thank You"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325274.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What direction is on the back wall?",
|
|
"answer": "west",
|
|
"gt_answers": [
|
|
"WEST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407306.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THIS SHOP?",
|
|
"answer": "telecommunications",
|
|
"gt_answers": [
|
|
"POP TELECOM"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_232.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in front of the store?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"POP TELECOM"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_232.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written along the horizontal line of the cross sign?",
|
|
"answer": "allen chapel",
|
|
"gt_answers": [
|
|
"Allen Chapel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377350.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written in the box under the cross?",
|
|
"answer": "allen chapel",
|
|
"gt_answers": [
|
|
"Dr. Sherryl Matlock"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377350.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many bottles of alchol are there?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many cards are there?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What beer is the red truck branded for?",
|
|
"answer": "bud light",
|
|
"gt_answers": [
|
|
"Budweiser"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372063.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company name written in green?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Northside"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000530750.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word in white bold font next to the apple slice?",
|
|
"answer": "apple",
|
|
"gt_answers": [
|
|
"Unser"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015300.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which number train is this?",
|
|
"answer": "m train",
|
|
"gt_answers": [
|
|
"54"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406256.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this train going to?",
|
|
"answer": "a city",
|
|
"gt_answers": [
|
|
"Cermak"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406256.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the copyright name at the bottom?",
|
|
"answer": "john",
|
|
"gt_answers": [
|
|
"Casey"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01796340_13874.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the question for #4",
|
|
"answer": "what is the declaration of independence",
|
|
"gt_answers": [
|
|
"What do you call a tyrants harsh reign?"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002474.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is shown in the top right corner?",
|
|
"answer": "the declaration of independence",
|
|
"gt_answers": [
|
|
"Review"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002474.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this store free of?",
|
|
"answer": "duty",
|
|
"gt_answers": [
|
|
"Duty"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the side of the ship?",
|
|
"answer": "hatsu",
|
|
"gt_answers": [
|
|
"Hatsu"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03095699_5088.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the paper on the wall?",
|
|
"answer": "a list of items on the table",
|
|
"gt_answers": [
|
|
"Laptop Charity Project"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328625.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"97"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402621.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the paper say?",
|
|
"answer": "it is a coupon",
|
|
"gt_answers": [
|
|
"Coffee Pot Restaurant"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03063689_9251.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000266334.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bottom right of the train?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"158723"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000042156.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the bottom left of the train?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"first"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000042156.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What 6 digit number is on the train?",
|
|
"answer": "057",
|
|
"gt_answers": [
|
|
"158723"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000042156.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the operation listed?",
|
|
"answer": "cup",
|
|
"gt_answers": [
|
|
"Cuppa"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363737.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color has the car on the left?",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"red"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4987.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color has the car on the right?",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"yellow"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4987.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are written on the brown pot?",
|
|
"answer": "brew pot",
|
|
"gt_answers": [
|
|
"Aerobell"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04554684_3186.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the logo on the tank say?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"Aerobell"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04554684_3186.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the player with the bat in his hands?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"13"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000302051.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the net weight of this product in ounces?",
|
|
"answer": "16 ounces",
|
|
"gt_answers": [
|
|
"15",
|
|
"15 oz"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016443.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many grams is this product?",
|
|
"answer": "450 grams",
|
|
"gt_answers": [
|
|
"425"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016443.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the red ruler?",
|
|
"answer": "modern",
|
|
"gt_answers": [
|
|
"Modern"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04118776_15789.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the amount of sale?",
|
|
"answer": "the price of the gas",
|
|
"gt_answers": [
|
|
"24.93"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03425413_7985.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written next to the white heart?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Wedding Special"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000099162.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the red metal sheet sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what handwritten word is written next to the sign?",
|
|
"answer": "hate",
|
|
"gt_answers": [
|
|
"HATE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "letter on fridge?",
|
|
"answer": "letter on fridge",
|
|
"gt_answers": [
|
|
"A"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000470414.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the school bus?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"ATLANTIC EXPRESS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353548.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the copyright owner?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Miguel Navaza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345298.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the year of the copyright?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345298.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of beer is in the photo?",
|
|
"answer": "coors light",
|
|
"gt_answers": [
|
|
"Corona Extra"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is in the bottom right corner?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000347292.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word is word on the bottom right ?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"axel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000347292.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the grey shirt say",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Dynamic sports Management & Bethessa Sports and Health Club"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000524027.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the black jersey?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316691.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What day is the SF Weekly free on",
|
|
"answer": "saturday",
|
|
"gt_answers": [
|
|
"Wednesday"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000382083.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price displayed in the right of the picture?",
|
|
"answer": "free",
|
|
"gt_answers": [
|
|
"9.99",
|
|
"999"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000382083.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the order number?",
|
|
"answer": "40",
|
|
"gt_answers": [
|
|
"49"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4826.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the airline called?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Alitalia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000132773.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the signs want to do about foreclosures?",
|
|
"answer": "stop foreclosures",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407418.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three companies are sponsored in this event?",
|
|
"answer": "mcdonald s coca cola",
|
|
"gt_answers": [
|
|
"GATORADE EICO AT&T"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351551.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of wine was in the clear bottle?",
|
|
"answer": "white wine",
|
|
"gt_answers": [
|
|
"Elderflower Sparkling"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where was the wine produced?",
|
|
"answer": "australia",
|
|
"gt_answers": [
|
|
"New Zealand"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number comes directly before the 3 in this picture?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"0"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_932.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number comes after 03-?",
|
|
"answer": "04-",
|
|
"gt_answers": [
|
|
"26"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_932.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name on the sign?",
|
|
"answer": "mr",
|
|
"gt_answers": [
|
|
"Buffet Town"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_178.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is written on the big white sign above the truck?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"Jones"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000158451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the BRAND name of the toaster?",
|
|
"answer": "ruby",
|
|
"gt_answers": [
|
|
"DU-PLEX locust",
|
|
"DU-PLEX"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04442312_7525.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is in the corner of the image?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2007"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04141076_42088.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the train car, painted in yellow?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000514519.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written in white on the blue background of the monitor?",
|
|
"answer": "computer",
|
|
"gt_answers": [
|
|
"Unley"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000410436.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the object title?",
|
|
"answer": "jesus face",
|
|
"gt_answers": [
|
|
"Church Prayer Rug"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03998194_9441.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the two words on the front of the telephone?",
|
|
"answer": "flash relay",
|
|
"gt_answers": [
|
|
"Flash Redial"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03187595_2495.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top part of the center button do?",
|
|
"answer": "it is a flash dial",
|
|
"gt_answers": [
|
|
"Flash"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03187595_2495.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom part of the center button do?",
|
|
"answer": "turn the phone off",
|
|
"gt_answers": [
|
|
"Redial"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03187595_2495.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the top blue box?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175651.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the bottom blue box?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175651.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written inside the circle cutout on the top blue box?",
|
|
"answer": "samsung galaxy s3",
|
|
"gt_answers": [
|
|
"Wii"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175651.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What car company sponsored this event?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Kia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377149.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is no parking allowed?",
|
|
"answer": "in front of the city hall",
|
|
"gt_answers": [
|
|
"IN AISLES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000454916.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is under the clock?",
|
|
"answer": "pasha",
|
|
"gt_answers": [
|
|
"Pasela"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354695.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red sign advertising?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PLAZA SNACKS"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number on the red and blue car?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"LS06 BCX"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361412.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom sign say?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"One way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361412.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was this photo taken?",
|
|
"answer": "a bathroom",
|
|
"gt_answers": [
|
|
"11/14/2007"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592672.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the yellow helmet?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SCOTT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000450003.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the right side of the white bus?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"6035"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366243.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to the next stop",
|
|
"gt_answers": [
|
|
"North Bergen"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366243.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of automobile is the red SUV?",
|
|
"answer": "suv",
|
|
"gt_answers": [
|
|
"Mitsubishi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366243.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "sams club",
|
|
"gt_answers": [
|
|
"Files"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_275.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What soft drink company name is on the red disk?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Coca-Cola"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345165.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store name appears in yellow in the background?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Get Stuffed"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345165.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name?",
|
|
"answer": "johns market",
|
|
"gt_answers": [
|
|
"10TH ST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000033891.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sign in below the RC logo?",
|
|
"answer": "johns market",
|
|
"gt_answers": [
|
|
"JOHN'S MARKET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000033891.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the class year written on the face of the clock?",
|
|
"answer": "the class year",
|
|
"gt_answers": [
|
|
"1904"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/286090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is registration number of the plane?",
|
|
"answer": "the plane is registered as fs-fs",
|
|
"gt_answers": [
|
|
"N40960"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the convenience store?",
|
|
"answer": "sainsburys",
|
|
"gt_answers": [
|
|
"Ajans"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337588.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bank in the photo?",
|
|
"answer": "bank of america",
|
|
"gt_answers": [
|
|
"citibank"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say in blue and white on the bike?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"Bud"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the bottom of the bike?",
|
|
"answer": "moped",
|
|
"gt_answers": [
|
|
"Fighter"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of bike is this?",
|
|
"answer": "moped",
|
|
"gt_answers": [
|
|
"WILLAMSF1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website adress on the white banner?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"nikefootball.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the rightmost complete red word on the white banner?",
|
|
"answer": "soccer",
|
|
"gt_answers": [
|
|
"Arsenal"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first (leftmost) word on the white banner?",
|
|
"answer": "arsenal",
|
|
"gt_answers": [
|
|
"Fly"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What category is in the front center?",
|
|
"answer": "fiction",
|
|
"gt_answers": [
|
|
"Finance & Investment"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_462.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the black and white sign say?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065916.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the black clock?",
|
|
"answer": "peoples dental surgery",
|
|
"gt_answers": [
|
|
"Boodle & Dunthorne"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000374796.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the dark car in the foreground?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BG 7181"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000560123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the tan car in the background?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"HWK 246N"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000560123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in red lettering on the sign?",
|
|
"answer": "supermercado",
|
|
"gt_answers": [
|
|
"SUPERMERCAT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2585.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phrase is in black lettering on the sign?",
|
|
"answer": "supermercado",
|
|
"gt_answers": [
|
|
"VINYA DEL MAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2585.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where was this product made?",
|
|
"answer": "united states",
|
|
"gt_answers": [
|
|
"UK"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006041.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much do calls cost?",
|
|
"answer": "50 cents",
|
|
"gt_answers": [
|
|
"50"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03902125_18981.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the referee",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"27"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330321.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How is this item labeled?",
|
|
"answer": "sea glass",
|
|
"gt_answers": [
|
|
"Clear"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03633091_15833.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Wht does the yellow sign say to look for?",
|
|
"answer": "trains",
|
|
"gt_answers": [
|
|
"Trains",
|
|
"look for trains"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407854.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the three digit number printed on the wall?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"330"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327715.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is a single magazine in euros?",
|
|
"answer": "one euro",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3073.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much are two magazines in euros?",
|
|
"answer": "two euros",
|
|
"gt_answers": [
|
|
"27"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3073.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is to the left?",
|
|
"answer": "objazd",
|
|
"gt_answers": [
|
|
"Objazd"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343979.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boat?",
|
|
"answer": "allaren",
|
|
"gt_answers": [
|
|
"alameen"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372426.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What road ends here?",
|
|
"answer": "reno",
|
|
"gt_answers": [
|
|
"Genoa"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324067.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which city name is shown here?",
|
|
"answer": "singapore",
|
|
"gt_answers": [
|
|
"TOKYO"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_709.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store on the left with the black background?",
|
|
"answer": "dillards",
|
|
"gt_answers": [
|
|
"Giordano"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the caption at the bottom of the photo?",
|
|
"answer": "a book with film",
|
|
"gt_answers": [
|
|
"A Bush Mail Box"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000219488.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the title of the red book?",
|
|
"answer": "the art of computer programming",
|
|
"gt_answers": [
|
|
"JavaScript"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713661.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"285"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001092.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the orange and green bus in front?",
|
|
"answer": "ta ta",
|
|
"gt_answers": [
|
|
"29-40-GB"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03895866_65924.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number near the headlight of the orange and green bus?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"154"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03895866_65924.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "First two tiny letters on shirt?",
|
|
"answer": "a m",
|
|
"gt_answers": [
|
|
"DE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048126.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which skier is in the lead?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"29"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which skier is on the same team as #29?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"12"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is one of the sponsors listed on their uniforms?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GMPA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the right of the man?",
|
|
"answer": "luggage",
|
|
"gt_answers": [
|
|
"Toilets"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351809.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "101",
|
|
"gt_answers": [
|
|
"101"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377148.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to a city",
|
|
"gt_answers": [
|
|
"SANTA ROSA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377148.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of bus is it?",
|
|
"answer": "city bus",
|
|
"gt_answers": [
|
|
"LIMITED STOP EXPRESS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377148.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the last 3 numbers?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"466"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03272562_15701.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this player",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"sch"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000217929.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Write the English term in red?",
|
|
"answer": "tokyo",
|
|
"gt_answers": [
|
|
"Take Five"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2670.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "From which city is this map?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"San Francisco"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03938244_13982.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is advertised on the airplane tail?",
|
|
"answer": "southwest airlines",
|
|
"gt_answers": [
|
|
"Southwest"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who owns the copyright to this image?",
|
|
"answer": "southwest airlines",
|
|
"gt_answers": [
|
|
"Brett Lane Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What TV show is this game based on?",
|
|
"answer": "star trek",
|
|
"gt_answers": [
|
|
"STAR TREK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_18328.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the front bus going?",
|
|
"answer": "london city airport",
|
|
"gt_answers": [
|
|
"Barnet Church"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000395101.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the street?",
|
|
"answer": "london road",
|
|
"gt_answers": [
|
|
"No Entry"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000395101.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being sold here?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"BANANAS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000009452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What weight are the bananas being sold by?",
|
|
"answer": "kilogram",
|
|
"gt_answers": [
|
|
"KG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000009452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of the bananas per kg?",
|
|
"answer": "$1",
|
|
"gt_answers": [
|
|
"$11.98"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000009452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store on the right?",
|
|
"answer": "macys",
|
|
"gt_answers": [
|
|
"MaxMara"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store on the left?",
|
|
"answer": "macys",
|
|
"gt_answers": [
|
|
"Diane von Furstenberg"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the email listed?",
|
|
"answer": "no email listed",
|
|
"gt_answers": [
|
|
"www.tedsonline.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409292.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the make of this bus?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"International"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign read on the side of the bus?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "sc sc",
|
|
"gt_answers": [
|
|
"HX-04709",
|
|
"HX 04709"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color are the back of the skier's gloves?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"Orange",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338110.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the route of the bus?",
|
|
"answer": "crosstown",
|
|
"gt_answers": [
|
|
"41 Crosstown",
|
|
"CROSSTOWN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350256.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the serial number of the plane?",
|
|
"answer": "sn mcp",
|
|
"gt_answers": [
|
|
"n861ma"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175250.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on planes bottom?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"www.mokuleleairline"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175250.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word of airlines name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Mokulele"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000175250.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOR OF THIS DRESS?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"SILKY WHITE",
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03450230_30009.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is her dress?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03450230_30009.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of food is being advertised?",
|
|
"answer": "desserts",
|
|
"gt_answers": [
|
|
"DESSERTS SWEETS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000075270.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the train on the left?",
|
|
"answer": "a train",
|
|
"gt_answers": [
|
|
"1839"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316708.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say on the right?",
|
|
"answer": "londonderry station",
|
|
"gt_answers": [
|
|
"WARNING KEEP OFF ELECTRIC LIVE RAILS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000074997.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say on the left?",
|
|
"answer": "londonderry station",
|
|
"gt_answers": [
|
|
"SOUTHPORT CHAPEL STREET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000074997.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What 2 letters appear in the largest font on the front of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the type in the middle of the image?",
|
|
"answer": "keyboard",
|
|
"gt_answers": [
|
|
"Workstation"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000127418.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bbok name?",
|
|
"answer": "kung fu",
|
|
"gt_answers": [
|
|
"KUNG FU and KARATE",
|
|
"COMPLETE GUIDE TO THE DEADLY ARTS OF KUNG FU and KARATE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_12307.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of the book?",
|
|
"answer": "$24.99",
|
|
"gt_answers": [
|
|
"$1.00"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_12307.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number mentioned on the book?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"NO.1 1974"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_12307.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many males are in the picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363245.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344854.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the larger name displayed in the corner?",
|
|
"answer": "paul duquet",
|
|
"gt_answers": [
|
|
"Paul Bocquet"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350637.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the blue words in the bottom corner?",
|
|
"answer": "paul duquet",
|
|
"gt_answers": [
|
|
"Nicolas Risch Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350637.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written on the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ValleyMetro"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000068518.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "tahoe transit",
|
|
"gt_answers": [
|
|
"8507"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000068518.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are visible on the white sign in the background?",
|
|
"answer": "person's house",
|
|
"gt_answers": [
|
|
"road crossing"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352739.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"90"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338190.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the side of the plane?",
|
|
"answer": "gantas",
|
|
"gt_answers": [
|
|
"Qantas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000511438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is in the building?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"macy's"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335225.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue pad?",
|
|
"answer": "chemistry le school",
|
|
"gt_answers": [
|
|
"chemistry is chool"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04238763_6896.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sticker on the sign say?",
|
|
"answer": "adolf hitler",
|
|
"gt_answers": [
|
|
"Ial Bullying"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000013524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of bakery prepared this food?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"sistema bakery"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000269738.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what line is this bus running?",
|
|
"answer": "santa fe",
|
|
"gt_answers": [
|
|
"Linea 102"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361350.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the plane?",
|
|
"answer": "phoenix",
|
|
"gt_answers": [
|
|
"Dynamic"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318862.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the green sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Jesus is the way"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000556969.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the man's helmet say?",
|
|
"answer": "harley davidson",
|
|
"gt_answers": [
|
|
"HBO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is on the white sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"New Delhi"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000540082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "vogue bar",
|
|
"gt_answers": [
|
|
"Yogurt Bar"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who has a copywrite for this photo?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"sassy mom's corner"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416368.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is shown on the screen?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"1939"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007105.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what kind of nuts are in the blue packet?",
|
|
"answer": "peanuts",
|
|
"gt_answers": [
|
|
"MACADAMIA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03482405_8976.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What warning does is written on the back of this truck?",
|
|
"answer": "do not attempt to drive this vehicle",
|
|
"gt_answers": [
|
|
"Caution Frequently Stopping Vehicle"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316663.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the first aid kit on the wall?",
|
|
"answer": "first aid only",
|
|
"gt_answers": [
|
|
"Xpect"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/379.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who sponsored the match?",
|
|
"answer": "red bull",
|
|
"gt_answers": [
|
|
"HORNIG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000233292.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the boat number?",
|
|
"answer": "199",
|
|
"gt_answers": [
|
|
"29911"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02981792_13549.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is written on the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"www.versaci.fr"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4803.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the appliance this woman is holding?",
|
|
"answer": "crock pot",
|
|
"gt_answers": [
|
|
"Crock Pot"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03133878_2839.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written here?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BNP PARI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000573248.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the pier?",
|
|
"answer": "central pier",
|
|
"gt_answers": [
|
|
"Central Pier",
|
|
"CENTRAL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349953.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bottom of the sign in blue?",
|
|
"answer": "saigon",
|
|
"gt_answers": [
|
|
"BA TRIEU"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415487.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What climate is the tap associated with the refrigerator described as?",
|
|
"answer": "cold",
|
|
"gt_answers": [
|
|
"COLD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000545200.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the train?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"45379"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412681.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the coffee shop?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"St. Marc Cafe"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the cafe?",
|
|
"answer": "mr coffee",
|
|
"gt_answers": [
|
|
"St. Marc Cafe"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "mulberry street",
|
|
"gt_answers": [
|
|
"TAYLOR ST MULBERRY ST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000288131.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the headline of the poster (first line)?",
|
|
"answer": "dead man talking",
|
|
"gt_answers": [
|
|
"DEAD MAN TALKING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328739.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who runs the campaign?",
|
|
"answer": "dead man talking",
|
|
"gt_answers": [
|
|
"Texas Department of Transportation"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328739.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the team name on the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Melbourne Tigers"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000578070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the city the team is from?",
|
|
"answer": "melbourne",
|
|
"gt_answers": [
|
|
"Melbourne"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000578070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is this parking meter located in?",
|
|
"answer": "chicago",
|
|
"gt_answers": [
|
|
"Denver"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000415475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the watermark?",
|
|
"answer": "impala",
|
|
"gt_answers": [
|
|
"iStockphoto"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02423022_6531.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the top right hand side read?",
|
|
"answer": "dior",
|
|
"gt_answers": [
|
|
"Dior On Stage"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the shop?",
|
|
"answer": "asian food",
|
|
"gt_answers": [
|
|
"Ben Ngii",
|
|
"BEN NGIL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370376.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is on the umbrella?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"Coca Cola"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370376.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "x1",
|
|
"gt_answers": [
|
|
"68"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000394139.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many necklaces does the man have on?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"two",
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319312.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red traffic sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371973.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "`What is written on the yellow traffic sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Two way traffic"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371973.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much are items?",
|
|
"answer": "they are cheap",
|
|
"gt_answers": [
|
|
".99\ufffd\ufffd"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the store called?",
|
|
"answer": "del taco",
|
|
"gt_answers": [
|
|
"DELRIO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "In which color bus people boarding?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Blue",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370002.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this located?",
|
|
"answer": "in a parade",
|
|
"gt_answers": [
|
|
"St. John's"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000200080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is pictured?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"JOHN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000200080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four letters can you see after \"St. John's\"?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Epis"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000200080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus station?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"11"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000169640.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the technology company on the train?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Samsung"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000283884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letters on the tail of the plane?",
|
|
"answer": "japan airlines",
|
|
"gt_answers": [
|
|
"ana"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000362130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country does the plane work for?",
|
|
"answer": "japan",
|
|
"gt_answers": [
|
|
"japan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000362130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three letters are on the plane in the background?",
|
|
"answer": "aa",
|
|
"gt_answers": [
|
|
"ana"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000362130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word in white starts with the letter C?",
|
|
"answer": "chainmail",
|
|
"gt_answers": [
|
|
"Crusade"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03000247_9278.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the 2 words on the book, that are hand written?",
|
|
"answer": "sheep lamb",
|
|
"gt_answers": [
|
|
"sheep barn"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351440.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the 1 word written on the book that is printed by a computer?",
|
|
"answer": "sheep",
|
|
"gt_answers": [
|
|
"record"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351440.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the train engine.",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"52"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the 2 words written on the face of the clock?",
|
|
"answer": "roman numerals",
|
|
"gt_answers": [
|
|
"Pub Restauarant"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376030.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "bata",
|
|
"gt_answers": [
|
|
"BATA"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000148.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city name is written on the building?",
|
|
"answer": "melbourne",
|
|
"gt_answers": [
|
|
"London"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000339074.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what street is listed on the right?",
|
|
"answer": "fifth avenue",
|
|
"gt_answers": [
|
|
"fifth ave"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372471.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the white text on the dog's collar?",
|
|
"answer": "schnauzer",
|
|
"gt_answers": [
|
|
"WOOF"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02097209_6366.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word at the top of the can that starts with M?",
|
|
"answer": "mountain",
|
|
"gt_answers": [
|
|
"MENOS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000422828.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the first word on the sign?",
|
|
"answer": "old",
|
|
"gt_answers": [
|
|
"ragnar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411905.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number range is located on the bottom left?",
|
|
"answer": "m to l",
|
|
"gt_answers": [
|
|
"6-11",
|
|
"6-11 KG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03188531_15208.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number worn by the player in red?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"12"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408044.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number worn by the player in orange?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"34"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408044.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photo agency?",
|
|
"answer": "stock photo",
|
|
"gt_answers": [
|
|
"Foto S.A."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02786058_6977.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does Foto S.A. do?",
|
|
"answer": "stock photography",
|
|
"gt_answers": [
|
|
"Photo and servicing agency"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02786058_6977.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company owns this photo?",
|
|
"answer": "getty images",
|
|
"gt_answers": [
|
|
"Foto S.A."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02786058_6977.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the street?",
|
|
"answer": "third street",
|
|
"gt_answers": [
|
|
"Third Street Promenade",
|
|
"THIRD STREET"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365141.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of vehicle is allowed on the right most lane?",
|
|
"answer": "buses",
|
|
"gt_answers": [
|
|
"Buses only"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365141.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is this a picture of?",
|
|
"answer": "bridge",
|
|
"gt_answers": [
|
|
"bridge",
|
|
"CLIFDEN SUSPENSION BRIDGE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04366367_1851.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company makes the truck depicted in the photo?",
|
|
"answer": "volvo",
|
|
"gt_answers": [
|
|
"scania"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000545676.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the door?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"1105"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401207.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What counrry is mentioned in thebtext?",
|
|
"answer": "india",
|
|
"gt_answers": [
|
|
"India"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is used at least four times in this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"up"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000426853.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the phone number for Arial Equip.?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"877.734.8400"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000426853.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is abbreviated in this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"equip."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000426853.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken according to the dateline?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2006"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000520654.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the beer?",
|
|
"answer": "stella artois",
|
|
"gt_answers": [
|
|
"Stella Artois"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341019.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the remote control?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"LG"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341019.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which sport is he playing?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320471.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What gender is the player?",
|
|
"answer": "male",
|
|
"gt_answers": [
|
|
"Male"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320471.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is he holding in his hand?",
|
|
"answer": "towel",
|
|
"gt_answers": [
|
|
"Towel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320471.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store with the red background?",
|
|
"answer": "joy q",
|
|
"gt_answers": [
|
|
"The Gift Shop"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of services are offered from the store with the yellow store sign?",
|
|
"answer": "beauty salon",
|
|
"gt_answers": [
|
|
"Hair salon"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the \"J\" word in the upper right corner?",
|
|
"answer": "joy",
|
|
"gt_answers": [
|
|
"Joy"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how much seter on bus",
|
|
"answer": "seter on bus",
|
|
"gt_answers": [
|
|
"47"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332782.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of bus",
|
|
"answer": "luxusbus",
|
|
"gt_answers": [
|
|
"luksusbuss"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332782.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the larger book under the computer?",
|
|
"answer": "cat in the hat",
|
|
"gt_answers": [
|
|
"great houses of britain"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053479.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many women are featured in the black book",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"100"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053479.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of spider is this?",
|
|
"answer": "wolf spider",
|
|
"gt_answers": [
|
|
"wolf spider"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01775062_7047.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team does the pitcher pitch for?",
|
|
"answer": "new york mets",
|
|
"gt_answers": [
|
|
"Camden"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373332.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building",
|
|
"answer": "the buttery",
|
|
"gt_answers": [
|
|
"THE BUTTERY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the white board",
|
|
"answer": "the buttery",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the suitcase?",
|
|
"answer": "fragile",
|
|
"gt_answers": [
|
|
"THIS SIDE UP FRAGILE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345924.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the corner of the box?",
|
|
"answer": "5",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009369.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What business is listed in the bottom right corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"photography",
|
|
"PETE LABROZZI PHOTOGRAPHY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000128706.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red and white banner read?",
|
|
"answer": "i'm in the office",
|
|
"gt_answers": [
|
|
"FORDHAM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000541491.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white on the blue motorcycle?",
|
|
"answer": "berlin",
|
|
"gt_answers": [
|
|
"Milka"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000511752.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign showing you how to wash?",
|
|
"answer": "how to brush your teeth",
|
|
"gt_answers": [
|
|
"Hands"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of the building?",
|
|
"answer": "blue sky",
|
|
"gt_answers": [
|
|
"parachute air club"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403255.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "whats the name of the building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"radio city"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000193867.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the engine no.?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"L.150"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368148.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of train?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374709.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the white text on the red sign?",
|
|
"answer": "apollo hospital",
|
|
"gt_answers": [
|
|
"24 HOURS EMERGENCY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000137.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "jen's take home",
|
|
"gt_answers": [
|
|
"Jeni's Take Home"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375180.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers are displayed on the right of the pitcher?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"326"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n09835506_4745.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is in the advertisement on the pitcher's left?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"AIG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n09835506_4745.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the building",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SIBLEY WAREHOUSE & STORAGE COS FIRE PROOF WAREHOUSE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000105468.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "all way",
|
|
"gt_answers": [
|
|
"Stop All-Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340998.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the license plate on the motorcycle?",
|
|
"answer": "san diego",
|
|
"gt_answers": [
|
|
"HAIRDR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410172.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the box?",
|
|
"answer": "a calculator",
|
|
"gt_answers": [
|
|
"Phone"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000047904.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the phone for?",
|
|
"answer": "it's a calculator",
|
|
"gt_answers": [
|
|
"Calling for",
|
|
"CALLING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000047904.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the TV show?",
|
|
"answer": "tv show",
|
|
"gt_answers": [
|
|
"M*A*S*H"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344777.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the green vehicle?",
|
|
"answer": "tahoe",
|
|
"gt_answers": [
|
|
"PAF808"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594945_5234.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the yellow sign?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"Danger Keep Away"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594945_5234.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of simple trip planner",
|
|
"answer": "a cell phone",
|
|
"gt_answers": [
|
|
"Metro"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000227568.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in the start box",
|
|
"answer": "lg g2",
|
|
"gt_answers": [
|
|
"Laist hq"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000227568.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in the end box",
|
|
"answer": "lg cell phone",
|
|
"gt_answers": [
|
|
"7th/grand"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000227568.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many minutes do you get for $1.00?",
|
|
"answer": "30 minutes",
|
|
"gt_answers": [
|
|
"3 MINUTOS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03902125_5633.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the side of the plane?",
|
|
"answer": "air transat",
|
|
"gt_answers": [
|
|
"Air Transat"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373794.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to the next stop",
|
|
"gt_answers": [
|
|
"Downtown"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403728.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS DISPLAYED ON THE WHITE JACKETS?",
|
|
"answer": "ems",
|
|
"gt_answers": [
|
|
"POLIZIA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04336792_15017.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NUMBER DISPLAYED ON LEFT SIDE OF THE RED VEHICLE?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"115"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04336792_15017.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON THE LEFT SIDE OF THE RED VEHICLE?",
|
|
"answer": "ems",
|
|
"gt_answers": [
|
|
"VIGILI DEL FUOCO 115",
|
|
"VIGILI DEL FUOCO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04336792_15017.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the bus on the left",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"sebastian"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000526922.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type is the first word of the road this building is on?",
|
|
"answer": "road",
|
|
"gt_answers": [
|
|
"LANE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000552791.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word of the road (Lane [ ] Rd)?",
|
|
"answer": "lane",
|
|
"gt_answers": [
|
|
"HEAD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000552791.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is this building on a ST or RD?",
|
|
"answer": "st",
|
|
"gt_answers": [
|
|
"RD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000552791.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What date was this image taken?",
|
|
"answer": "the date is unknown",
|
|
"gt_answers": [
|
|
"2003 6 8"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318872.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written on the white background on the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"classic"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000456416.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country does the sign welcome the driver to?",
|
|
"answer": "canada",
|
|
"gt_answers": [
|
|
"Toronto"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371622.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of chocoate is sold in the image?",
|
|
"answer": "cadbury dairy milk",
|
|
"gt_answers": [
|
|
"Cadbury"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_183.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the babies name?",
|
|
"answer": "no name",
|
|
"gt_answers": [
|
|
"Schwaller"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02804414_8548.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sale is advertised for the store in the image?",
|
|
"answer": "50% off sale",
|
|
"gt_answers": [
|
|
"40%"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Norway country code?",
|
|
"answer": "0047",
|
|
"gt_answers": [
|
|
"nor"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000061293.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the set of letters before NOR?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"dnb"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000061293.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What set of letters has two capitals and one lowercase?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"dnb"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000061293.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kid of train service is in the picture?",
|
|
"answer": "freight train",
|
|
"gt_answers": [
|
|
"railpool"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000494768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which train company is being used?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"txlogistik"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000494768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train's \"plate\" number?",
|
|
"answer": "nr nr",
|
|
"gt_answers": [
|
|
"185 693-9"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000494768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of vehicle is this?",
|
|
"answer": "taxi",
|
|
"gt_answers": [
|
|
"Taxi"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02930766_33429.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this taxi's number?",
|
|
"answer": "788",
|
|
"gt_answers": [
|
|
"788"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02930766_33429.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of hot sauce?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TABASCO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000157639.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is an exciting experience?",
|
|
"answer": "a shopping mall",
|
|
"gt_answers": [
|
|
"Shopping"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_70.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Under the sign banning cars and motorcycles, what is the word printed at the very bottom?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"SALLITTU"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362100.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street on the street sign?",
|
|
"answer": "k street",
|
|
"gt_answers": [
|
|
"K ST NW"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/440.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "I they go left, what Altitude would they reach?",
|
|
"answer": "3000 feet",
|
|
"gt_answers": [
|
|
"Altitude 2000",
|
|
"2000"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371568.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What logo is present in the picture?",
|
|
"answer": "at&t",
|
|
"gt_answers": [
|
|
"Johnson Controls"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014762.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of this airline?",
|
|
"answer": "air china",
|
|
"gt_answers": [
|
|
"CHINA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259553.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the baseball players jersey say?",
|
|
"answer": "texas rangers",
|
|
"gt_answers": [
|
|
"Wagner 35"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334922.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand in the packages?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Starkist"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000248191.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the street sign say?",
|
|
"answer": "yield",
|
|
"gt_answers": [
|
|
"YIELD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What costs 3.99?",
|
|
"answer": "oranges",
|
|
"gt_answers": [
|
|
"clementines"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414544.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the jersey of the girl holding the ball?",
|
|
"answer": "14",
|
|
"gt_answers": [
|
|
"14"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02802426_18069.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the jersey of the player holding the ball?",
|
|
"answer": "14",
|
|
"gt_answers": [
|
|
"14"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02802426_18069.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the hotel?",
|
|
"answer": "hilton garden inn",
|
|
"gt_answers": [
|
|
"Marriott"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000741.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is this sign for?",
|
|
"answer": "manhattan",
|
|
"gt_answers": [
|
|
"Manhattan"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401009.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where does the road west lead?",
|
|
"answer": "manhattan",
|
|
"gt_answers": [
|
|
"Canal St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401009.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where does the road north lead?",
|
|
"answer": "manhattan",
|
|
"gt_answers": [
|
|
"Bowery"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401009.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue sign behind the elephant?",
|
|
"answer": "the elephant",
|
|
"gt_answers": [
|
|
"San Ramosa"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402498.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the paper?",
|
|
"answer": "20",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000556162.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the fin of the lead plane?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000246982.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the fin of the back plane?",
|
|
"answer": "91",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000246982.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is given on flight",
|
|
"answer": "flight number",
|
|
"gt_answers": [
|
|
"N361PH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354005.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made the sewing machine?",
|
|
"answer": "kenmore",
|
|
"gt_answers": [
|
|
"brother"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04179913_1798.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What Number is on the Television?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000272741.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name in the upper left corner of the image?",
|
|
"answer": "johnny d",
|
|
"gt_answers": [
|
|
"Angel Soler"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02804610_19486.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the tag?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Phillip"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01496331_11875.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does this sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP ALL WAY",
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what should a person do when seeing this sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP ALL WAY",
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this sign represent?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP ALL WAY",
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being celebrated?",
|
|
"answer": "birthday",
|
|
"gt_answers": [
|
|
"BIRTHDAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374826.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What body organ is mentioned on the banner in the background?",
|
|
"answer": "liver",
|
|
"gt_answers": [
|
|
"Brains"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT MOVIE IS THE POSTER OF?",
|
|
"answer": "the matrix",
|
|
"gt_answers": [
|
|
"firefox"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000035094.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHO IS THE STAR IN THE MOVIE POSTER?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"clint eastwood"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000035094.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHO'S FIRST NAME IS ON WALL?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"noah"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000035094.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What colour is the woman's bag?",
|
|
"answer": "brown",
|
|
"gt_answers": [
|
|
"Blue",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713352.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What popular body spray brand is printed on the sleeves of each player?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"AXE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000054375.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are printed in the circle on each players hat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"RC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000054375.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the emblem name on the shirt's white sleeve?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"XCEL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000340331.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the first word on the black shirt?",
|
|
"answer": "man",
|
|
"gt_answers": [
|
|
"rock"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03141823_1627.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "esta",
|
|
"gt_answers": [
|
|
"estafeta"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000284333.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT ISN WRITTEN IN FRONT OF VEHICLE",
|
|
"answer": "mustang",
|
|
"gt_answers": [
|
|
"SUNSET CLASSICS",
|
|
"SUNSETCLASSICS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03459775_12633.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the label above the sweets on the left?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"Mentos"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_669.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the motorcycle's shock absorber?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"YAMAHA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000323125.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is shown on the motorcycle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"74"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000323125.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the purple bike?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"7466"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373700.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the batter?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Gomez"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000551372.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the train engine?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"1040"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000321302.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign says?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413686.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the white sign?",
|
|
"answer": "forklifts not allowed",
|
|
"gt_answers": [
|
|
"Vehicles parked entirely at owners risk."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412939.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers are on the bus?",
|
|
"answer": "123",
|
|
"gt_answers": [
|
|
"123"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413520.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plat number fo the white four door car?",
|
|
"answer": "the license plate number of the white four door car",
|
|
"gt_answers": [
|
|
"DL9C JO912",
|
|
"DL9C J0912"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354951.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the longest display text shown?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"SAMSUNG"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001057.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the shortest display text displayed?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"ING"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001057.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four letters appear before bus?",
|
|
"answer": "nyc",
|
|
"gt_answers": [
|
|
"NYCT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1591919.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company does the truck belong to?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Retrograde Ltd."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000342132.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what percentage refer on top of fridge",
|
|
"answer": "0 percent",
|
|
"gt_answers": [
|
|
"0%"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326710.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the shirt say?",
|
|
"answer": "i'm with stupid",
|
|
"gt_answers": [
|
|
"I'm with stupid"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352182.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "united airlines",
|
|
"gt_answers": [
|
|
"UNITED"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is written on the plane?",
|
|
"answer": "united airlines",
|
|
"gt_answers": [
|
|
"UNITED"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is Capital Hill Neighborhood located?",
|
|
"answer": "washington dc",
|
|
"gt_answers": [
|
|
"Salt Lake City"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404051.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is another name for the street Bliss Ct.?",
|
|
"answer": "bliss road",
|
|
"gt_answers": [
|
|
"550 W."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404051.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is next to the insect to show their same size?",
|
|
"answer": "toothpaste",
|
|
"gt_answers": [
|
|
"Chapstick"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02174001_2430.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in the red logo?",
|
|
"answer": "1844",
|
|
"gt_answers": [
|
|
"FULLER"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is black?",
|
|
"answer": "18",
|
|
"gt_answers": [
|
|
"184"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is red?",
|
|
"answer": "18",
|
|
"gt_answers": [
|
|
"100"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the college name on the sweatshirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FRANKLIN MARSHALL",
|
|
"FRANKLIN & MARSHALL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000086524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the sporting company that is on the red sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MODELL'S"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000411934.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Modell's is an advertisement for what type of goods?",
|
|
"answer": "cigarettes",
|
|
"gt_answers": [
|
|
"SPORTING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000411934.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the exterior of the cardboard box?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the top right ?",
|
|
"answer": "lee kerker leather",
|
|
"gt_answers": [
|
|
"IMA Webley Replica"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03527444_23076.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on this car's licence plate?",
|
|
"answer": "citroen",
|
|
"gt_answers": [
|
|
"1415 DNZ"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03670208_35397.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When something is great you may call it this?",
|
|
"answer": "grand",
|
|
"gt_answers": [
|
|
"super"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000541900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the black shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"99"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000223299.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is listed on the fencing?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"SFR QUIKSILVER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366614.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Q: What does St. mean? A: Street",
|
|
"answer": "talbot st shops & restaurants",
|
|
"gt_answers": [
|
|
"st."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402847.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Q: What letter does the street start with? A: T",
|
|
"answer": "talbot street",
|
|
"gt_answers": [
|
|
"t"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000402847.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written on player's jersey?",
|
|
"answer": "johnson",
|
|
"gt_answers": [
|
|
"FISHER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350949.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the jersey number of player?",
|
|
"answer": "23",
|
|
"gt_answers": [
|
|
"46"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350949.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the clothing brand?",
|
|
"answer": "vans",
|
|
"gt_answers": [
|
|
"LIFEGUARD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370412.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the year on the photo?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2005"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02007558_6183.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in Wooden board",
|
|
"answer": "elephant sanctuary",
|
|
"gt_answers": [
|
|
"TRAINING CENTER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323337.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number on the shirts?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000498706.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of skiers?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000498706.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is above TWIN PEAKS?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"200"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320787.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red street sign instructing you to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320787.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the text in the blue sign?",
|
|
"answer": "obama bid",
|
|
"gt_answers": [
|
|
"OBAMA BIDEN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400245.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When does fonuts close?",
|
|
"answer": "5 pm",
|
|
"gt_answers": [
|
|
"6:00 PM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325398.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numerical version of LIGHTROOM is on the computer screen?",
|
|
"answer": "6",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000405250.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes after taste?",
|
|
"answer": "texture",
|
|
"gt_answers": [
|
|
"NATURALLY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420236.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is the woman's nail polish petting the dog?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Black"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02110958_4249.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word on the front of the player's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"VALLEY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000012434.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of crab is this?",
|
|
"answer": "carabina maxima",
|
|
"gt_answers": [
|
|
"Green Crab"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01978455_8146.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many players are on the baseball field?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Three",
|
|
"two"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401837.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the side of the truck say?",
|
|
"answer": "shell",
|
|
"gt_answers": [
|
|
"Shell"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price of the guillotine in Euro?",
|
|
"answer": "it costs 99 euros",
|
|
"gt_answers": [
|
|
"99"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03467068_19017.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the tractor cost in the picture?",
|
|
"answer": "it cost $600",
|
|
"gt_answers": [
|
|
"653.2"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04465501_4826.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is Parkway motor co located?",
|
|
"answer": "indianapolis, indiana",
|
|
"gt_answers": [
|
|
"1065 Wisconsin ave"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04465501_4826.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the side of the train?",
|
|
"answer": "ms stl",
|
|
"gt_answers": [
|
|
"654"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000200369.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website watermarked in the bottom right corner of the image?",
|
|
"answer": "rossbach",
|
|
"gt_answers": [
|
|
"meine-reiseberichte.net"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353514.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT LETTERS ARE WRITTEN ON THE BIG WHITE BOX?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ENEL ENEL",
|
|
"Enel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000534513.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "HOW MANY TIMES IS THE WORD FedEx WRITTEN ON THE CHAIR?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"FED EX 5 TIMES",
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000534513.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT COLORS ARE THE LETTERS, COH?",
|
|
"answer": "blue and white",
|
|
"gt_answers": [
|
|
"COH RED",
|
|
"red"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000534513.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of formula is this?",
|
|
"answer": "infant formula",
|
|
"gt_answers": [
|
|
"Original"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002160.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the hashtag on the computer?",
|
|
"answer": "tfw",
|
|
"gt_answers": [
|
|
"#apprentice"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline that operate the plane?",
|
|
"answer": "virgin airlines",
|
|
"gt_answers": [
|
|
"Virgin America"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1593018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first brand on the sign?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"Sprint"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370792.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are in yellow paint, on the sign?",
|
|
"answer": "benidorm",
|
|
"gt_answers": [
|
|
"Bienvenido A Mal Pais"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414939.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the sign",
|
|
"answer": "reserved",
|
|
"gt_answers": [
|
|
"Rotunda"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892201_33320.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the tennis bag have written on it?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Babolat"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331695.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THIS SHOP?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"OSAKA OHSHO"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_308.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline does this plane belong to?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"sas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000378586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline name is on the plane?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"sas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000378586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three letters on the aircraft stand for Scandinavian Airline System?",
|
|
"answer": "sas",
|
|
"gt_answers": [
|
|
"sas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000378586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sponsor is advertised in the background wall?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"MERCEDES-BENZ"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324911.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of bank shown ?",
|
|
"answer": "citibank",
|
|
"gt_answers": [
|
|
"Chase"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2418.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of market is on the right?",
|
|
"answer": "fish market",
|
|
"gt_answers": [
|
|
"farmers ma"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000042371.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the toy on the box?",
|
|
"answer": "space shuttle booster rockets",
|
|
"gt_answers": [
|
|
"Space Shuttle & Booster Rockets"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04266014_2940.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What chocolate kept in the plate?",
|
|
"answer": "milk chocolate",
|
|
"gt_answers": [
|
|
"Kitkat",
|
|
"Kit Kat"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000226075.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the child's shirt?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Mets"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336239.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the child's hat?",
|
|
"answer": "m",
|
|
"gt_answers": [
|
|
"M"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336239.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what vitamins does it give you?",
|
|
"answer": "a, b, c, d, e, f, g, h, i, j, k",
|
|
"gt_answers": [
|
|
"A & C"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the yellow license plate?",
|
|
"answer": "audi",
|
|
"gt_answers": [
|
|
"KEYES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02814533_7283.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many days are mentioned?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000425685.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bank?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ASB"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000425685.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of the stand?",
|
|
"answer": "newsagency",
|
|
"gt_answers": [
|
|
"Newsagency"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000854.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written around the table in the front of the stand?",
|
|
"answer": "newsagency",
|
|
"gt_answers": [
|
|
"Book Sale"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000854.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Wich numbers are in the top of the bus?",
|
|
"answer": "0 0",
|
|
"gt_answers": [
|
|
"41 18"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359833.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are written in pink letters on the window?",
|
|
"answer": "prada",
|
|
"gt_answers": [
|
|
"PINK PRESENTS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is the red bus welcoming you?",
|
|
"answer": "half moon bay",
|
|
"gt_answers": [
|
|
"Half moon bay"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349450.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the pub advertised on the red bus?",
|
|
"answer": "half moon bay",
|
|
"gt_answers": [
|
|
"Cameron's bus",
|
|
"CAMERON'S PUB"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349450.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the drink?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Coca Cola"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000283.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website called?",
|
|
"answer": "mycoke",
|
|
"gt_answers": [
|
|
"myenjoyzone.com"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000283.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many cell phones are on the table?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368825.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the sign next to the red light?",
|
|
"answer": "bike",
|
|
"gt_answers": [
|
|
"Second"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411638.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is underneath the number 65?",
|
|
"answer": "65",
|
|
"gt_answers": [
|
|
"81"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411638.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two words are underneath the picture of the bike?",
|
|
"answer": "bike lane",
|
|
"gt_answers": [
|
|
"Bike Lane"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411638.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many windows are on Benny's Shop?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374906.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letter on the flog above O?",
|
|
"answer": "e",
|
|
"gt_answers": [
|
|
"E"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000033505.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "the time?",
|
|
"answer": "the cat is watching the game",
|
|
"gt_answers": [
|
|
"5:52",
|
|
"5:52 PM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000257669.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white and black right pointing sign say?",
|
|
"answer": "tunis",
|
|
"gt_answers": [
|
|
"Tunis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374735.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say as a title in blue letters on the white sign?",
|
|
"answer": "the dining hall",
|
|
"gt_answers": [
|
|
"The Missing Man Table"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367682.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company in the photo?",
|
|
"answer": "coco frio",
|
|
"gt_answers": [
|
|
"Coco Frios"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348197.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the web address of the company?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"www.myprofe.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is the airplane from?",
|
|
"answer": "singapore airlines",
|
|
"gt_answers": [
|
|
"Singapore Airlines"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_9218.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the green ensigns?",
|
|
"answer": "prince",
|
|
"gt_answers": [
|
|
"prince"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324799.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are on the picture above the bed?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"A dream is a wish your heart makes."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000433236.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the word on the building say?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"ED AR KAROTITI Samsung"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the blue bus number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"50"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000002639.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the team name on the baseball jersey?",
|
|
"answer": "indians",
|
|
"gt_answers": [
|
|
"Indians"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362864.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name listed on the white controller?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SONY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259790.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand listed on the black device?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SONY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259790.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is it?",
|
|
"answer": "it is noon",
|
|
"gt_answers": [
|
|
"17:35"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592645.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do they sell next to roberto's cakes?",
|
|
"answer": "sausages",
|
|
"gt_answers": [
|
|
"Octopus"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3112.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Weather it has close or open",
|
|
"answer": "open",
|
|
"gt_answers": [
|
|
"OPEN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top word displayed on the partition?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BIRCH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000457149.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bottom word displayed on the partition?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"HILL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000457149.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number in the cap",
|
|
"answer": "seven",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02877765_15843.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the fruit?",
|
|
"answer": "pomegranate",
|
|
"gt_answers": [
|
|
"Pomegranate"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07768694_9879.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written in white letters on a red background?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"sandwich"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000569591.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the five numbers after Odessa?",
|
|
"answer": "odessa, ukraine",
|
|
"gt_answers": [
|
|
"75014"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000569591.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word to the left of the wheelchair?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"interdit"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000569591.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign warn you not to forget?",
|
|
"answer": "to lock your bike",
|
|
"gt_answers": [
|
|
"your bicycle",
|
|
"BICYCLE"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4460.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title at the top of the white paper?",
|
|
"answer": "bicycle passengers",
|
|
"gt_answers": [
|
|
"Bicycle Passengers"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4460.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is 1 serving size?",
|
|
"answer": "1 cup",
|
|
"gt_answers": [
|
|
"1 cup (245g)",
|
|
"1 CUP"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many servings per container?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"About 2.",
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many calories in a serving?",
|
|
"answer": "260 calories per serving",
|
|
"gt_answers": [
|
|
"110"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company?",
|
|
"answer": "uhaul",
|
|
"gt_answers": [
|
|
"U-HAUL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368808.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this restaurant?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"THE WORKS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000248478.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is being advertised by the yellow entryway banners?",
|
|
"answer": "lego",
|
|
"gt_answers": [
|
|
"Lego"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_464.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What product is advertised on the two orange signs?",
|
|
"answer": "lego",
|
|
"gt_answers": [
|
|
"LEGO"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_464.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first English word written on the blue sign?",
|
|
"answer": "bank",
|
|
"gt_answers": [
|
|
"reserve"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000978.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name do you see on a boy's shirt?",
|
|
"answer": "sam",
|
|
"gt_answers": [
|
|
"EVERLAST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000306353.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 6 characters that come after the dash at the top of the photo?",
|
|
"answer": "a border collie",
|
|
"gt_answers": [
|
|
"DAJ064"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02106030_3624.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 9 characters that come before the dash at the top of the photo?",
|
|
"answer": "border collie",
|
|
"gt_answers": [
|
|
"81120016H"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02106030_3624.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text in this image? Exclude any symbols",
|
|
"answer": "a border collie in the grass",
|
|
"gt_answers": [
|
|
"81120016H DAJ064"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02106030_3624.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the initials on the front of the train?",
|
|
"answer": "sb",
|
|
"gt_answers": [
|
|
"DB"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350131.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign with black letter on the railing say?",
|
|
"answer": "red bull",
|
|
"gt_answers": [
|
|
"Red Bull"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351307.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What advertiser is written on the bottom step?",
|
|
"answer": "red bull",
|
|
"gt_answers": [
|
|
"Converse"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351307.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who's name is on the top of this?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Yngwie Johann Malmsteen"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03272010_605.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the musician?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Yngwie Johann Malmsteen"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03272010_605.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building that is painted white?",
|
|
"answer": "victoria station",
|
|
"gt_answers": [
|
|
"The Wellington.",
|
|
"THE WELLINGTON"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4494.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is advertised on the window?",
|
|
"answer": "youtube",
|
|
"gt_answers": [
|
|
"mamasandpapas.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the tagline for the store?",
|
|
"answer": "if you can dream it we can make it",
|
|
"gt_answers": [
|
|
"Quality & Value for Everyone"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which red letters are on the man's green shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MWT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000360147.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the lane closed?",
|
|
"answer": "ahead",
|
|
"gt_answers": [
|
|
"Ahead"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364545.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company works in this building?",
|
|
"answer": "deutsche bank",
|
|
"gt_answers": [
|
|
"IBM"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000027.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of animal is it?",
|
|
"answer": "snake",
|
|
"gt_answers": [
|
|
"Snake"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01753488_2778.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is displayed?",
|
|
"answer": "delta",
|
|
"gt_answers": [
|
|
"Delta"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000444210.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the identification number of the plane?",
|
|
"answer": "msn 370",
|
|
"gt_answers": [
|
|
"3754"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000444210.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What SPF level is the sun screen?",
|
|
"answer": "spf 45",
|
|
"gt_answers": [
|
|
"45"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04357314_14941.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written above \"Sun Block\"?",
|
|
"answer": "no alcohol",
|
|
"gt_answers": [
|
|
"Babies"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04357314_14941.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does MW stand for?",
|
|
"answer": "my world",
|
|
"gt_answers": [
|
|
"mark warner"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000461509.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on the back of the device?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"Cingular"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116735.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the equipment?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"Beno"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04009552_7776.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers listed below the innitials?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"305"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405366.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the third word in the bottom right corner of the frame?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PHOTO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000118186.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the photographer who took this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MICHAEL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000118186.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the photographer who took this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TOONE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000118186.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the earliest year in the watermark?",
|
|
"answer": "1860",
|
|
"gt_answers": [
|
|
"2005"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02009912_14063.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the latest year in the watermark?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02009912_14063.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in largest type on the package of toilet paper rolls?",
|
|
"answer": "toilet paper",
|
|
"gt_answers": [
|
|
"Edet"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000173751.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in the back",
|
|
"answer": "prince",
|
|
"gt_answers": [
|
|
"prince"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328924.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is advertised in neon lights?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"city"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000358100.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the six-letter name of the street listed in the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"NEWTON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000004481.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate on the red bus?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"Y849 TGH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285954.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the snake?",
|
|
"answer": "cobra",
|
|
"gt_answers": [
|
|
"punchstock"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01748264_15413.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the clock say?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"6:36:55"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000020524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the mug from?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"San Francisco",
|
|
"SAN FRANCISO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000020524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the bar on the right side of the sign?",
|
|
"answer": "grand opera",
|
|
"gt_answers": [
|
|
"monk bar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367332.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the smallest number on the clock face?",
|
|
"answer": "12",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000354569.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of food is in this picture?",
|
|
"answer": "sandwiches",
|
|
"gt_answers": [
|
|
"Sandwich"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000206.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is the woman walking on?",
|
|
"answer": "6th street",
|
|
"gt_answers": [
|
|
"Jose Sarria Ct"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416133.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name display in the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"daniel",
|
|
"DANIEL BLEUMINK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499360.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is display in the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"102"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499360.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the image?",
|
|
"answer": "click",
|
|
"gt_answers": [
|
|
"Click"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03146219_1120.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company on the front shuttle?",
|
|
"answer": "mta transit",
|
|
"gt_answers": [
|
|
"Dartline"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334568.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the road sign say?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416359.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which street is shown on the sign?",
|
|
"answer": "main street",
|
|
"gt_answers": [
|
|
"Moor St",
|
|
"MOOR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where can I get Pepsi?",
|
|
"answer": "sun valley market",
|
|
"gt_answers": [
|
|
"Sun Valley Market"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343629.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What Avenue is the market on?",
|
|
"answer": "sun valley avenue",
|
|
"gt_answers": [
|
|
"10th Avenue"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343629.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of food comes in this package?",
|
|
"answer": "chicken",
|
|
"gt_answers": [
|
|
"Chicken Burgers"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Biobus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000382320.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this watch?",
|
|
"answer": "konus",
|
|
"gt_answers": [
|
|
"Konus"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03197337_4422.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time does the left watch say?",
|
|
"answer": "12 o clock",
|
|
"gt_answers": [
|
|
"12:00"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03197337_4422.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE BUS NUMBER?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"153"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000265351.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHERE DOES THE BUS GO?",
|
|
"answer": "to the city centre",
|
|
"gt_answers": [
|
|
"CASTLEFORD VIA ROTHWELL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000265351.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE BUSES LICENSE PLATE?",
|
|
"answer": "bh bh",
|
|
"gt_answers": [
|
|
"S481 ANW"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000265351.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the one brand sponsor visible in this image?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"POLO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377341.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on this building?",
|
|
"answer": "vodafone",
|
|
"gt_answers": [
|
|
"vodafone"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001239.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the plane?",
|
|
"answer": "amerijet",
|
|
"gt_answers": [
|
|
"BONANZA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the park?",
|
|
"answer": "essex park",
|
|
"gt_answers": [
|
|
"n.3",
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000286144.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of food is sold here?",
|
|
"answer": "chicken",
|
|
"gt_answers": [
|
|
"Sushi"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_710.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is sponsored in this tennis game?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"Lacoste"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346220.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What action is described from the first three letters of the bold, teal lettering?",
|
|
"answer": "digital",
|
|
"gt_answers": [
|
|
"DIG"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400547.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who is the author of the book?",
|
|
"answer": "charles dickens",
|
|
"gt_answers": [
|
|
"judith kerr"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316672.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say in the middle picture?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"BAKER MARKET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254714.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign above the door say?",
|
|
"answer": "laboratory",
|
|
"gt_answers": [
|
|
"Exit"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03630383_6511.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the logo on the man's labcoat?",
|
|
"answer": "pfizer",
|
|
"gt_answers": [
|
|
"TSG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03630383_6511.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What road is on the sign?",
|
|
"answer": "college rd",
|
|
"gt_answers": [
|
|
"College rd"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the forklift?",
|
|
"answer": "hyster",
|
|
"gt_answers": [
|
|
"Xilin"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03384352_7342.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOUR OF DRESS",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"BLACK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350954.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What direction is the sign pointing out?",
|
|
"answer": "east",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416347.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is printed in white on a red background?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"Pedestrians"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416347.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last two numbers on the license plate?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"39"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116196.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word after the word POLICE on the back of the boat?",
|
|
"answer": "patrol",
|
|
"gt_answers": [
|
|
"ENFORCEMENT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116196.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the boat directly below the window?",
|
|
"answer": "patrol",
|
|
"gt_answers": [
|
|
"POLICE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116196.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the sign?",
|
|
"answer": "this",
|
|
"gt_answers": [
|
|
"This"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000120595.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is the photo?",
|
|
"answer": "1940",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03041632_34505.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cab number?",
|
|
"answer": "57",
|
|
"gt_answers": [
|
|
"Y809"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03770679_22242.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What telephone number is on the cab?",
|
|
"answer": "555 555",
|
|
"gt_answers": [
|
|
"426-6262"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03770679_22242.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What colour is the sky",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04562935_21369.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is listed on the blue boat?",
|
|
"answer": "sail",
|
|
"gt_answers": [
|
|
"WATERMARK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03216828_56117.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the engine number?",
|
|
"answer": "c40 8",
|
|
"gt_answers": [
|
|
"6814"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000366058.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word in all caps on the central arch?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"LES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000391053.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year in on the picture",
|
|
"answer": "1970's",
|
|
"gt_answers": [
|
|
"2007"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03337140_399.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What game is being played in this image?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316076.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the 3 number combination displayed in this image?",
|
|
"answer": "617",
|
|
"gt_answers": [
|
|
"524"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000132262.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign say?",
|
|
"answer": "hollywood",
|
|
"gt_answers": [
|
|
"Hollywood"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000581.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cost of the objects in the center of the image",
|
|
"answer": "ice cream",
|
|
"gt_answers": [
|
|
"$7.55"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the top line of the girl's shirt?",
|
|
"answer": "star",
|
|
"gt_answers": [
|
|
"Sing"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361655.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is being played?",
|
|
"answer": "cross country skiing",
|
|
"gt_answers": [
|
|
"Skiiing"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How far is Santa Cruz according to the sign?",
|
|
"answer": "20 miles",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368248.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What location is 68 miles away?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"Monterey"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368248.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"29"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000513456.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the logo on the teddy bear's shirt?",
|
|
"answer": "skull and crossbones",
|
|
"gt_answers": [
|
|
"Streetbear"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363834.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign by the tracks say?",
|
|
"answer": "speed limit 25 mph",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351327.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the shop with the black background?",
|
|
"answer": "john smith",
|
|
"gt_answers": [
|
|
"Jones"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/734.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is advertised in the window at Jones?",
|
|
"answer": "sale",
|
|
"gt_answers": [
|
|
"sale"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/734.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is on the Jersey?",
|
|
"answer": "kansas city",
|
|
"gt_answers": [
|
|
"Kansas City.",
|
|
"KANSAS CITY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000206697.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of juice is pictured here",
|
|
"answer": "orange juice",
|
|
"gt_answers": [
|
|
"Tomato Juice",
|
|
"TOMATO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000481530.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of alcohol is pictured here",
|
|
"answer": "beer and coffee",
|
|
"gt_answers": [
|
|
"Vodka"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000481530.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Brand name?",
|
|
"answer": "mountain dew",
|
|
"gt_answers": [
|
|
"Mountain Dew."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003336.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the sticker on the bananna say",
|
|
"answer": "it says bananas",
|
|
"gt_answers": [
|
|
"fyffes"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324338.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name listed in gold on the building?",
|
|
"answer": "eat",
|
|
"gt_answers": [
|
|
"Zizzi"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2947.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airlines is this?",
|
|
"answer": "klm airlines",
|
|
"gt_answers": [
|
|
"KLM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335366.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of this cycle?",
|
|
"answer": "mountain bike",
|
|
"gt_answers": [
|
|
"TROPHY AUTHOR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000136415.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business on the sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GOODELL'S ERITAGE UPHOLSTERY CO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293931.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how much does a ticket cost for adults?",
|
|
"answer": "\u00a330",
|
|
"gt_answers": [
|
|
"18"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404635.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how much does a ticket cost for students/seniors?",
|
|
"answer": "it is free for students",
|
|
"gt_answers": [
|
|
"16"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404635.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say after Fly?",
|
|
"answer": "fly",
|
|
"gt_answers": [
|
|
"Kingfisher"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354663.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the pizza place?",
|
|
"answer": "mad pizza",
|
|
"gt_answers": [
|
|
"Maddio's"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Parking prevent which place?",
|
|
"answer": "runways",
|
|
"gt_answers": [
|
|
"Runaways"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is diplayed on the right fielder's uniform?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000544240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the advertisment for in the background?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"X-RAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000544240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bus say?",
|
|
"answer": "san diego",
|
|
"gt_answers": [
|
|
"Guest Shuttle"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2917.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign say?",
|
|
"answer": "no trespassing",
|
|
"gt_answers": [
|
|
"CSX TRANSPORTATION NO TRESPASSING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355357.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the telephone number on the Hot Spot sign?",
|
|
"answer": "+1 416 888 8868",
|
|
"gt_answers": [
|
|
"416-971-6629"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which road is shownshown?",
|
|
"answer": "newbury road",
|
|
"gt_answers": [
|
|
"Newbury Road"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "planes number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"423"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000465395.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team is up to bat",
|
|
"answer": "the mariners",
|
|
"gt_answers": [
|
|
"Mariners"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048548.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street address of the tall building in the image?",
|
|
"answer": "london road, london, england london, england",
|
|
"gt_answers": [
|
|
"140 London Wall.",
|
|
"LONDON WALL"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3547.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the code in the bottom left corner?",
|
|
"answer": "the code in the bottom left corner",
|
|
"gt_answers": [
|
|
"ddc95"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04204347_6372.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the white tag on the bottom left?",
|
|
"answer": "a white tag",
|
|
"gt_answers": [
|
|
"ddc95"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04204347_6372.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the model of the laptop, as shown at the bottom of the screen?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MacBook"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000361255.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the blue bus' license plate?",
|
|
"answer": "dcb 003",
|
|
"gt_answers": [
|
|
"X742 JCS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000087845.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company of the blue bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"STAGECOACH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000087845.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus' destination?",
|
|
"answer": "leeds",
|
|
"gt_answers": [
|
|
"Kirkcaldy mid stree",
|
|
"KIRKCALDY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000087845.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which airlines does that plane work for?",
|
|
"answer": "hong kong airlines",
|
|
"gt_answers": [
|
|
"shanghai airlines"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000314681.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"christopher chau"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000314681.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was the photo taken?",
|
|
"answer": "2018",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000314681.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the train?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"66713"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000043506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the post?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"1028"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000043506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What dies the little boy's shirt says?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ATHLETICS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000294850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the truck door?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"30"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000504099.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "nyc bus number 7",
|
|
"gt_answers": [
|
|
"3936"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000502197.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word to the right of the photo?",
|
|
"answer": "shoppers",
|
|
"gt_answers": [
|
|
"Arnold"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_224.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is below the stop sign?",
|
|
"answer": "a stop sign",
|
|
"gt_answers": [
|
|
"3 way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366732.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the truck?",
|
|
"answer": "city of san diego",
|
|
"gt_answers": [
|
|
"SCRAPE-DON'T RINSE",
|
|
"SCRAPE - DON'T RINSE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326816.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store on the far right?",
|
|
"answer": "cdiscount",
|
|
"gt_answers": [
|
|
"Cudule",
|
|
"GUDULE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344521.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the bottom left of the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Deo Araujo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/6490.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the right side of the sign?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"2700"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400260.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name on the sign?",
|
|
"answer": "clay st",
|
|
"gt_answers": [
|
|
"Clay St",
|
|
"CLAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400260.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name on the sign?",
|
|
"answer": "clay st",
|
|
"gt_answers": [
|
|
"Clay St",
|
|
"CLAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400260.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building?",
|
|
"answer": "log cabin",
|
|
"gt_answers": [
|
|
"Visitor Information Center"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354846.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website can I find information about new era?",
|
|
"answer": "new era caps",
|
|
"gt_answers": [
|
|
"NEWERACAP.COM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341193.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the plane?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"IndiGo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000710.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What was this theater called?",
|
|
"answer": "plaza",
|
|
"gt_answers": [
|
|
"Plaza"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_26941.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first movie showing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Beloved"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_26941.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second movie showing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Holy man Ronin"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_26941.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Closest store name?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Masa"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_61.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the green sign in the top right of the image?",
|
|
"answer": "exit",
|
|
"gt_answers": [
|
|
"EXIT"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_252.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of company is Santa Fe Natural?",
|
|
"answer": "tobacco company",
|
|
"gt_answers": [
|
|
"Tobacco"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03666591_5876.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How far is it to the wall?",
|
|
"answer": "it's about 30 feet",
|
|
"gt_answers": [
|
|
"340"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000074090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the catcher's number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"19"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000074090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last two letters of the catcher's name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ry"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000074090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What station is this game being shown on?",
|
|
"answer": "nippon television",
|
|
"gt_answers": [
|
|
"NESN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000133510.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the umpires sleeve?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"53"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334046.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team does the batter play for?",
|
|
"answer": "new york mets",
|
|
"gt_answers": [
|
|
"Mets"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334046.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is being advertised in the stands?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"neweracap.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334046.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What radio station is being advertised for New Jersey?",
|
|
"answer": "wgl fm",
|
|
"gt_answers": [
|
|
"Jersey 101.5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What college is being advertised?",
|
|
"answer": "rutgers university",
|
|
"gt_answers": [
|
|
"Thomas Edison State College"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What amusement park is being advertised?",
|
|
"answer": "amusement park",
|
|
"gt_answers": [
|
|
"Busch Gardens"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the Chinese restaurant?",
|
|
"answer": "tom tom",
|
|
"gt_answers": [
|
|
"Dried Beef King"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "tom",
|
|
"gt_answers": [
|
|
"Mott",
|
|
"MOTT ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written above the Mott Street sign?",
|
|
"answer": "mott street",
|
|
"gt_answers": [
|
|
"Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the date?",
|
|
"answer": "january 1st",
|
|
"gt_answers": [
|
|
"08/01/13"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number that was dialed?",
|
|
"answer": "cisco ip phone",
|
|
"gt_answers": [
|
|
"421250"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the board read at the very top (in English)?",
|
|
"answer": " ",
|
|
"gt_answers": [
|
|
"Additional Services"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_78.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the first location name on the shirt",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"london"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000227337.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the second location on the shirt",
|
|
"answer": "berlin",
|
|
"gt_answers": [
|
|
"new york"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000227337.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the third location on the shirt",
|
|
"answer": "berlin",
|
|
"gt_answers": [
|
|
"Berlin"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000227337.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the bike on the left?",
|
|
"answer": "sm sm",
|
|
"gt_answers": [
|
|
"ka.0.9 ef..9690"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000035132.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are on the player's hat?",
|
|
"answer": "stl",
|
|
"gt_answers": [
|
|
"GBS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347290.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What ave is named on a sign?",
|
|
"answer": "rakeen",
|
|
"gt_answers": [
|
|
"Sergio Dukes ave",
|
|
"SERGIO DUKES"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is on a sign?",
|
|
"answer": "rakeen street",
|
|
"gt_answers": [
|
|
"Ordero Hilliard St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the street on top of gay st?",
|
|
"answer": "gay street",
|
|
"gt_answers": [
|
|
"christopher st",
|
|
"Christopher"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325704.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what kind of mushrooms?",
|
|
"answer": "oyster mushrooms",
|
|
"gt_answers": [
|
|
"Organic"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the blue sex shop?",
|
|
"answer": "super mags",
|
|
"gt_answers": [
|
|
"Super Mags"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375848.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"197"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_26.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to the airport",
|
|
"gt_answers": [
|
|
"JVR Town Hall RD",
|
|
"JUR TOWN HALL RD"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_26.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who manufactured the bus?",
|
|
"answer": "lexus",
|
|
"gt_answers": [
|
|
"Mitsubishi Electric"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_26.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the card on the bottom of the side of the fridge way?",
|
|
"answer": "it's person's birthday",
|
|
"gt_answers": [
|
|
"WE WANT YOU"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000062763.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which person is from Australia?",
|
|
"answer": "john buchanan",
|
|
"gt_answers": [
|
|
"Bryce Undy"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03584254_2289.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the field?",
|
|
"answer": "clint thomas park",
|
|
"gt_answers": [
|
|
"Blair Field"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04149813_11043.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What restaurant name is on the left side of the board?",
|
|
"answer": "cline thompson's lodge",
|
|
"gt_answers": [
|
|
"OUTBACK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04149813_11043.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what measurement given on board",
|
|
"answer": "length",
|
|
"gt_answers": [
|
|
"1560m-2120m"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of board reference",
|
|
"answer": "the sign of board reference is a sign",
|
|
"gt_answers": [
|
|
"schonbodenb"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT NUMBER BUS IS THIS?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"11"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000131225.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT KIND OF BUS IS THIS?",
|
|
"answer": "a person bus",
|
|
"gt_answers": [
|
|
"SPECIAL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000131225.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who goes to town?",
|
|
"answer": "paddington",
|
|
"gt_answers": [
|
|
"Paddington"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What week of this women's pregnancy is it?",
|
|
"answer": "it is week 11",
|
|
"gt_answers": [
|
|
"Week 31"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sign is displayed on the window of the handbag store?",
|
|
"answer": "sale sign",
|
|
"gt_answers": [
|
|
"Sale"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_312.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the name say above the yellow van?",
|
|
"answer": "sams club",
|
|
"gt_answers": [
|
|
"Penrose eyecare"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1938.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the route number of the bus?",
|
|
"answer": "no route number",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04487081_8351.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus headed?",
|
|
"answer": "downtown",
|
|
"gt_answers": [
|
|
"Jasper Place"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04487081_8351.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the black car?",
|
|
"answer": "tahoe",
|
|
"gt_answers": [
|
|
"X1936X"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company has been cruisin' since 1936?",
|
|
"answer": "ford motor company",
|
|
"gt_answers": [
|
|
"Robot & Co"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year did Robot & Co start cruisin'?",
|
|
"answer": "1955",
|
|
"gt_answers": [
|
|
"1936"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is this bus advertising?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"Chicago"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the object?",
|
|
"answer": "apple",
|
|
"gt_answers": [
|
|
"Body"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009862.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the skateboard?",
|
|
"answer": "longboard",
|
|
"gt_answers": [
|
|
"Gravity Skateboards",
|
|
"GRAVITY SKATEBOARD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BUDDY'S"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000047767.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time do the clocks say?",
|
|
"answer": "the time of day",
|
|
"gt_answers": [
|
|
"12:01"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03196217_5486.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "whats the number on the red sign?",
|
|
"answer": "212",
|
|
"gt_answers": [
|
|
"80"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000559288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"27"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553735.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the time shown on the clock?",
|
|
"answer": "12:00",
|
|
"gt_answers": [
|
|
"9:15"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367505.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the flyer?",
|
|
"answer": "all way",
|
|
"gt_answers": [
|
|
"DANCE!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375490.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year on the red t-shirt?",
|
|
"answer": "2008",
|
|
"gt_answers": [
|
|
"1981"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000471405.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the numbers on train",
|
|
"answer": "the number on the front of the train",
|
|
"gt_answers": [
|
|
"378 136"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000120276.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the pitcher?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"27"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000382664.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sign is posted on the fence?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"Posted no trespassing"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334635.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the flight name",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360860.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the time and date",
|
|
"answer": "the time is unknown",
|
|
"gt_answers": [
|
|
"2012.12.15 08:40"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360860.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the large cake?",
|
|
"answer": "person's birthday",
|
|
"gt_answers": [
|
|
"HAPPY BIRTHDAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000329018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport are they playing?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Baseball"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000368969.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the main entrance?",
|
|
"answer": "qeb",
|
|
"gt_answers": [
|
|
"QEHB"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4457.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who took this photo",
|
|
"answer": "john gaston",
|
|
"gt_answers": [
|
|
"john cassady"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01855032_2880.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what year was the photo taken in",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2005"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01855032_2880.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the airway line?",
|
|
"answer": "thai airways",
|
|
"gt_answers": [
|
|
"Hellenic Imperial"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top of the bus?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"merry holidays"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1593077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the bus?",
|
|
"answer": "the license plate of the bus",
|
|
"gt_answers": [
|
|
"JMC 8858"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1593077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is lost on the white paper?",
|
|
"answer": "goldfish",
|
|
"gt_answers": [
|
|
"Goldfish"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402910.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the dog's harness?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"Black"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02108422_3410.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word in this picture begins with an S?",
|
|
"answer": "sandwich",
|
|
"gt_answers": [
|
|
"SABOR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000051618.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word in this picture begins with a C?",
|
|
"answer": "cheese",
|
|
"gt_answers": [
|
|
"Crema"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000051618.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of sauce is in the red bottle?",
|
|
"answer": "hot sauce",
|
|
"gt_answers": [
|
|
"AJI CREMA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000051618.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is listed on the sign in the back?",
|
|
"answer": "chicago",
|
|
"gt_answers": [
|
|
"TORONTO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350397.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the white uniform shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GIANT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000249519.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of business is shown?",
|
|
"answer": "bank",
|
|
"gt_answers": [
|
|
"bank"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3723.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text say on the bottom left?",
|
|
"answer": "two women with umbrellas",
|
|
"gt_answers": [
|
|
"Sandro Lacarbona"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317364.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the back of the white jersey?",
|
|
"answer": "nationals",
|
|
"gt_answers": [
|
|
"Bat Boy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351999.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the person playing the instrument?",
|
|
"answer": "d",
|
|
"gt_answers": [
|
|
"W"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351999.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "I promise you are going to what?",
|
|
"answer": "times square",
|
|
"gt_answers": [
|
|
"love it"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365206.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the male actor on the billboard?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Sean Hayes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365206.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats is the store's name on the right?",
|
|
"answer": "john lewis",
|
|
"gt_answers": [
|
|
"PIZZA & PASTA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/767.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the maximum volumeof the bigger conical flask?",
|
|
"answer": "200ml",
|
|
"gt_answers": [
|
|
"200 ml"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02815834_1950.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the pink store?",
|
|
"answer": "the swallow bakery",
|
|
"gt_answers": [
|
|
"THE SWALLOW BAKERY"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02776631_23368.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the webpage currently on the computer screen?",
|
|
"answer": "myspace",
|
|
"gt_answers": [
|
|
"Gravity's Grace"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000412289.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the train?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"171"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the large white word on the train?",
|
|
"answer": "metro",
|
|
"gt_answers": [
|
|
"Metra"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NUMBER WRITTEN",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"867"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02981792_3782.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON THE BOAT",
|
|
"answer": "the name of the boat",
|
|
"gt_answers": [
|
|
"TYBEE 500"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02981792_3782.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who does the copyright belongs to?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Adrian Patino"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359776.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what type of game is this",
|
|
"answer": "crossword",
|
|
"gt_answers": [
|
|
"crossword puzzle"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06785654_7140.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the answer to 13 down",
|
|
"answer": "pop triple play",
|
|
"gt_answers": [
|
|
"tee"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06785654_7140.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color pen was used to pill in boxes",
|
|
"answer": "black pen",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06785654_7140.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the photo taken?",
|
|
"answer": "2008",
|
|
"gt_answers": [
|
|
"2007"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02114855_6829.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photographer?",
|
|
"answer": "jim bova",
|
|
"gt_answers": [
|
|
"Al Bolivar"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02114855_6829.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the trailer of the truck?",
|
|
"answer": "detroit steel",
|
|
"gt_answers": [
|
|
"CIRCUIT RIDER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000297023.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the trailer?",
|
|
"answer": "nbss",
|
|
"gt_answers": [
|
|
"110"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000297023.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the powder?",
|
|
"answer": "pressed",
|
|
"gt_answers": [
|
|
"Stockphoto"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03314780_6164.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the batter?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"14"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292257.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what name engraved on the metal?",
|
|
"answer": "egc",
|
|
"gt_answers": [
|
|
"EGG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04141327_4464.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is listed above the alligator logo?",
|
|
"answer": "lacoste",
|
|
"gt_answers": [
|
|
"Lacoste"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329092.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What hotel chain is advertised in brown and yellow?",
|
|
"answer": "starwood hotels",
|
|
"gt_answers": [
|
|
"Clarion",
|
|
"Clarion Hotel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348136.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What age do you have to be to buy alcohol and tobacco?",
|
|
"answer": "18 years old",
|
|
"gt_answers": [
|
|
"18"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_98.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT COLOUR IS THEIR BEEK",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"RED"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01860187_2912.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the sand",
|
|
"answer": "accelerate",
|
|
"gt_answers": [
|
|
"Accenture"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000071.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is a good place to get a gift?",
|
|
"answer": "the candy store",
|
|
"gt_answers": [
|
|
"the candy store"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03089624_21313.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what website can you visit to get more information about this photo?",
|
|
"answer": "wikipedia",
|
|
"gt_answers": [
|
|
"www.padelnews.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04039381_30881.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is being advertised on the bid white board?",
|
|
"answer": "a mobile phone",
|
|
"gt_answers": [
|
|
"best-kept secret to looking good"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_159.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the cost of 3 premium face, body or spa treatments?",
|
|
"answer": "sgd 100",
|
|
"gt_answers": [
|
|
"$128"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_159.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the phone number for Mary Chia?",
|
|
"answer": "the phone number for mary chia is",
|
|
"gt_answers": [
|
|
"6659 1161"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_159.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the license plate say",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"L3 UMO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355081.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the colors",
|
|
"answer": "yellow and green",
|
|
"gt_answers": [
|
|
"yellow and green",
|
|
"green and yellow"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000080745.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who owns the copyright to the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Gerry Walden"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2198.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phrase has been painted onto the wall?",
|
|
"answer": "take it to the hoop",
|
|
"gt_answers": [
|
|
"Take it to the hoop"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2198.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this tournament being played?",
|
|
"answer": "a tennis court",
|
|
"gt_answers": [
|
|
"Sydney"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318701.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can you buy from the shop on the corner?",
|
|
"answer": "cigarettes",
|
|
"gt_answers": [
|
|
"Fruit, burgers, popcorn and sandwiches"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321296.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline owns the airplane?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company files this airplane?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When is it not okay to exit here?",
|
|
"answer": "between night and day",
|
|
"gt_answers": [
|
|
"BETWEEN MIDNIGHT AND 7 A.M."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318206.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant?",
|
|
"answer": "the person",
|
|
"gt_answers": [
|
|
"Portillo's"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000537424.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who has the copyright on the photo?",
|
|
"answer": "pet insects",
|
|
"gt_answers": [
|
|
"pet_insects"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02174001_5497.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is on the left?",
|
|
"answer": "ub bank",
|
|
"gt_answers": [
|
|
"UOB"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_208.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of this blender?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"oster"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000044704.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say is in motion?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Art"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000350086.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many special bikes?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"40"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000350086.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title across the top of the image?",
|
|
"answer": "the strange countess",
|
|
"gt_answers": [
|
|
"The Strange Countess"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07248320_23501.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name in red text?",
|
|
"answer": "edgar wallace",
|
|
"gt_answers": [
|
|
"Edgar Wallace"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07248320_23501.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which photography firm has taken this photo?",
|
|
"answer": "krista",
|
|
"gt_answers": [
|
|
"KRISTA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365031.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this theater?",
|
|
"answer": "chicago",
|
|
"gt_answers": [
|
|
"Effingham County"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_51633.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What movie starts friday?",
|
|
"answer": "lion king",
|
|
"gt_answers": [
|
|
"Napoleon Dynamite"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03032252_51633.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What US state is printed on the green street sign?",
|
|
"answer": "illinois",
|
|
"gt_answers": [
|
|
"rhode ISLAND"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000052256.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is visible above the red vehicle?",
|
|
"answer": "fire",
|
|
"gt_answers": [
|
|
"Hydrogen"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03345487_4091.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building?",
|
|
"answer": "mitsubishi",
|
|
"gt_answers": [
|
|
"ritim"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402889.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the function of the red button?",
|
|
"answer": "it is a power button",
|
|
"gt_answers": [
|
|
"POWER"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04074963_20276.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the wooden bench?",
|
|
"answer": "st. johns river",
|
|
"gt_answers": [
|
|
"Holbeck Essen"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/107930.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the orange vegetable on the bag?",
|
|
"answer": "carrots",
|
|
"gt_answers": [
|
|
"Carrot"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the coffee mug from?",
|
|
"answer": "a gift from a friend",
|
|
"gt_answers": [
|
|
"Cafe Du Monde"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337422.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline",
|
|
"answer": "delta airlines",
|
|
"gt_answers": [
|
|
"Delta"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053111.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is orange train's number on the front?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"S316"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350124.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which country is represented by the airline symbol that is so visible?",
|
|
"answer": "singapore",
|
|
"gt_answers": [
|
|
"SINGAPORE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_836.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many laps are left to go?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355855.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team is shown on the catcher's sleeve?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"LA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000375838.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made the catcher's wrist brace?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TPX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000375838.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percentage off is the clearance?",
|
|
"answer": "50%",
|
|
"gt_answers": [
|
|
"30%",
|
|
"30"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03877472_12747.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the train?",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"67027 EWS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the camera",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"nokia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337779.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the street sign?",
|
|
"answer": "130",
|
|
"gt_answers": [
|
|
"1300"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404834.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name in the website tag?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PHOTOSTOCKSOURCE.COM"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n09835506_4173.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what time can you see on the cell phone ad?",
|
|
"answer": "the time is 12 noon",
|
|
"gt_answers": [
|
|
"1:10"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the ad claim about the phone?",
|
|
"answer": "it's the fastest android phone ever",
|
|
"gt_answers": [
|
|
"the worlds thinnest smartphone"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the brand for the ad?",
|
|
"answer": "lg",
|
|
"gt_answers": [
|
|
"oppo"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text in the thought bubble on the wall say?",
|
|
"answer": "hello",
|
|
"gt_answers": [
|
|
"HOLD ME!",
|
|
"HOLD ME"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03891332_3676.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"42"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000226254.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers/letters on the aircraft?",
|
|
"answer": "f-16",
|
|
"gt_answers": [
|
|
"J-624"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324458.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which side of the cassette is pictured?",
|
|
"answer": "front",
|
|
"gt_answers": [
|
|
"Side A",
|
|
"A"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02978881_8693.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the logo on the tail of the plane say?",
|
|
"answer": "t mobile",
|
|
"gt_answers": [
|
|
"T"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278565.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the engine of the plane?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"T"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278565.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company does the airplane belong to?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TRANSAVIA.COM",
|
|
"TRANSAVIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278565.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "During what years was the artist of this painting alive?",
|
|
"answer": "person was born in 1887 and died in 1966",
|
|
"gt_answers": [
|
|
"1868-1940.",
|
|
"1868-1940"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000320503.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are on the sign?",
|
|
"answer": "hollywood",
|
|
"gt_answers": [
|
|
"Hollywood"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000560.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the person who the man in the centerwants to get voted into the big brother house?",
|
|
"answer": "sarah",
|
|
"gt_answers": [
|
|
"George"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/638.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white on the red sign on the window?",
|
|
"answer": "sale",
|
|
"gt_answers": [
|
|
"FURTHER DISCOUNTS"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_318.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the yellow sign?",
|
|
"answer": "bump",
|
|
"gt_answers": [
|
|
"Bump"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362202.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is his name?",
|
|
"answer": "john",
|
|
"gt_answers": [
|
|
"Jose"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/107977.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE ANIMAL?",
|
|
"answer": "cat",
|
|
"gt_answers": [
|
|
"CAT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this bus going?",
|
|
"answer": "to the next stop",
|
|
"gt_answers": [
|
|
"Mt Airy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402586.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of one of the sponsors?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Apollinaris"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592467.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed inside the blue area?",
|
|
"answer": "intel logo",
|
|
"gt_answers": [
|
|
"Intel Inside"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000296.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the label say this is designed for?",
|
|
"answer": "microsoft windows 98",
|
|
"gt_answers": [
|
|
"Microsoft Windows XP"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000296.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the yellow card say to do?",
|
|
"answer": "place your cell phone on the table",
|
|
"gt_answers": [
|
|
"Hang up"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000540716.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this street?",
|
|
"answer": "stockton street",
|
|
"gt_answers": [
|
|
"Stockton St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401670.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website listed on the photo?",
|
|
"answer": "dunkinkuntan",
|
|
"gt_answers": [
|
|
"duncaninkuantan.blogspot.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03467068_28225.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the serial number of this boat?",
|
|
"answer": "no serial number",
|
|
"gt_answers": [
|
|
"ME14XSR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316495.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the back of the bus say?",
|
|
"answer": "i love london",
|
|
"gt_answers": [
|
|
"Pull-ups"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000364006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word in the picture that starts with F",
|
|
"answer": "front",
|
|
"gt_answers": [
|
|
"FAG"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4109.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "nr",
|
|
"gt_answers": [
|
|
"103"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349646.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the right sign pointing to?",
|
|
"answer": "saklap dede",
|
|
"gt_answers": [
|
|
"Galip Dede"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the left sign pointing to?",
|
|
"answer": "saklap dede",
|
|
"gt_answers": [
|
|
"Sahkapisi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phrase is on the side of the toolkit?",
|
|
"answer": "congregational tool kit",
|
|
"gt_answers": [
|
|
"Congregational Tool Kit Resources For Your Church"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02966687_10029.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the drink?",
|
|
"answer": "coke",
|
|
"gt_answers": [
|
|
"Cocio",
|
|
"COCIO CLASSIC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367939.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word in yellow text?",
|
|
"answer": "william",
|
|
"gt_answers": [
|
|
"William"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319930.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word in yellow text?",
|
|
"answer": "ju",
|
|
"gt_answers": [
|
|
"Jou"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319930.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store has the vertical banner?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Old Navy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367092.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which store is located in the foreground?",
|
|
"answer": "department store",
|
|
"gt_answers": [
|
|
"GAP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367092.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company printed on the plane?",
|
|
"answer": "china airlines",
|
|
"gt_answers": [
|
|
"CHINA AIRLINES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000017675.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the red letters on the back of the white truck?",
|
|
"answer": "miami beach",
|
|
"gt_answers": [
|
|
"CFI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000489967.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this for?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the road signs?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404511.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website address below the blue bucket?",
|
|
"answer": "www",
|
|
"gt_answers": [
|
|
"alibaba.com.cn"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02909870_14840.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is written on the stone?",
|
|
"answer": "john",
|
|
"gt_answers": [
|
|
"LORNA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321414.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the ruler?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Light blue.",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04118776_46440.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of bus is this?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"School bus"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04146614_11104.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the two words on the circle with mickey mouse>",
|
|
"answer": "happy birthday",
|
|
"gt_answers": [
|
|
"Happy Birthday!",
|
|
"HAPPY BIRTHDAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342601.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the first word on the white circle on top of the cake?",
|
|
"answer": "happy",
|
|
"gt_answers": [
|
|
"Happy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342601.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the baseball painted on the wall?",
|
|
"answer": "23",
|
|
"gt_answers": [
|
|
"42"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the bus?",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"yellow"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000125213.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the licence plate say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"sn55 dvl"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000125213.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the front of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"212"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000125213.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of vehicle is going through the arch?",
|
|
"answer": "bus",
|
|
"gt_answers": [
|
|
"Bus"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04486054_1409.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the letters in the top row on the sign in the upper right portion of this photo?",
|
|
"answer": "mt",
|
|
"gt_answers": [
|
|
"aud"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many children are getting off of the bus?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001091.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the four digit number of this bus?",
|
|
"answer": "999",
|
|
"gt_answers": [
|
|
"7429"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001091.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number for the player that is named Casilla?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"46"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330698.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the jet?",
|
|
"answer": "fighter jet",
|
|
"gt_answers": [
|
|
"J-062"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412661.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color stripes go down this train?",
|
|
"answer": "orange",
|
|
"gt_answers": [
|
|
"Orange",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000238051.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company makes the food shown?",
|
|
"answer": "general foods",
|
|
"gt_answers": [
|
|
"General Foods"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368500.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of foods are shown?",
|
|
"answer": "gourmet foods",
|
|
"gt_answers": [
|
|
"Gourmet Foods"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368500.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is above the green button on the meter?",
|
|
"answer": "parking",
|
|
"gt_answers": [
|
|
"OK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000262851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed limit?",
|
|
"answer": "60 mph",
|
|
"gt_answers": [
|
|
"25"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353022.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the phone next to he purple one?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"NOKIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000185200.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the blue car?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"YSK 365",
|
|
"YSK365"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4418.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is one of the sponsors on the blue wall?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Olympus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414940.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number in the square?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06794110_5477.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is shown on the clock?",
|
|
"answer": "it is 2:00 am",
|
|
"gt_answers": [
|
|
"4:22"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005975.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which sections contain letters?",
|
|
"answer": "letter b",
|
|
"gt_answers": [
|
|
"ISEST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000316571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which sections contain letters?",
|
|
"answer": "letter b",
|
|
"gt_answers": [
|
|
"SIGNS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000316571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the license plate on the train say?",
|
|
"answer": "i am a clown",
|
|
"gt_answers": [
|
|
"POPOFF"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000500135.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What District is the School?",
|
|
"answer": "the school is in the st johnsbury school district",
|
|
"gt_answers": [
|
|
"District #2",
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007509.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the road?",
|
|
"answer": "mill road",
|
|
"gt_answers": [
|
|
"MILL ROAD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000252122.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sponsor logo in the background?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"KIA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317319.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of wine is the front bottle",
|
|
"answer": "red wine",
|
|
"gt_answers": [
|
|
"Port"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04591713_2857.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the two streets?",
|
|
"answer": "lakeridge and elm",
|
|
"gt_answers": [
|
|
"Forder & Main St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "shell gas station",
|
|
"gt_answers": [
|
|
"Czech stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326901.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2000",
|
|
"gt_answers": [
|
|
"2001"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341369.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number that is next to the wrecked car?",
|
|
"answer": "a city bus",
|
|
"gt_answers": [
|
|
"174"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323755.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the door?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"48184"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top green sign?",
|
|
"answer": "greenbush rd",
|
|
"gt_answers": [
|
|
"Greenbush Rd"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328549.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of traffic lane is in this photo?",
|
|
"answer": "bus only",
|
|
"gt_answers": [
|
|
"BUS ONLY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000346384.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408545.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the type say at the bottom of the train?",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"First"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713906.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is under XO?",
|
|
"answer": "caus cognac",
|
|
"gt_answers": [
|
|
"Superior"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005531.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number train is this?",
|
|
"answer": "st louis",
|
|
"gt_answers": [
|
|
"2857"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04310018_8914.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the sign by the boy's knee?",
|
|
"answer": "bnp paris",
|
|
"gt_answers": [
|
|
"Mary Cohr"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403134.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the Cafe?",
|
|
"answer": "the cafe",
|
|
"gt_answers": [
|
|
"Bagdad"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363043.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last letter on the yellow sign?",
|
|
"answer": "e",
|
|
"gt_answers": [
|
|
"d"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363043.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What follows Bagdad on the store front?",
|
|
"answer": "iraq",
|
|
"gt_answers": [
|
|
"Cafe"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363043.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number of the red and gold train?",
|
|
"answer": "458",
|
|
"gt_answers": [
|
|
"4268"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which one of the 50 US states is in the name of of the train?",
|
|
"answer": "new york",
|
|
"gt_answers": [
|
|
"Maine"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "e",
|
|
"gt_answers": [
|
|
"66"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000275206.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number of the bus that is behind the bus that has a sign that says \"TRY ME\"?",
|
|
"answer": "x1",
|
|
"gt_answers": [
|
|
"88"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330088.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boat?",
|
|
"answer": "schooner",
|
|
"gt_answers": [
|
|
"HMS SCRAPEO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376162.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the boat?",
|
|
"answer": "schooner",
|
|
"gt_answers": [
|
|
"HMS SCRAPEO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376162.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name in yellow on the side of the train?",
|
|
"answer": "eurostar",
|
|
"gt_answers": [
|
|
"Eurostar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline flys this plane?",
|
|
"answer": "southwest airlines",
|
|
"gt_answers": [
|
|
"Delta"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377142.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What meal is inside of the red box?",
|
|
"answer": "shrimp marsala",
|
|
"gt_answers": [
|
|
"SHRIMP MARINARA"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four lettters are visible on the train?",
|
|
"answer": "london st pancras international",
|
|
"gt_answers": [
|
|
"SOUT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347936.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign say?",
|
|
"answer": "tokyo station",
|
|
"gt_answers": [
|
|
"Tokyo Station"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS NAME OF THE BUS",
|
|
"answer": "explore history",
|
|
"gt_answers": [
|
|
"EXPLORE MEDWAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341028.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT TYPE OF BUS",
|
|
"answer": "double decker",
|
|
"gt_answers": [
|
|
"GUIDED TOUR BUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341028.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WEBSITE ADDRESS",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"WWW.VISITMEDWAY.ORG"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341028.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the left banner say?",
|
|
"answer": "skateboarding",
|
|
"gt_answers": [
|
|
"Gravity Skateboards"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the right banner say?",
|
|
"answer": "skateboarding",
|
|
"gt_answers": [
|
|
"Board Shop Skateboarding",
|
|
"BOARD SHOP COMAU"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the theater?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ORPHEUM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000307554.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the front of the marquee say (the side that is facing the street)?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"THE 10TH WISCONSIN FILM FESTIVAL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000307554.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is the Wisconsin Film Festival currently celebrating?",
|
|
"answer": "50th anniversary",
|
|
"gt_answers": [
|
|
"10TH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000307554.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed three times on the card above the flowers?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"Good luck"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000070387.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What shape is suggested by the name of this company?",
|
|
"answer": "star",
|
|
"gt_answers": [
|
|
"star"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000409614.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is sold by The Star?",
|
|
"answer": "beer and wine",
|
|
"gt_answers": [
|
|
"GEORGE'S BEERS",
|
|
"GEORGE'S BEERS & STOUT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000409614.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Other than beers, what is sold by The Star?",
|
|
"answer": "scotch whisky",
|
|
"gt_answers": [
|
|
"stout"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000409614.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date on this photo?",
|
|
"answer": "the date is unknown",
|
|
"gt_answers": [
|
|
"2004/3/6",
|
|
"2004"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04347754_5279.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed limit in this area?",
|
|
"answer": "20 mph",
|
|
"gt_answers": [
|
|
"30"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159593.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the bottom of the red snowboard?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"HUGGING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000232420.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the motorcycle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"HARLEY-DAVIDSON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000188575.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the green wall?",
|
|
"answer": "krees",
|
|
"gt_answers": [
|
|
"Kress"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3372.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written on the white sign",
|
|
"answer": "krees",
|
|
"gt_answers": [
|
|
"Patricks"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3372.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name printed in the man;s T-shirt?",
|
|
"answer": "apple",
|
|
"gt_answers": [
|
|
"Yuh!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355949.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the cat food?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ja!"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462589.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers on the license plate?",
|
|
"answer": "person, person, person",
|
|
"gt_answers": [
|
|
"57247"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000427449.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters on the license plate?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"mg"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000427449.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is listed on the IRT sign?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"1972"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000577975.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is listed in the copyright?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000577975.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is visible on the saddle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"7276"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000577975.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much storage is on the CD-R?",
|
|
"answer": "128mb",
|
|
"gt_answers": [
|
|
"700MB"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159127.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is hand written on the sign on top of STOP?",
|
|
"answer": "don't be afraid",
|
|
"gt_answers": [
|
|
"DONT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366485.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this picture taken and located?",
|
|
"answer": "person's workshop",
|
|
"gt_answers": [
|
|
"LAGUNA BEACH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000487927.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number of yellow bus",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"36"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04146614_10064.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is found on the side of the bus?",
|
|
"answer": "ecocar",
|
|
"gt_answers": [
|
|
"ESCOLAR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000216142.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is on this box?",
|
|
"answer": "28",
|
|
"gt_answers": [
|
|
"88"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04372370_27244.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what letter is on this box?",
|
|
"answer": "b",
|
|
"gt_answers": [
|
|
"S"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04372370_27244.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom of the snowboard say?",
|
|
"answer": "k2",
|
|
"gt_answers": [
|
|
"Step Child"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374185.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368863.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign in this picture?",
|
|
"answer": "ushai",
|
|
"gt_answers": [
|
|
"Ushuai"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000513980.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two things are written on the red sign?",
|
|
"answer": "stop and yield",
|
|
"gt_answers": [
|
|
"Stop Seten"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373787.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the train?",
|
|
"answer": "u s train number",
|
|
"gt_answers": [
|
|
"6068"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337875.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What emblem is shown in the top left corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"My prof e"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000574885.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number found on the green fence?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"330"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000413713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is advertised on the green fence?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Adidas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000413713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this picture?",
|
|
"answer": "daniel",
|
|
"gt_answers": [
|
|
"Paco Lozano"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03788195_2655.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What date is written on the fire hydrant?",
|
|
"answer": "september 11, 2001",
|
|
"gt_answers": [
|
|
"7/9/06"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000133908.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written above the 7/9/06?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"29716/03"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000133908.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of camera is the blue box?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Canon"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000432212.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the name of the store?",
|
|
"answer": "titan",
|
|
"gt_answers": [
|
|
"Titan"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001225.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the brand of watch in the advertisement?",
|
|
"answer": "titan",
|
|
"gt_answers": [
|
|
"Tycoon"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001225.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is the security guard standing or sitting?",
|
|
"answer": "standing",
|
|
"gt_answers": [
|
|
"Sitting"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001225.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on the bottom right side of the yellow cup?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CocaCola",
|
|
"COCA COLA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499204.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is printed on the top of the cup?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"1916"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000499204.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the active ingredient?",
|
|
"answer": "benzyl alcohol",
|
|
"gt_answers": [
|
|
"purified water"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001351.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are displayed on the lower area of the barrel?",
|
|
"answer": "super stock",
|
|
"gt_answers": [
|
|
"Super Stock"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02795169_6193.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the back of the bus?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"School Bus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352668.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the car?",
|
|
"answer": "tah",
|
|
"gt_answers": [
|
|
"177626"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352668.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of milk is shown?",
|
|
"answer": "whole milk",
|
|
"gt_answers": [
|
|
"Semi-Skimmed Milk",
|
|
"SEMI-SKIMMED"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030694.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the capacity of the milk bottle?",
|
|
"answer": "1 litre",
|
|
"gt_answers": [
|
|
"1 litre"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030694.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color does the stoplight show?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"red"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150267.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the red sign?",
|
|
"answer": "a stop sign",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344672.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number",
|
|
"answer": "nr",
|
|
"gt_answers": [
|
|
"186 284"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000411825.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who owns the truck?",
|
|
"answer": "s & w",
|
|
"gt_answers": [
|
|
"M. Way & Son",
|
|
"M.WAY & SON"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377072.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is visible at the top of the fence?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"One"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357743.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the plane?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"N288SA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713106.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the hood of the truck?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"Hot Dogs"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say immediately over the truck in blue lettering on the yellow background?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"SABRETT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What comes after \"The World of\"",
|
|
"answer": "jughead",
|
|
"gt_answers": [
|
|
"Jughead"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_4220.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much did this cost?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"25 c"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_4220.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What issue was this?",
|
|
"answer": "the world of jughead",
|
|
"gt_answers": [
|
|
"NO.172",
|
|
"172"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_4220.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the trailer?",
|
|
"answer": "studs and best",
|
|
"gt_answers": [
|
|
"Sturgis & Bust",
|
|
"Sturgis or Bust"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of business is this?",
|
|
"answer": "barber shop",
|
|
"gt_answers": [
|
|
"Barber shop"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02791270_1798.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bottom word written on the blue cloud on the wall?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BIRTHDAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000367259.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign say?",
|
|
"answer": "tunis",
|
|
"gt_answers": [
|
|
"Tunis"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000467126.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What state is listed in the text?",
|
|
"answer": "washington",
|
|
"gt_answers": [
|
|
"Colorado"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03887697_2293.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is listed in the text?",
|
|
"answer": "seattle",
|
|
"gt_answers": [
|
|
"Dallas"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03887697_2293.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is listed on the top of the page?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"498"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000285841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the airline of the blue plane?",
|
|
"answer": "klm",
|
|
"gt_answers": [
|
|
"KLM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406090.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "u.s.air force namer",
|
|
"answer": "the u s air force",
|
|
"gt_answers": [
|
|
"53366"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328486.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is the fence",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"blue"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000763.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many people are in the photo",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"12",
|
|
"13"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000763.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the special buy price?",
|
|
"answer": "$1.50 per pound",
|
|
"gt_answers": [
|
|
"$1.99"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413931.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the front of the train?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"67015"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150484.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in blue?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"Cinema Casino"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what company is advertised",
|
|
"answer": "google",
|
|
"gt_answers": [
|
|
"google"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the maximum height with units?",
|
|
"answer": "the maximum height with units is 30 m",
|
|
"gt_answers": [
|
|
"3.3M",
|
|
"3,3m"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_192.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number that shows where the bus is going?",
|
|
"answer": "nr",
|
|
"gt_answers": [
|
|
"73",
|
|
"73E"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327105.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "True or false the bread bag says pain?",
|
|
"answer": "false",
|
|
"gt_answers": [
|
|
"PAIN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000221120.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Does Bon appeu appear in this photo?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"BON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000221120.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000172974.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the topic discussed in the cassette?",
|
|
"answer": "what hifi",
|
|
"gt_answers": [
|
|
"Your complete guide to buying Hi-Fi"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02978881_3761.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What side of the cassette is facing upwards?",
|
|
"answer": "the white side",
|
|
"gt_answers": [
|
|
"Side 1"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02978881_3761.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the city name for this police car?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PORTLAND"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000335339.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the wonan\u2019s Jacket?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4626.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the bus?",
|
|
"answer": "tour bus",
|
|
"gt_answers": [
|
|
"Gray Line",
|
|
"GRAYLINE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357685.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters in the bottom right corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"K9"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02113978_5127.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the jersey number on the back",
|
|
"answer": "86",
|
|
"gt_answers": [
|
|
"28"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346662.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of player?",
|
|
"answer": "michael jackson",
|
|
"gt_answers": [
|
|
"Posey"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346662.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the white umbrella?",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"Visa"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000872.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What single digit can be seen in the website tag?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03814639_379.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is across the way whose name appears in white?",
|
|
"answer": "macy's",
|
|
"gt_answers": [
|
|
"SIXTIES"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_317.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How far away is Unter den Linden?",
|
|
"answer": "from here",
|
|
"gt_answers": [
|
|
"750m"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which destination is closest?",
|
|
"answer": "friedrichstrasse",
|
|
"gt_answers": [
|
|
"Deutsches Currywurst Museum"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the yellow text say",
|
|
"answer": "welcome",
|
|
"gt_answers": [
|
|
"Welcome!",
|
|
"WELCOME"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001277.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the batter wearing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000476341.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 3 letters on the batter's uniform?",
|
|
"answer": "jp",
|
|
"gt_answers": [
|
|
"UNI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000476341.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which of these train names is also a sign of the zodiac?",
|
|
"answer": "leo",
|
|
"gt_answers": [
|
|
"Leo"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000389065.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the sign?",
|
|
"answer": "15",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411708.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is for life according to the sign?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"Food"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411708.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the motorcycle donated?",
|
|
"answer": "2007",
|
|
"gt_answers": [
|
|
"2007"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892201_15381.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What month was the motorcycle donated?",
|
|
"answer": "november",
|
|
"gt_answers": [
|
|
"November"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892201_15381.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the blue panels on the left?",
|
|
"answer": "tennis court",
|
|
"gt_answers": [
|
|
"South African Airways"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342402.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter in black on a yellow background, on a is visible in the background?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"a"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000429318.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is visible in pink on a sticker on the front of the brown suitcase to the lefthand side?",
|
|
"answer": "d",
|
|
"gt_answers": [
|
|
"H"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000429318.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the bus",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"Yellow"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000042705.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number",
|
|
"answer": "nr 1",
|
|
"gt_answers": [
|
|
"66"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000042705.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store in the corner?",
|
|
"answer": "subway",
|
|
"gt_answers": [
|
|
"Swatch"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498010.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last initial on the towel?",
|
|
"answer": "m",
|
|
"gt_answers": [
|
|
"M"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4053.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "mvb",
|
|
"gt_answers": [
|
|
"931"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000009226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train track number?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000009226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the street sign?",
|
|
"answer": "hammock",
|
|
"gt_answers": [
|
|
"Hammock",
|
|
"Hammock Ln"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319733.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of establishment is Betos Place?",
|
|
"answer": "shop",
|
|
"gt_answers": [
|
|
"RESTAURANT BAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409126.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What baseball team is on the yellow banner?",
|
|
"answer": "los angeles dodgers",
|
|
"gt_answers": [
|
|
"BOSTON RED SOX"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409126.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of beer has a crown over the name logo?",
|
|
"answer": "coors light",
|
|
"gt_answers": [
|
|
"Corona Extra",
|
|
"CORONA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409126.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which never closes?",
|
|
"answer": "the kitchen door",
|
|
"gt_answers": [
|
|
"kitchen"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000448351.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do you need to be in order for the Kitchen to be always open?",
|
|
"answer": "a cook",
|
|
"gt_answers": [
|
|
"cook"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000448351.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sticker on the bottom right say?",
|
|
"answer": "teddy bear",
|
|
"gt_answers": [
|
|
"Spencer Jr."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285672.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is featured on the tennis racquet?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"W"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374095.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What watch maker is the sponsor of the time clock?",
|
|
"answer": "omega",
|
|
"gt_answers": [
|
|
"Rolex"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374095.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the black text say?",
|
|
"answer": "a pylon is a power transmission line",
|
|
"gt_answers": [
|
|
"accenture"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000074.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the white arrows?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339627.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can you expect to find in the store to the left?",
|
|
"answer": "men's and women's clothing",
|
|
"gt_answers": [
|
|
"FITTING ROOM"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_384.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photography company?",
|
|
"answer": "diana whitte photography",
|
|
"gt_answers": [
|
|
"Diana Whyte Photography"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the child's outfit?",
|
|
"answer": "purple",
|
|
"gt_answers": [
|
|
"purple"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the chair?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the pregnant woman holding?",
|
|
"answer": "shoes",
|
|
"gt_answers": [
|
|
"Baby shoes"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/6464.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOR OF THE DOG?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"BLACK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02110627_25596.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is listed in the image?",
|
|
"answer": "china mainland",
|
|
"gt_answers": [
|
|
"alibaba.com.cn"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04153751_9565.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is listed on the side of the bus?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"London"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000002644.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of guys are these?",
|
|
"answer": "i'm not sure",
|
|
"gt_answers": [
|
|
"Coffee"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030160.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can happen if if you are not careful around the moving arm?",
|
|
"answer": "you can get hurt",
|
|
"gt_answers": [
|
|
"The moving arm can cause bodily harm or vehicle damage.",
|
|
"bodily harm or vehicle damage"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342400.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the nutritious oils in this dressing do?",
|
|
"answer": "promote healthy skin and hair",
|
|
"gt_answers": [
|
|
"ABSORB THE ANTIOXIDANTS"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028156.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this dressing add to you salad?",
|
|
"answer": "it adds calories and fat",
|
|
"gt_answers": [
|
|
"Delicious flavor"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028156.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348589.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percentage off is offered in the window?",
|
|
"answer": "50",
|
|
"gt_answers": [
|
|
"50"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_801.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN THE ADVERTISEMENT BOARD?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MOET & CHANDON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "slogan on shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"JUST DO IT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000186544.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is the airplane?",
|
|
"answer": "ethiopian airlines",
|
|
"gt_answers": [
|
|
"Ethiopian"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000304590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is the loading staircase?",
|
|
"answer": "ethiopian airlines",
|
|
"gt_answers": [
|
|
"Kenya Airways"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000304590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of emergency vehicle is pictured below?",
|
|
"answer": "police car",
|
|
"gt_answers": [
|
|
"Police"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345619.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Brand name?",
|
|
"answer": "smith",
|
|
"gt_answers": [
|
|
"HIMS"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008237.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What ingredients are in the drink?",
|
|
"answer": "lemonade",
|
|
"gt_answers": [
|
|
"GRAPEFRUIT CHAMOMILE & CARDAMON"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329684.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is the sale at the store?",
|
|
"answer": "the sale at the store",
|
|
"gt_answers": [
|
|
"70%"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_878.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the board read?",
|
|
"answer": "0 outs",
|
|
"gt_answers": [
|
|
"DODGERS:COM",
|
|
"DODGERS.COM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000249362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the wall?",
|
|
"answer": "i love tennis",
|
|
"gt_answers": [
|
|
"Vittel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405865.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what his t-shirt refer",
|
|
"answer": "organic waves",
|
|
"gt_answers": [
|
|
"QUIK SILVER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342309.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the ferries?",
|
|
"answer": "ferries",
|
|
"gt_answers": [
|
|
"Brittany"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358265.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the post?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"Two"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000571199.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the post?",
|
|
"answer": "lisbon",
|
|
"gt_answers": [
|
|
"Correio"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000571199.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the yellow sign say?",
|
|
"answer": "turtle king",
|
|
"gt_answers": [
|
|
"SLOW TURTLE XING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359889.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the train?",
|
|
"answer": "the train is a dmu",
|
|
"gt_answers": [
|
|
"112 112",
|
|
"112"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000242973.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phrase is on the tram?",
|
|
"answer": "''police''",
|
|
"gt_answers": [
|
|
"WIR SORGEN FUR IHRE SICHERHEIT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000242973.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the front of the tram?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"POLIZEI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000242973.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the store name on the second floor?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"GUESS"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_336.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the blue sign say?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"GUESS"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_336.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the Oven?",
|
|
"answer": "ge",
|
|
"gt_answers": [
|
|
"HOLF"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000209923.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boulevard on the sign?",
|
|
"answer": "logan boulevard",
|
|
"gt_answers": [
|
|
"Logan",
|
|
"Logan Blvd."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06874185_113.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What 4 digit number is written on the sign?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"2600"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06874185_113.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters on the side of the train in yellow?",
|
|
"answer": "csx",
|
|
"gt_answers": [
|
|
"CSX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000093000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters on the front of the train?",
|
|
"answer": "csx",
|
|
"gt_answers": [
|
|
"CSX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000093000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What character and numbers are on the top right front of the train?",
|
|
"answer": "c s c s",
|
|
"gt_answers": [
|
|
"B432",
|
|
"8432"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000093000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What gate is this?",
|
|
"answer": "a pedestrian gate",
|
|
"gt_answers": [
|
|
"The side gate",
|
|
"SIDE-GATE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which state is this product from?",
|
|
"answer": "new york",
|
|
"gt_answers": [
|
|
"VERMONT"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006154.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which organization is shown on the blue badge on the product?",
|
|
"answer": "neighborhood cheese",
|
|
"gt_answers": [
|
|
"American Cheese Society."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006154.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the sewing machine?",
|
|
"answer": "singer",
|
|
"gt_answers": [
|
|
"Singer"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04179913_14547.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of traffic signal is it?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000248496.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of stop signal is it?",
|
|
"answer": "four way",
|
|
"gt_answers": [
|
|
"4 way"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000248496.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "alto",
|
|
"gt_answers": [
|
|
"Alto"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is advertised on the bottom right corner?",
|
|
"answer": "youtube",
|
|
"gt_answers": [
|
|
"www.myprofe.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359507.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE BRAND OF THIS SURGE PROTECTOR?",
|
|
"answer": "philips",
|
|
"gt_answers": [
|
|
"BELKIN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000080566.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"31"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000364476.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in front of the sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT NUMBER IS THE BATTER?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"74 74",
|
|
"74"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048032.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number radio station is listed as a sponsor on the wall?",
|
|
"answer": "fm station",
|
|
"gt_answers": [
|
|
"101.5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word written on the chest of the man in the blue jacket?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Thunder"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the photo taken?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000194788.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the black tag say?",
|
|
"answer": "it's a cake",
|
|
"gt_answers": [
|
|
"2011 turku abo"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000031994.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the book?",
|
|
"answer": "i am america",
|
|
"gt_answers": [
|
|
"\"I am America (and so can you)\"",
|
|
"I AM AMERICA (AND SO CAN YOU!)"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the author of the book?",
|
|
"answer": "stephen colbert",
|
|
"gt_answers": [
|
|
"Stephen Colbert"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text written on the banner?",
|
|
"answer": "i am a human being",
|
|
"gt_answers": [
|
|
"Klimaschutz jetzt",
|
|
"KLIMASCHUTZ JETZT!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338872.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is on the bottom level of the mall?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Avenue"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_476.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the apparel retail store?",
|
|
"answer": "j crew",
|
|
"gt_answers": [
|
|
"Avenue"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_476.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the only number visible on the player's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000050686.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company manufactured this bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Woodey",
|
|
"WOODRY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000435327.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "the shop MUNCHIES has what written in red text on the window?",
|
|
"answer": "munchies",
|
|
"gt_answers": [
|
|
"HOT & COLD FILLED ROLLS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315938.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "tiny letters on sweater?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"OXY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000494341.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE DOLL?",
|
|
"answer": "teddy bear",
|
|
"gt_answers": [
|
|
"BEAR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325101.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is in the sign",
|
|
"answer": "59",
|
|
"gt_answers": [
|
|
"59"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4699.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the sign?",
|
|
"answer": "isla",
|
|
"gt_answers": [
|
|
"Isla"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4699.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the brand name of the key board?",
|
|
"answer": "dell",
|
|
"gt_answers": [
|
|
"DELL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000580960.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What character name is on the fork?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"HELLO KITTY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000466750.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number written in bold red?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"90"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000012556.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What institute is this?",
|
|
"answer": "indian institute of technology",
|
|
"gt_answers": [
|
|
"INDIAN INSTITUTE OF TECHNOLOGY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000068.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many people are in this photo?",
|
|
"answer": "seven",
|
|
"gt_answers": [
|
|
"Seven",
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000075.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is just here?",
|
|
"answer": "city center",
|
|
"gt_answers": [
|
|
"Macy's",
|
|
"MACYS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company shop has a blue sign?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"Apex"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_481.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website name at the bottom?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"www.axcium.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02097209_3435.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photographer?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"John Bald"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/658.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this doughnut featured?",
|
|
"answer": "the doughnut shop",
|
|
"gt_answers": [
|
|
"Boston Globe"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000443752.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What object do you see in the picture?",
|
|
"answer": "broom",
|
|
"gt_answers": [
|
|
"Broom"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02906734_7411.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company's logo is on the coffee cup?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"STARBUCKS COFFEE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000460694.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word in white at the top of the clock?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"james"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000579899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written on the back of the blue jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SCOTT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000187103.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the sign?",
|
|
"answer": "west welcome",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the air craft",
|
|
"answer": "air canada express",
|
|
"gt_answers": [
|
|
"AIR CANADA express"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000185888.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is on the oragnger players shorts",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"25"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many people are on the soccer field",
|
|
"answer": "eight",
|
|
"gt_answers": [
|
|
"seven",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is the mans shirt whos carring the ball",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"yellow",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is in the top left of the picture above the gray haired lady's head?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000481702.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bus?",
|
|
"answer": "x1",
|
|
"gt_answers": [
|
|
"11"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343455.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the bus?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343455.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is printed on the back of the catcher's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Rodriguez"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000101234.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company in the building?",
|
|
"answer": "bank of america",
|
|
"gt_answers": [
|
|
"Cambridge trust company"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1414.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "person's drugs",
|
|
"gt_answers": [
|
|
"Engstrom's Prescriptions"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351293.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is sold in this store?",
|
|
"answer": "drugs",
|
|
"gt_answers": [
|
|
"Drugs"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351293.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What insurance company is referenced on the newspaper?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Prudential"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000044763.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the back of the grey shirt?",
|
|
"answer": "phillies",
|
|
"gt_answers": [
|
|
"DEBRAND"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321608.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the tennis racket?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"W"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000461389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are in the picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000461389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the book being read?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Mother Goose"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000436453.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the preferred charge card?",
|
|
"answer": "visa",
|
|
"gt_answers": [
|
|
"VISA"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_109.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the red sign on the fence?",
|
|
"answer": "motocross",
|
|
"gt_answers": [
|
|
"102.5 clyde 1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345393.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the projectors?",
|
|
"answer": "hp",
|
|
"gt_answers": [
|
|
"Sharp"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04009552_2996.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is this?",
|
|
"answer": "china mainland",
|
|
"gt_answers": [
|
|
"www.china.cn"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02769748_474.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what website is featured on the picture?",
|
|
"answer": "china mainland",
|
|
"gt_answers": [
|
|
"www.china.cn"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02769748_474.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of food are they going to be cooking?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"Grilled Pizza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the wine?",
|
|
"answer": "grilled pizza",
|
|
"gt_answers": [
|
|
"BARBERA D'ASTI"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top word written on the sign?",
|
|
"answer": "music",
|
|
"gt_answers": [
|
|
"music"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000521982.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the black part of the sign?",
|
|
"answer": "music is great",
|
|
"gt_answers": [
|
|
"music is GREAT",
|
|
"MUSIC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000521982.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the shortest word written on the sign?",
|
|
"answer": "music",
|
|
"gt_answers": [
|
|
"is"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000521982.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers are on the train?",
|
|
"answer": "246",
|
|
"gt_answers": [
|
|
"246"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339245.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photograph?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Olivier Cabaret"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368251.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is the image copyrighted in?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2004"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02091134_5925.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what name is on the clock",
|
|
"answer": "saint peter",
|
|
"gt_answers": [
|
|
"saint christopher's place"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318881.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is water?",
|
|
"answer": "$2",
|
|
"gt_answers": [
|
|
"$2.00"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000064730.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the cell phone?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"LG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who made the cell phone?",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"LG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the watermark say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BYMAHS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342091.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street on the green sign?",
|
|
"answer": "lake street",
|
|
"gt_answers": [
|
|
"S CANAL ST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000488990.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What appears below S CANAL ST?",
|
|
"answer": "the river",
|
|
"gt_answers": [
|
|
"500 W"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000488990.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bread?",
|
|
"answer": "rye bread",
|
|
"gt_answers": [
|
|
"Acme",
|
|
"ACME BREAD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415917.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what do the white letters on the ground say",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"do not enter"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366001.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "newsagency",
|
|
"gt_answers": [
|
|
"NEWSAGENCY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the trolley number?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"1209"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04335435_18633.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the copyright date?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334946.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer for this image?",
|
|
"answer": "enrique margarita",
|
|
"gt_answers": [
|
|
"Gustavo Morejon J."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334946.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photographer's website address?",
|
|
"answer": "http://www",
|
|
"gt_answers": [
|
|
"www.gustavomorejon.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334946.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which soda is advertised in the picture",
|
|
"answer": "coke",
|
|
"gt_answers": [
|
|
"pepsi"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098732.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what sauce is advertised in the picture",
|
|
"answer": "mustard",
|
|
"gt_answers": [
|
|
"ketchup"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098732.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What business name is on the tallest building?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"Vodafone"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001242.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the motorbike say?",
|
|
"answer": "police",
|
|
"gt_answers": [
|
|
"POLICE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339086.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the memorial in memory of?",
|
|
"answer": "mary jo green",
|
|
"gt_answers": [
|
|
"Mary Ann Griffin"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892201_17047.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When did Mary Ann Grifin die?",
|
|
"answer": "she died on october 1st, 1908",
|
|
"gt_answers": [
|
|
"8-8-1876"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892201_17047.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many numbers are on the wings?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"four"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000215502.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the numbers on the wings?",
|
|
"answer": "a b",
|
|
"gt_answers": [
|
|
"1944"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000215502.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the double dorrs?",
|
|
"answer": "bank of asia",
|
|
"gt_answers": [
|
|
"Swarovski"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_285.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What item code is listed in the photo?",
|
|
"answer": "lb-breast-bikini-pink",
|
|
"gt_answers": [
|
|
"4838"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892767_13819.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What size is the item shown?",
|
|
"answer": "medium",
|
|
"gt_answers": [
|
|
"34"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892767_13819.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the email for the company?",
|
|
"answer": "labrasse@gmail com",
|
|
"gt_answers": [
|
|
"LaBrassiere@ymail.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892767_13819.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What 2 streets is this at?",
|
|
"answer": "west street and king street",
|
|
"gt_answers": [
|
|
"West 32nd St and Korea Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332161.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number above the door?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"1760"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is listed in the copyright?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000487322.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the fire hydrant?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"12470"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365720.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the website?",
|
|
"answer": "lunchinbox net",
|
|
"gt_answers": [
|
|
"lunchinabox.net"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04557648_11339.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in the red octagon?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321622.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the soda in the picture?",
|
|
"answer": "diet coke",
|
|
"gt_answers": [
|
|
"Coca Cola"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000054294.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What car is on the wall?",
|
|
"answer": "audi",
|
|
"gt_answers": [
|
|
"Kia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405990.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How frequently does the bus in this image appear at the same location according to its own propoganda?",
|
|
"answer": "every 15 minutes",
|
|
"gt_answers": [
|
|
"FROM EVERY 5 MINUTES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420488.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of business is on the black advertisment?",
|
|
"answer": "beach",
|
|
"gt_answers": [
|
|
"Entertainment"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713356.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street address for the entertainment business?",
|
|
"answer": "santa clara street",
|
|
"gt_answers": [
|
|
"125 Fifth Ave",
|
|
"125 Fifth Ave. Brooklyn, NY 11217"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713356.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the total bet?",
|
|
"answer": "5 coins",
|
|
"gt_answers": [
|
|
"90"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_28435.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the balance?",
|
|
"answer": "balance is the amount of money in your casino account",
|
|
"gt_answers": [
|
|
"1356.55"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_28435.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the game called?",
|
|
"answer": "slot machine",
|
|
"gt_answers": [
|
|
"Mystery of the Tarot"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_28435.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is listed on the tiles?",
|
|
"answer": "1889",
|
|
"gt_answers": [
|
|
"1991"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the shirt say?",
|
|
"answer": "iron cross",
|
|
"gt_answers": [
|
|
"west coast choppers"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_22179.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date this photo was taken?",
|
|
"answer": "september 2008",
|
|
"gt_answers": [
|
|
"08.14.2007"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000051302.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is branded on these towels?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"Samsung"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02808304_2359.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of vehicle is on the pier?",
|
|
"answer": "truck",
|
|
"gt_answers": [
|
|
"truck"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331968.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the bottom of the street sign?",
|
|
"answer": "time",
|
|
"gt_answers": [
|
|
"Everyday"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409577.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the street sign?",
|
|
"answer": "night",
|
|
"gt_answers": [
|
|
"Night"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409577.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first number on the street sign?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409577.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand on the top right of the front of the van?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Westfalia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000136369.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the two letters on the van's license plate?",
|
|
"answer": "person and person",
|
|
"gt_answers": [
|
|
"DW"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000136369.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the person who owns this photo?",
|
|
"answer": "nick",
|
|
"gt_answers": [
|
|
"Nicki"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02410509_873.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the person who owns this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Toizer"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02410509_873.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of vitamin does this contain?",
|
|
"answer": "vitamin c",
|
|
"gt_answers": [
|
|
"Vitamin E Pearls"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007400.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the words on the sign under the no left turn arrow?",
|
|
"answer": "no parking on the street",
|
|
"gt_answers": [
|
|
"no parking any time"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000184709.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What compartment is the man at the front of the train in?",
|
|
"answer": "the driver's compartment",
|
|
"gt_answers": [
|
|
"Conductor"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406261.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the train?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"57"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406261.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company that owns this plane?",
|
|
"answer": "delta airlines",
|
|
"gt_answers": [
|
|
"Delta"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000171637.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the plane?",
|
|
"answer": "british airways",
|
|
"gt_answers": [
|
|
"British Airways"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000210.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are on the poster?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"M.K",
|
|
"MK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000111525.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the yellow last word on the building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Reileck"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000573981.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the town?",
|
|
"answer": "otello",
|
|
"gt_answers": [
|
|
"Othello"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/497961.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the phone number of the Cafe Huong Que",
|
|
"answer": "510-644-4444",
|
|
"gt_answers": [
|
|
"206-722-4318"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/497961.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is the Cafe Huong Que open or closed",
|
|
"answer": "open",
|
|
"gt_answers": [
|
|
"open"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/497961.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the road sign?",
|
|
"answer": "elm street",
|
|
"gt_answers": [
|
|
"Main Street"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150551.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the book?",
|
|
"answer": "the cat in the hat",
|
|
"gt_answers": [
|
|
"Green Eggs and Ham"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000451018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What food is shown on the plate?",
|
|
"answer": "spaghetti",
|
|
"gt_answers": [
|
|
"Green Eggs and Ham"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000451018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What white text is found above the illustration?",
|
|
"answer": "the cat in bed",
|
|
"gt_answers": [
|
|
"Green Eggs and Ham"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000451018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this shop sells?",
|
|
"answer": "haircuts",
|
|
"gt_answers": [
|
|
"HAIR CUTS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02791270_8433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What exercise is being shown?",
|
|
"answer": "barbell curl",
|
|
"gt_answers": [
|
|
"BARBELL BICEPS CURL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02790996_2967.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the gas station?",
|
|
"answer": "sam's",
|
|
"gt_answers": [
|
|
"Gulf"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000306535.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sports team is on the guys shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Bears"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000090444.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is underlined?",
|
|
"answer": "oad",
|
|
"gt_answers": [
|
|
"DADDY"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028730.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the team the man plays for?",
|
|
"answer": "texas",
|
|
"gt_answers": [
|
|
"Texas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368064.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the player's number?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"44"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368064.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what you seen text on top right corner?",
|
|
"answer": "\"relaciones de mujeres 8\"",
|
|
"gt_answers": [
|
|
"Torremozas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370068.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are written on the middle two tiers of cake?",
|
|
"answer": "i believe in marriage equality",
|
|
"gt_answers": [
|
|
"PRIVILEGED STRUGGLING",
|
|
"STRUGGLING PRIVILEGED"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000360473.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the top tier of cake?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SINKING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000360473.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the bottom tier of cake?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ELITE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000360473.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the coffee shop seen?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"Starbucks",
|
|
"STARBUCKS COFFEE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the brewery?",
|
|
"answer": "brew pub",
|
|
"gt_answers": [
|
|
"Morgan brewery",
|
|
"MORGAN STREET"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713722.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the play ground",
|
|
"answer": "tennis court",
|
|
"gt_answers": [
|
|
"PEUGEOT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363635.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first four letters of the word in the lower right hand corner?",
|
|
"answer": "ta",
|
|
"gt_answers": [
|
|
"MARK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04476259_2726.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two letters are listed after the dot?",
|
|
"answer": "a and b",
|
|
"gt_answers": [
|
|
"NL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04476259_2726.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what's the brand name at the top of the ball?",
|
|
"answer": "rawlings",
|
|
"gt_answers": [
|
|
"rawlings"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02799071_88.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is color of plant",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"green"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02259212_2045.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photography company featured in the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Olivia Allison Photography"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000511249.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the mug say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Starbucks coffee"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293385.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What follows 'shine' on the sign on the building?",
|
|
"answer": "tokyo",
|
|
"gt_answers": [
|
|
"City"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000305973.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the shop front?",
|
|
"answer": "chanel",
|
|
"gt_answers": [
|
|
"Omega"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_171.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is are the tube shaped fruits on the bottom of the image called?",
|
|
"answer": "bananas",
|
|
"gt_answers": [
|
|
"BANANAS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000338670.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number iis Barber shop?",
|
|
"answer": "second genesis",
|
|
"gt_answers": [
|
|
"5107"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331185.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is Susan\ufffd\ufffd\ufffds",
|
|
"answer": "s short",
|
|
"gt_answers": [
|
|
"Unisex hairstylist",
|
|
"HAIRSTYLIST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331185.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written in white on black on the skier's coat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GRENADE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000574218.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What motor company is she sponsored by?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Ford"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000103722.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus' licence number?",
|
|
"answer": "lt stc",
|
|
"gt_answers": [
|
|
"byo yx59",
|
|
"YX59 BYO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000187131.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What area does the bus travel on its route?",
|
|
"answer": "south london",
|
|
"gt_answers": [
|
|
"centre town sutton",
|
|
"SUTTON TOWN CENTRE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000187131.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who made the bus?",
|
|
"answer": "leyland",
|
|
"gt_answers": [
|
|
"abellio abellio",
|
|
"ABELLIO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000187131.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written on the mirror?",
|
|
"answer": "car",
|
|
"gt_answers": [
|
|
"OBJECTS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02965783_2417.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do they sell at the purple tent?",
|
|
"answer": "samosas",
|
|
"gt_answers": [
|
|
"tea"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000568640.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do they do in the blue tent?",
|
|
"answer": "they sell food",
|
|
"gt_answers": [
|
|
"FLY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000568640.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the large building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MDCCCVI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000081135.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who says you can't park here?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"police"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000081102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percentage is on the sign?",
|
|
"answer": "100",
|
|
"gt_answers": [
|
|
"60%"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415447.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT ISTHE NUMBER OF THE TRAIN?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"46521"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "HOW MANY PERSON ARE THERE IN THE PITCURE?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355508.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the tracker?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red.",
|
|
"Red"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03496892_4388.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many tires do you see?",
|
|
"answer": "four",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03496892_4388.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What school is listed on the picture?",
|
|
"answer": "department of computer science",
|
|
"gt_answers": [
|
|
"The Erik Jonsson School of Engineering & Computer Science At The University Of Texas At Dallas",
|
|
"THE ERIK JONSSON SCHOOL"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000403.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What department is listed on the picture?",
|
|
"answer": "department of computer science",
|
|
"gt_answers": [
|
|
"Department of Computer Science"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000403.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this picture taken?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2004"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What month was this picture taken?",
|
|
"answer": "august",
|
|
"gt_answers": [
|
|
"Nov"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team name is displayed on the player's jersey?",
|
|
"answer": "tampa bay rays",
|
|
"gt_answers": [
|
|
"RAYS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000500740.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company name is displayed on the package of gum?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"WRIGLEY'S"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000500740.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What flavor is the gum?",
|
|
"answer": "mint",
|
|
"gt_answers": [
|
|
"DOUBLEMINT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000500740.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the camera?",
|
|
"answer": "canon",
|
|
"gt_answers": [
|
|
"Canon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04069434_21423.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the camera strap?",
|
|
"answer": "canon",
|
|
"gt_answers": [
|
|
"CANON EOS DIGITAL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04069434_21423.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is printed on the magazine?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TINA FEY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000252069.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the magazine?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"VANITY FAIR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000252069.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is a brand of coffee maker?",
|
|
"answer": "nespresso",
|
|
"gt_answers": [
|
|
"NESCAFE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_96.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written within the speak bubble on the shirt?",
|
|
"answer": "a fish",
|
|
"gt_answers": [
|
|
"justo!",
|
|
"JUSTO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_19756.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the clothing brand?",
|
|
"answer": "santos",
|
|
"gt_answers": [
|
|
"Justo"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_19756.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how much did the shirt cost?",
|
|
"answer": "$18",
|
|
"gt_answers": [
|
|
"14.99"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_19756.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words have been added to the stop sign?",
|
|
"answer": "no thru traffic ahead",
|
|
"gt_answers": [
|
|
"Don't Believin!",
|
|
"DON'T BELIEVIN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Mostaque A Chowdhury"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000183413.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue board next to the man with the green hat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GNOL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065136.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the third board from the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SIERRA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065136.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes after 'THE' on the sign?",
|
|
"answer": "restaurant",
|
|
"gt_answers": [
|
|
"Best"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319929.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the speed boat?",
|
|
"answer": "victory",
|
|
"gt_answers": [
|
|
"Victory"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04273569_11231.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the speed boat?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"77"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04273569_11231.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the speed boat?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04273569_11231.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is listed on the top of the speaker?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"Logitech"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04041544_3215.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date",
|
|
"answer": "the date is february",
|
|
"gt_answers": [
|
|
"3/26/05"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02108089_9515.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red sign?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000267463.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the black sign?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"One way"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000267463.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many cents per pound are the bananas?",
|
|
"answer": "100 cents",
|
|
"gt_answers": [
|
|
"99"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353806.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many dollars per pound are the Mandarines?",
|
|
"answer": "two dollars",
|
|
"gt_answers": [
|
|
"1.99"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353806.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green text word above Organic say?",
|
|
"answer": "certified organic",
|
|
"gt_answers": [
|
|
"certified"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353806.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written above STOP?",
|
|
"answer": "believing",
|
|
"gt_answers": [
|
|
"DON'T"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317264.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written below STOP?",
|
|
"answer": "believing",
|
|
"gt_answers": [
|
|
"BELIEVING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317264.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the monitor?",
|
|
"answer": "acer",
|
|
"gt_answers": [
|
|
"Soyo"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000458286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the game on the slot machine?",
|
|
"answer": "seven league",
|
|
"gt_answers": [
|
|
"SEVEN LEAGUE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_2390.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What channel is the movie on?",
|
|
"answer": "disney channel",
|
|
"gt_answers": [
|
|
"Disney Channel"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word at the top?",
|
|
"answer": "high",
|
|
"gt_answers": [
|
|
"High"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the movie?",
|
|
"answer": "high school musical",
|
|
"gt_answers": [
|
|
"High School Musical"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does not stop?",
|
|
"answer": "cars",
|
|
"gt_answers": [
|
|
"CROSS TRAFFIC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367983.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top word on the sign?",
|
|
"answer": "state",
|
|
"gt_answers": [
|
|
"STATE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000182029.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What collection is the sword from?",
|
|
"answer": "person collection",
|
|
"gt_answers": [
|
|
"Greg Domian Collection"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04141327_910.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what can you play?",
|
|
"answer": "ocarina",
|
|
"gt_answers": [
|
|
"ocarina"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03840681_12998.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say",
|
|
"answer": "don't block the box",
|
|
"gt_answers": [
|
|
"dont block the box FINE +2 points",
|
|
"Don't Block the Box Fine +2 Points"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000093119.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of cough drops are on the right side of the bottom shelf?",
|
|
"answer": "zoe",
|
|
"gt_answers": [
|
|
"Halls"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04443257_8266.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is on the glass?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"1678"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000204203.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes after AVE",
|
|
"answer": "avenue",
|
|
"gt_answers": [
|
|
"West"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_61.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number comes after no.?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_61.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What food is the man eating?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"Pizza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the third alphabet",
|
|
"answer": "e",
|
|
"gt_answers": [
|
|
"C"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000329638.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of instructions are listed?",
|
|
"answer": "cooking instructions",
|
|
"gt_answers": [
|
|
"Cooking"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009864.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which number does the bus have?",
|
|
"answer": "number",
|
|
"gt_answers": [
|
|
"53"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000181564.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the vehicle?",
|
|
"answer": "it is japan",
|
|
"gt_answers": [
|
|
"P-18368",
|
|
"P.18368"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417043.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the red Bus headed?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"Euston"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354007.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the advertisement say on the side of the bus?",
|
|
"answer": "the world is your oyster",
|
|
"gt_answers": [
|
|
"Wicked is Spellbinding!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354007.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the boys shirt?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Sun Junior"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368041.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "narita express",
|
|
"gt_answers": [
|
|
"257"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259349.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which date is indicated in the picture",
|
|
"answer": "january 1",
|
|
"gt_answers": [
|
|
"Nov 26 2005"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348688.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN THERE",
|
|
"answer": "rtbr",
|
|
"gt_answers": [
|
|
"RTBF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364722.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the route number for bus 4306?",
|
|
"answer": "4305",
|
|
"gt_answers": [
|
|
"1428"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355481.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the front of the train?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"150208"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412036.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is on the bottom of this image?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"WWW.CORSIBOSCAIOLI.CH"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03000684_15071.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word in the text?",
|
|
"answer": "custode",
|
|
"gt_answers": [
|
|
"custode"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03000684_15071.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the player wearing?",
|
|
"answer": "23",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the stores name on the right?",
|
|
"answer": "dm",
|
|
"gt_answers": [
|
|
"GEOX"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345273.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the image",
|
|
"answer": "reflex",
|
|
"gt_answers": [
|
|
"Brownie"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04069434_10808.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is to the right?",
|
|
"answer": "hudson street",
|
|
"gt_answers": [
|
|
"Greenwich St.",
|
|
"GREENWICH STREET"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351835.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the white arrow?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351835.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is closed?",
|
|
"answer": "west street",
|
|
"gt_answers": [
|
|
"Albany St.",
|
|
"ALBANY STREET"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351835.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is located at the bottom of the bag?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"Sports"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02769748_87439.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the word at the bottom of the bag?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"red?",
|
|
"RED"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02769748_87439.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bus say?",
|
|
"answer": "luzern bus",
|
|
"gt_answers": [
|
|
"Luzern Bahnhof",
|
|
"2 LUZERN BAHNHOF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412829.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Bridge called?",
|
|
"answer": "golden gate",
|
|
"gt_answers": [
|
|
"Oakland Bay",
|
|
"SAN FRANCISCO-OAKLAND BAY BRIDGE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410264.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is this view northerly, westerly, easterly or southerly?",
|
|
"answer": "southerly",
|
|
"gt_answers": [
|
|
"Easterly"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410264.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the players team number?",
|
|
"answer": "20",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000173506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the beer company sponsoring this team?",
|
|
"answer": "corona",
|
|
"gt_answers": [
|
|
"CORONA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000173506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is this sign?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"blue"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3617.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do we know about the knife at the top of the image?",
|
|
"answer": "it is a real knife",
|
|
"gt_answers": [
|
|
"real knife"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03041632_12487.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do we know about the knives at the bottom of the image?",
|
|
"answer": "they are real knives",
|
|
"gt_answers": [
|
|
"3D model"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03041632_12487.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the back of one man's white shirt?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"9"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the summit the backpack is from?",
|
|
"answer": "the summit",
|
|
"gt_answers": [
|
|
"IPv6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000285041.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What size number is the muzzle?",
|
|
"answer": "small breed",
|
|
"gt_answers": [
|
|
"0"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03803284_5129.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the 5 digit item number listed on the package?",
|
|
"answer": "quick fit muzzle",
|
|
"gt_answers": [
|
|
"59000"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03803284_5129.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the code written on the end of the aircraft?",
|
|
"answer": "f 16",
|
|
"gt_answers": [
|
|
"J-011"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405560.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the restaurant name?",
|
|
"answer": "keslak",
|
|
"gt_answers": [
|
|
"Crown Fried Chicken"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the building name next to the restaurant?",
|
|
"answer": "kslak",
|
|
"gt_answers": [
|
|
"K.Slak",
|
|
"K SLAK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are visible on the shirt of the girl holding the drink?",
|
|
"answer": "lms",
|
|
"gt_answers": [
|
|
"SENIO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02437616_15433.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What building number are the giraffes by?",
|
|
"answer": "the giraffes are by building number 2",
|
|
"gt_answers": [
|
|
"16"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366378.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of food is this?",
|
|
"answer": "chips",
|
|
"gt_answers": [
|
|
"ATO CHIPS"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029596.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the SmartPower Blender",
|
|
"answer": "the smartpower blender is manufactured by person",
|
|
"gt_answers": [
|
|
"Cuisinart"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000431492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the Cuisinart Blender?",
|
|
"answer": "the cuisinart cbb 600",
|
|
"gt_answers": [
|
|
"SmartPower",
|
|
"SMARTPOWER DUET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000431492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What was the brand and model of the Blender?",
|
|
"answer": "the blender was a vitamix",
|
|
"gt_answers": [
|
|
"Cuisinart SmartPower",
|
|
"CUISINART SMARTPOWER DUET BLENDER/FOOD PROCESSOR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000431492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the blue book on the top self says",
|
|
"answer": "the blue book on the top self says",
|
|
"gt_answers": [
|
|
"Debunking Economics"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_500.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of film was used?",
|
|
"answer": "color film",
|
|
"gt_answers": [
|
|
"KODAK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371943.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this hotel?",
|
|
"answer": "the marriott marriott hotel",
|
|
"gt_answers": [
|
|
"Marriott"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000755.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photo?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Philippe Roynet"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/61517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What platform is the train currently at?",
|
|
"answer": "platform 1",
|
|
"gt_answers": [
|
|
"2c"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326750.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number printed on the train in the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"375913"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326750.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the blue plane?",
|
|
"answer": "klm",
|
|
"gt_answers": [
|
|
"KLM"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_7740.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the airport terminal name?",
|
|
"answer": "klm",
|
|
"gt_answers": [
|
|
"Anthony Fokker"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_7740.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the white building?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"73"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_7740.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store has a blue sign?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"Steve Madden"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_721.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which store logo is blue?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"Steve Madden"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_721.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store on the bottom floor with a white logo?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"Fossil"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_721.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the bus?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000433232.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many cheerleaders are there?",
|
|
"answer": "eight",
|
|
"gt_answers": [
|
|
"3",
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000433232.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Samsung"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000433232.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the copyrighted photographer of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"LOTHAR LENZ"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02395406_36871.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is listed?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"www.pferdefotoarchiv.de"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02395406_36871.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whose catering company owns this truck",
|
|
"answer": "latina catering",
|
|
"gt_answers": [
|
|
"Linda",
|
|
"LINDA'S CATERING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348219.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does Linda's company do?",
|
|
"answer": "linda's company is a food truck",
|
|
"gt_answers": [
|
|
"Catering"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348219.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of Handbook is it?",
|
|
"answer": "christianity",
|
|
"gt_answers": [
|
|
"A Handbook of Psychological disorders",
|
|
"PSYCHOLOGICAL DISORDERS"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028585.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of cereal is on top of the fridge?",
|
|
"answer": "corn flakes",
|
|
"gt_answers": [
|
|
"Honey Nut Cheerios"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713674.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the logo at the bottom on the train?",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"AMTRAK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592917.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company besides AMTRAK is advertised?",
|
|
"answer": "mta",
|
|
"gt_answers": [
|
|
"Metra"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592917.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photo?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Bridget Samuels"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the homogenized milk?",
|
|
"answer": "kraft",
|
|
"gt_answers": [
|
|
"Darigold"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07831146_2895.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which spice is furthest to the right?",
|
|
"answer": "garlic",
|
|
"gt_answers": [
|
|
"Parsley"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07831146_2895.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the plane?",
|
|
"answer": "air canada",
|
|
"gt_answers": [
|
|
"Air Canada"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363291.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the green shirt?",
|
|
"answer": "shamrock",
|
|
"gt_answers": [
|
|
"King Monkey"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000267779.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red canister?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Le Creusei"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04270147_12943.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What place does the sign board showing?",
|
|
"answer": "manhattan",
|
|
"gt_answers": [
|
|
"MANHATTAN AV"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376680.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is shown in the sign board?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"ONE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376680.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the batter?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000243509.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline?",
|
|
"answer": "new zealand airways",
|
|
"gt_answers": [
|
|
"AIR NEW ZEALAND"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000487413.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website listed?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"NEWS.COM",
|
|
"NADALNEWS.COM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322755.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the shop on the top floor?",
|
|
"answer": "h&m",
|
|
"gt_answers": [
|
|
"alan & guys"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_496.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What logo is on the coffee cup?",
|
|
"answer": "illy",
|
|
"gt_answers": [
|
|
"Illy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325754.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What water brand is advertised?",
|
|
"answer": "evian",
|
|
"gt_answers": [
|
|
"Poland Spring"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407523.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which famous street in America, according to these road signs, is close to Broadway?",
|
|
"answer": "57th street",
|
|
"gt_answers": [
|
|
"WALL ST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000008006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What button is on the right?",
|
|
"answer": "the radio button",
|
|
"gt_answers": [
|
|
"EQ"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004184.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what label stick on bottle",
|
|
"answer": "label stick on bottle",
|
|
"gt_answers": [
|
|
"bena"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344792.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is number plate of train",
|
|
"answer": "number plate of train",
|
|
"gt_answers": [
|
|
"2DI2",
|
|
"2D12"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where the train is going",
|
|
"answer": "to the station",
|
|
"gt_answers": [
|
|
"BANBURY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the orange wall?",
|
|
"answer": "santa clause",
|
|
"gt_answers": [
|
|
"SALE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_365.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green sign say?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"Aurelia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What has been written beneath the stop sign?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"IT!",
|
|
"It"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354240.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company sponsor featured on bottom center portion of the motorcycle?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"REPSOL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323132.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company sponsor featured in the blue and black logo on the main part of the morotrcycle?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Estrella Galicia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323132.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many players on the field",
|
|
"answer": "nine",
|
|
"gt_answers": [
|
|
"2",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364668.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is the name Mark Clarkson visible in this photo?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"mark clarkson"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000382953.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What stadium?",
|
|
"answer": "rodeo",
|
|
"gt_answers": [
|
|
"EL CAPITAN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the water tower say?",
|
|
"answer": "university",
|
|
"gt_answers": [
|
|
"Skymax"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350572.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street signs are seen?",
|
|
"answer": "two street signs",
|
|
"gt_answers": [
|
|
"Emerald and University"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350572.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the tail of the plane?",
|
|
"answer": "coulson",
|
|
"gt_answers": [
|
|
"Coulson Flying Tankers"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349748.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is displayed on the right player?",
|
|
"answer": "34",
|
|
"gt_answers": [
|
|
"34"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03379051_3383.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "According to the packaging, what type of nut is inside this product?",
|
|
"answer": "hazelnut",
|
|
"gt_answers": [
|
|
"Hazelnut.",
|
|
"HAZELNUT"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000031087.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the event shown?",
|
|
"answer": "us open tennis tournament",
|
|
"gt_answers": [
|
|
"us open"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000444953.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of one of the sponsors of the event?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"jpmorgan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000444953.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the website name",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"cityjet.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370124.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is number of the plane",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"00-VLF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370124.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Valley Central School"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000119449.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the school?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Valley Central School"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000119449.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is in the white and blue bag to the far right of the image?",
|
|
"answer": "flowers",
|
|
"gt_answers": [
|
|
"SUGAR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000331417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are in this picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3408.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the soda company in the right window?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Coca Cola"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000487693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who photographed this picture and is credited on the bottom right of the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"daniele azzarelli"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000053672.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city name is written on the umbrella?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"New York"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the crab packing to?",
|
|
"answer": "crab packing",
|
|
"gt_answers": [
|
|
"TRAVEL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01978287_4564.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How large are the crabs?",
|
|
"answer": "small",
|
|
"gt_answers": [
|
|
"GIGANTIC"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01978287_4564.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the 48 hour pack cost?",
|
|
"answer": "$45",
|
|
"gt_answers": [
|
|
"$10.00"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01978287_4564.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website address on the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"www.versaci.fr"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4609.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in yellow on the board?",
|
|
"answer": "waterhole creek",
|
|
"gt_answers": [
|
|
"Crocodile Dundee's"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370790.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the identification number on the side of the ship?",
|
|
"answer": "hms sydney",
|
|
"gt_answers": [
|
|
"F476"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02687172_19840.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What car is this train?",
|
|
"answer": "dining car",
|
|
"gt_answers": [
|
|
"Dining car"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405886.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What animals are there in the picture?",
|
|
"answer": "tigers",
|
|
"gt_answers": [
|
|
"Tigers"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02129604_17329.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is on the plate?",
|
|
"answer": "truck parking",
|
|
"gt_answers": [
|
|
"TRUCK PARKING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367178.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last four numbers on the bike's license plate?",
|
|
"answer": "93",
|
|
"gt_answers": [
|
|
"8481"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000006846.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the jersey?",
|
|
"answer": "21",
|
|
"gt_answers": [
|
|
"121"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713236.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store where he is shopping ?",
|
|
"answer": "a market",
|
|
"gt_answers": [
|
|
"City Market"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367964.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE COLOR OF THE SNAKE?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"GREEN"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01739381_6519.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT DO YOU CALL THIS SNAKE?",
|
|
"answer": "vine snake",
|
|
"gt_answers": [
|
|
"VINE SNAKE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01739381_6519.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which organic food group is mentioned on the backdrop?",
|
|
"answer": "whole foods",
|
|
"gt_answers": [
|
|
"vegetables",
|
|
"Riverford organic Vegetables"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341378.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name on the bottom of this photograph?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Tom"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000207013.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of workshop does the sign show?",
|
|
"answer": "knitting",
|
|
"gt_answers": [
|
|
"Knitting",
|
|
"KNITTING WORKSHOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367809.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the author's first name?",
|
|
"answer": "elizabeth",
|
|
"gt_answers": [
|
|
"Elizabeth"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367809.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the author's last name?",
|
|
"answer": "baum",
|
|
"gt_answers": [
|
|
"Zimmermann"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367809.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is on the plane?",
|
|
"answer": "air china",
|
|
"gt_answers": [
|
|
"Air china"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331577.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What saints name is in this photo?",
|
|
"answer": "saint martin",
|
|
"gt_answers": [
|
|
"Georges"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357165.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 3 letters written above \"The French\"?",
|
|
"answer": "t",
|
|
"gt_answers": [
|
|
"Tri"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339138.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 3 digits visible behind the lady, written after \"Tel.\"?",
|
|
"answer": "917",
|
|
"gt_answers": [
|
|
"212"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339138.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the color of the plane?",
|
|
"answer": "blue and white",
|
|
"gt_answers": [
|
|
"White",
|
|
"WHITE AND BLUE STRIP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the plane?",
|
|
"answer": "moscow airlines",
|
|
"gt_answers": [
|
|
"SunExpress"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the Airplane?",
|
|
"answer": "lufthansa boeing 747",
|
|
"gt_answers": [
|
|
"LUFTHANSA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000111223.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is in the package on the upper right?",
|
|
"answer": "a sandwich",
|
|
"gt_answers": [
|
|
"sandwich"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the salad cost?",
|
|
"answer": "it's free",
|
|
"gt_answers": [
|
|
"105"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000259297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type dresses are sold at this store?",
|
|
"answer": "wedding dresses",
|
|
"gt_answers": [
|
|
"bridal special occasion"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000333663.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Do they sell anything besides dresses?",
|
|
"answer": "yes they sell a lot of shoes",
|
|
"gt_answers": [
|
|
"ACCESSORIES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000333663.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the baseball players jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000385837.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the address given in bus",
|
|
"answer": "no address is given in bus",
|
|
"gt_answers": [
|
|
"252 inglewood"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365391.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number given on bus",
|
|
"answer": "number given on bus",
|
|
"gt_answers": [
|
|
"1201",
|
|
"252"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365391.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which year is given",
|
|
"answer": "1963",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365391.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "mcf",
|
|
"gt_answers": [
|
|
"SK12 MVC",
|
|
"SK12MVC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415675.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whayt brand is the truck?",
|
|
"answer": "scania",
|
|
"gt_answers": [
|
|
"Scania"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415675.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the riders pants?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ROCKET"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000452167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number on the van?",
|
|
"answer": "mcd",
|
|
"gt_answers": [
|
|
"GN07 NWZ"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364724.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the left border at the top of the border around the image?",
|
|
"answer": "train",
|
|
"gt_answers": [
|
|
"STOCK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000055121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the right border at the top of the image?",
|
|
"answer": "train",
|
|
"gt_answers": [
|
|
"URBIAN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000055121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word that is upside down on the image's border in the top right corner?",
|
|
"answer": "train",
|
|
"gt_answers": [
|
|
"URBIAN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000055121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the oil company?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"crockett"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000095267.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the banner in the upper corner say?",
|
|
"answer": "imagine",
|
|
"gt_answers": [
|
|
"INMAGINE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04317175_5328.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the black jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000045659.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Purple Martini"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of photography does Purple Martini do?",
|
|
"answer": "wedding photography",
|
|
"gt_answers": [
|
|
"Wedding Photography"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in black letters?",
|
|
"answer": "venezia",
|
|
"gt_answers": [
|
|
"Uventina"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07745940_8569.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on dinning table",
|
|
"answer": "a bar",
|
|
"gt_answers": [
|
|
"GUINNESS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327207.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "street des 13 january",
|
|
"gt_answers": [
|
|
"StraBe des 13. Januar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362098.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "st george street",
|
|
"gt_answers": [
|
|
"St George Ct.",
|
|
"ST. GEORGE CT."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000127104.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign in green letters read?",
|
|
"answer": "there is no place like home",
|
|
"gt_answers": [
|
|
"YOUR FUTURE IS THE NATURAL WORLD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000104325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word under the red plus sign?",
|
|
"answer": "android",
|
|
"gt_answers": [
|
|
"ice"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000255713.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the words on the sign seen over the man's shoulder?",
|
|
"answer": "no smoking",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000537608.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal red sign say?",
|
|
"answer": "divided highway",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000009176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of sale is this?",
|
|
"answer": "cosmetics",
|
|
"gt_answers": [
|
|
"Grand Anniversary SALE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_866.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second digit in the platform number for the red train?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000070363.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the writing on the side of the bus say?",
|
|
"answer": "urba",
|
|
"gt_answers": [
|
|
"URBANO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337609.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last two letters on the tail of the airplane?",
|
|
"answer": "cj",
|
|
"gt_answers": [
|
|
"TC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316420.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does #21's shirt say?",
|
|
"answer": "i love baseball",
|
|
"gt_answers": [
|
|
"Tolleson"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409878.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bar?",
|
|
"answer": "oasis",
|
|
"gt_answers": [
|
|
"Oasis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317965.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who click this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SEBASTIAN SOIDEN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000421341.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the large vertical sign?",
|
|
"answer": "park",
|
|
"gt_answers": [
|
|
"PARK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000186315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the address on the overhang?",
|
|
"answer": "the address on the overhang is 201 w st louis street",
|
|
"gt_answers": [
|
|
"129"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000186315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign that is under the overhang say?",
|
|
"answer": "the park",
|
|
"gt_answers": [
|
|
"PARKING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000186315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the player on the right?",
|
|
"answer": "garcia",
|
|
"gt_answers": [
|
|
"Castella"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343855.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does Tinder Box sell?",
|
|
"answer": "cigars",
|
|
"gt_answers": [
|
|
"Cigars"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04443257_17262.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "i b bachschmidt",
|
|
"gt_answers": [
|
|
"IB BALSCHMIDT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of trucking is shown?",
|
|
"answer": "land rover",
|
|
"gt_answers": [
|
|
"Land Rover"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594945_14596.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word in the corner of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"myprofe"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of car part are these items?",
|
|
"answer": "oil filter",
|
|
"gt_answers": [
|
|
"Oil Filter"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03843555_13542.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the oil filters?",
|
|
"answer": "subaru",
|
|
"gt_answers": [
|
|
"Subaru",
|
|
"SUBARU GENUINE PARTS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03843555_13542.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of Soda?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"DR PEPPER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000451800.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being celebrated?",
|
|
"answer": "a birthday",
|
|
"gt_answers": [
|
|
"Birthday"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000381856.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How old is this person turning?",
|
|
"answer": "60",
|
|
"gt_answers": [
|
|
"90"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000381856.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the book?",
|
|
"answer": "archie's joke book",
|
|
"gt_answers": [
|
|
"archie's joke book"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_9948.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building?",
|
|
"answer": "sony building",
|
|
"gt_answers": [
|
|
"Sony"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001189.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the photo?",
|
|
"answer": "surfers at the beach",
|
|
"gt_answers": [
|
|
"Surf Board Riders"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the location of this photo?",
|
|
"answer": "laguna beach",
|
|
"gt_answers": [
|
|
"Balboa - Calif"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on the buses?",
|
|
"answer": "stran",
|
|
"gt_answers": [
|
|
"Stran"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4227.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the largest red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000348874.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going to?",
|
|
"answer": "downtown",
|
|
"gt_answers": [
|
|
"Berkeley Bart",
|
|
"BERKELEY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"14",
|
|
"1R"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the net's circled sign?",
|
|
"answer": "it says net",
|
|
"gt_answers": [
|
|
"Corona"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375499.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this roads name?",
|
|
"answer": "hardscrabble",
|
|
"gt_answers": [
|
|
"HARDSCRABBLE RD",
|
|
"Hardscrabble"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346720.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the license plate number?",
|
|
"answer": "mcdo",
|
|
"gt_answers": [
|
|
"18 hd"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159762.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the red bus?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"263"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company does the tow truck belong too?",
|
|
"answer": "towing company",
|
|
"gt_answers": [
|
|
"Sovereign"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the top red letters read?",
|
|
"answer": "ignitor",
|
|
"gt_answers": [
|
|
"Handling Instructions"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000249.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the nose of the airplane?",
|
|
"answer": "us",
|
|
"gt_answers": [
|
|
"FORCE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04552348_8200.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What arena is this hockey puck from?",
|
|
"answer": "san jose coliseum",
|
|
"gt_answers": [
|
|
"SAN JOSE ARENA",
|
|
"SAN JOSE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_11301.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the cost of bananas",
|
|
"answer": "the cost of bananas depends on where you buy them",
|
|
"gt_answers": [
|
|
"19c"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354893.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 3 letters on the plate?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"AMK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000455340.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WhatWhat color is the crosswalk",
|
|
"answer": "black and white",
|
|
"gt_answers": [
|
|
"White",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359366.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color are the trees",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"Green",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359366.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the man in the picture heading to?",
|
|
"answer": "a store",
|
|
"gt_answers": [
|
|
"World of Sports"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_284.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in white on the orange board?",
|
|
"answer": "the history of the toilet",
|
|
"gt_answers": [
|
|
"A sip of con"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000323470.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the bottom of the flag?",
|
|
"answer": "switzerland",
|
|
"gt_answers": [
|
|
"Chamonix"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351119.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What combination of letter and numbers are on the skiers shoulders?",
|
|
"answer": "sls",
|
|
"gt_answers": [
|
|
"A1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351119.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is the candy bar located?",
|
|
"answer": "in the living room",
|
|
"gt_answers": [
|
|
"LERON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000385070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of restaurant is behind the parked bikes?",
|
|
"answer": "coffee shop",
|
|
"gt_answers": [
|
|
"Sushi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347996.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was this picture taken?",
|
|
"answer": "summer",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01601694_21788.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which company took this picture?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"David Flores Photography"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01601694_21788.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT WORD IS WRITTEN ON THE METAL",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"BLUESBAND HOHNER INTERNATIONAL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03494278_40353.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON THE PAPER",
|
|
"answer": "how to play the harmonica",
|
|
"gt_answers": [
|
|
"HERE'S HOW TO PLAY YOUR HOHNER"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03494278_40353.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say below the clock?",
|
|
"answer": "tavern",
|
|
"gt_answers": [
|
|
"Manchester Road"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369184.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is under the logo",
|
|
"answer": "metro",
|
|
"gt_answers": [
|
|
"Metro"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412941.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is displayed on the surfboard stand?",
|
|
"answer": "view",
|
|
"gt_answers": [
|
|
"view"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411332.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer that took this picture?",
|
|
"answer": "michael mcdonald",
|
|
"gt_answers": [
|
|
"Koen Brouwer"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403755.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the bottom of the snowboard in the foreground?",
|
|
"answer": "k2",
|
|
"gt_answers": [
|
|
"BURTON"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159441.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the bottom right of the picture?",
|
|
"answer": "boeing",
|
|
"gt_answers": [
|
|
"arnoldo riker",
|
|
"arnoldo.riker"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000497202.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What figure is written on the tag?",
|
|
"answer": "six",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337046.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "From which country is the pasport",
|
|
"answer": "from canada",
|
|
"gt_answers": [
|
|
"United States",
|
|
"UNITED STATES OF AMRICA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159307.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in green?",
|
|
"answer": "always welcome",
|
|
"gt_answers": [
|
|
"always welcome"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001281.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in red?",
|
|
"answer": "always welcome",
|
|
"gt_answers": [
|
|
"orkut143.com"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001281.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what company is written on the plane",
|
|
"answer": "airberlin",
|
|
"gt_answers": [
|
|
"Lufthansa"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_5724.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what text is on the plane?",
|
|
"answer": "airberlin",
|
|
"gt_answers": [
|
|
"Lufthansa"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_5724.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many forks are in the picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Two"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000002886.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many spoons are in the picture?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"One"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000002886.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of board reference?",
|
|
"answer": "goyard",
|
|
"gt_answers": [
|
|
"goyard"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329292.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name above the key?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Caspari"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293605.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store selling head accessories?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"capking."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293605.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the building number?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"38"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293605.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Q: What company is the van from? A: TNT",
|
|
"answer": "a person",
|
|
"gt_answers": [
|
|
"TNT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000573807.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the police motorcycle?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"BU07 AZG",
|
|
"BUO7 AZG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337087.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of shop",
|
|
"answer": "hughes shoes",
|
|
"gt_answers": [
|
|
"Huey's"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04200800_9497.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what they sale",
|
|
"answer": "shoes",
|
|
"gt_answers": [
|
|
"shoes"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04200800_9497.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the size of shoes given",
|
|
"answer": "medium",
|
|
"gt_answers": [
|
|
"Wide, Medium, Narrow"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04200800_9497.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number of this bus?",
|
|
"answer": "no bus number",
|
|
"gt_answers": [
|
|
"117"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001125.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the red word on the sing on the right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"citroen"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000091868.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the column?",
|
|
"answer": "on the sea",
|
|
"gt_answers": [
|
|
"Salt Grill"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_838.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what word is enclosed in the box?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"corn"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000423356.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the yellow shirt?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Oshkosh"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417907.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the green sign?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"SURF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text appears on the red arrow?",
|
|
"answer": "soft flexible dispenser",
|
|
"gt_answers": [
|
|
"New & Improved"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What exit number is shown?",
|
|
"answer": "exit",
|
|
"gt_answers": [
|
|
"22"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048742.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What highway continues to the left?",
|
|
"answer": "i 95",
|
|
"gt_answers": [
|
|
"95"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048742.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is at this exit?",
|
|
"answer": "philadelphia avenue",
|
|
"gt_answers": [
|
|
"CALLOWHILL ST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048742.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the tv brand name?",
|
|
"answer": "lg",
|
|
"gt_answers": [
|
|
"SOny"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000289545.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the truck's color?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"blue"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04461696_5988.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company makes this can opener?",
|
|
"answer": "chef innox",
|
|
"gt_answers": [
|
|
"Chef Inox"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02951585_6086.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is this caboose?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"red"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373425.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the two large letters on the side of the caboose?",
|
|
"answer": "person and person",
|
|
"gt_answers": [
|
|
"NW"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373425.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers below the letters?",
|
|
"answer": "the car number",
|
|
"gt_answers": [
|
|
"518590"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373425.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on on the truck",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"stobart"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000216158.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of truck it this",
|
|
"answer": "is a person",
|
|
"gt_answers": [
|
|
"c a r r i e r"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000216158.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate #",
|
|
"answer": "dk dk",
|
|
"gt_answers": [
|
|
"08-0-74404"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000216158.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the image?",
|
|
"answer": "chimpanzee testing for food",
|
|
"gt_answers": [
|
|
"Chimpanzee fishing for food."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02481823_2363.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What designations are shown on the front of the train to the far left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Y 133"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000466621.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What designations are shown on the lower front of the train to the far right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"J 515"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000466621.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is below Half St?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"1200"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353139.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is does the white text say?",
|
|
"answer": "bibs",
|
|
"gt_answers": [
|
|
"patiently"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/6235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text in blue say?",
|
|
"answer": "two babies laying in bed",
|
|
"gt_answers": [
|
|
"waiting"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/6235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What business would I reach if I called (415) 970-1015?",
|
|
"answer": "transportes quettal",
|
|
"gt_answers": [
|
|
"Transportes Quetzal"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332296.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the green bus number?",
|
|
"answer": "a bus",
|
|
"gt_answers": [
|
|
"336A",
|
|
"336"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red bus number?",
|
|
"answer": "x2",
|
|
"gt_answers": [
|
|
"147"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the green bus going to?",
|
|
"answer": "to the airport",
|
|
"gt_answers": [
|
|
"Loudwater Village"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of flight refer",
|
|
"answer": "wing",
|
|
"gt_answers": [
|
|
"ORBEST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"BALTIMORE ST."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405192.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the man's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000222506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the words on the yellow monkey fruit?",
|
|
"answer": "peel here",
|
|
"gt_answers": [
|
|
"PEEL HERE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349485.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the banana say?",
|
|
"answer": "peel here",
|
|
"gt_answers": [
|
|
"PEEL HERE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349485.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website on the advertisement?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"www.imob.sg"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign to the left advertising?",
|
|
"answer": "fast food",
|
|
"gt_answers": [
|
|
"HOLIDAY PLATTER"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_415.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the front of the motorbike?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000174705.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What clinic is one the left?",
|
|
"answer": "mri scan",
|
|
"gt_answers": [
|
|
"Healthway Medical"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_278.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text on the bottom right of the image?",
|
|
"answer": "i love onions",
|
|
"gt_answers": [
|
|
"sheeba"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03958227_4642.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is referenced in the lower right-hand corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"eyecreation",
|
|
"EYE CREATION"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000552346.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is in the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Matteo Catanese"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000563257.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the brand name of the blue shirt?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Hollister"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_336.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what clothing company is displayed on the blue shirt?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Hollister"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_336.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the train id number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"31193"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000282567.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red street sign asking you to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355131.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white flag with red letter say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GRA OPE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000155198.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the leading, red car?",
|
|
"answer": "md 007",
|
|
"gt_answers": [
|
|
"HV03 JXE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000353260.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the container with the silver top?",
|
|
"answer": "salt",
|
|
"gt_answers": [
|
|
"Salt"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04131690_8780.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many cups can the clear container hold?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04131690_8780.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"377 605"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000122207.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"TAGHeuer"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_95.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the tire?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"LOCO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000346004.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the bike?",
|
|
"answer": "it is a cargo bike",
|
|
"gt_answers": [
|
|
"Scott"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000346004.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the bike tire?",
|
|
"answer": "michelin",
|
|
"gt_answers": [
|
|
"Loco"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000346004.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the location they are at?",
|
|
"answer": "schoenboden",
|
|
"gt_answers": [
|
|
"Schonbodenb"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000296374.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What river does this bridge cross?",
|
|
"answer": "willamette river",
|
|
"gt_answers": [
|
|
"Willamette River"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What county is the Willamette River located?",
|
|
"answer": "marion county",
|
|
"gt_answers": [
|
|
"Marion County"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of LaunchPad as written on the wall?",
|
|
"answer": "launchpad",
|
|
"gt_answers": [
|
|
"JTC"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_252.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the window of the bus?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"N 30"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362829.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the bumper of the bus?",
|
|
"answer": "57",
|
|
"gt_answers": [
|
|
"215"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362829.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the back door of the bus?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"STOP WHEN RED LIGHTS FLASH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362829.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What timezone is the clock tracking?",
|
|
"answer": "utc",
|
|
"gt_answers": [
|
|
"New York"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315403.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number bordered by orange?",
|
|
"answer": "15",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410648.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text bordered by green say?",
|
|
"answer": "go great 15",
|
|
"gt_answers": [
|
|
"E45"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410648.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the orgainziation?",
|
|
"answer": "hong kong parrot society",
|
|
"gt_answers": [
|
|
"BIRD SHOW"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/498315.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the sharpener?",
|
|
"answer": "sharpie",
|
|
"gt_answers": [
|
|
"panasonic"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04116512_19239.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on the truck?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GMC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000032455.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the top of the train?",
|
|
"answer": "hmr",
|
|
"gt_answers": [
|
|
"MINIATURE RAILWAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415147.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What three letters are on the side of the train?",
|
|
"answer": "hmr",
|
|
"gt_answers": [
|
|
"HMR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415147.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the largest sign say?",
|
|
"answer": "sara",
|
|
"gt_answers": [
|
|
"Sasa"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website owns this image?",
|
|
"answer": "flikr",
|
|
"gt_answers": [
|
|
"Pierto.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366609.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company truck?",
|
|
"answer": "united",
|
|
"gt_answers": [
|
|
"UNITED"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03796401_2258.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what word is on the white and black shoe in the mifddle",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"grace"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04200800_16283.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the numbers?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"275"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000224601.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the license plate from?",
|
|
"answer": "new jersey",
|
|
"gt_answers": [
|
|
"California"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345818.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the license plate say?",
|
|
"answer": "a dog",
|
|
"gt_answers": [
|
|
"6CTA983"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345818.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the bird box?",
|
|
"answer": "the rock city",
|
|
"gt_answers": [
|
|
"See Rock City"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02843684_7133.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the plane?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"british airways"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000397986.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What logo is in black print?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BRIDGESTONE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000429266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sponsor is in red letters?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"cmra"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000429266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is depicted in this image?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000409825.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the largest word on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000140613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "For UTC how much is the private master suite per month?",
|
|
"answer": "the private master suite per month is 0",
|
|
"gt_answers": [
|
|
"$800"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06785654_15323.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which number must be called in case of emergency?",
|
|
"answer": "911",
|
|
"gt_answers": [
|
|
"911"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315483.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of controller is being used?",
|
|
"answer": "xbox 360",
|
|
"gt_answers": [
|
|
"COX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000469405.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHICH TYPE OF VEHICLE TAKING ANOTHER VEHICLE?",
|
|
"answer": "tow truck",
|
|
"gt_answers": [
|
|
"RECOVERY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000071714.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in front of bus ?",
|
|
"answer": "face manila",
|
|
"gt_answers": [
|
|
"Free mallride"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713594.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of vehicle is the red vehicle?",
|
|
"answer": "toyota",
|
|
"gt_answers": [
|
|
"Toyota"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what shop is written on the top left?",
|
|
"answer": "h&m",
|
|
"gt_answers": [
|
|
"Marks & Spencer"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_391.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name printed on the red sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Merrell"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000264241.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the novel?",
|
|
"answer": "the pirates and the scientists",
|
|
"gt_answers": [
|
|
"The Pirates! In an adventure with Scientists"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07248320_4007.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the Author?",
|
|
"answer": "gillian slade",
|
|
"gt_answers": [
|
|
"Gideon Defoe"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07248320_4007.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What cat brand is shown on the tin top?",
|
|
"answer": "hello kitty",
|
|
"gt_answers": [
|
|
"HELLO KITTY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000395030.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What beverage is showing a label with the food?",
|
|
"answer": "soda",
|
|
"gt_answers": [
|
|
"JUICE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000395030.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does his shirt say?",
|
|
"answer": "i love tennis",
|
|
"gt_answers": [
|
|
"Nike"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of bird is this?",
|
|
"answer": "sandgrouse",
|
|
"gt_answers": [
|
|
"australian bustard"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02018795_599.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took the photograph?",
|
|
"answer": "the australian birdlife society",
|
|
"gt_answers": [
|
|
"Phil Gregory"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02018795_599.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the photo taken?",
|
|
"answer": "2003",
|
|
"gt_answers": [
|
|
"2005"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02018795_599.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is listed Largely on the side of the Truck?",
|
|
"answer": "statewide",
|
|
"gt_answers": [
|
|
"Statewide"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1160033.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the shop in the image?",
|
|
"answer": "chanel",
|
|
"gt_answers": [
|
|
"SPRIT"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_396.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is one thing displayed in this picture?",
|
|
"answer": "toilets",
|
|
"gt_answers": [
|
|
"Toilet"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000286359.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the bus display?",
|
|
"answer": "a map",
|
|
"gt_answers": [
|
|
"NYCT Bus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159361.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of car is in the picture?",
|
|
"answer": "bus",
|
|
"gt_answers": [
|
|
"Jovial Car"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370548.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say to stop doing?",
|
|
"answer": "killing gays",
|
|
"gt_answers": [
|
|
"Killing gays"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much off is posted on the sale sign?",
|
|
"answer": "20%",
|
|
"gt_answers": [
|
|
"50%.",
|
|
"50%"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_175.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters on the insignia?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"ER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000357696.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the paper in the bottom right corner?",
|
|
"answer": "''the b 52''",
|
|
"gt_answers": [
|
|
"DOUGLAS A-3D SKYWARRIOR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000425433.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cloth used for?",
|
|
"answer": "curtains",
|
|
"gt_answers": [
|
|
"Shower curtain"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04209239_10173.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name?",
|
|
"answer": "splash",
|
|
"gt_answers": [
|
|
"Splash",
|
|
"Splash bath"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04209239_10173.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit is the wine?",
|
|
"answer": "blackberry",
|
|
"gt_answers": [
|
|
"Blackberry"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the two letter written in white on the side of the ramp?",
|
|
"answer": "sk8",
|
|
"gt_answers": [
|
|
"DV"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402345.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes after BODY?",
|
|
"answer": "shop",
|
|
"gt_answers": [
|
|
"SHOP"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_782.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percentage off is this sale?",
|
|
"answer": "50%",
|
|
"gt_answers": [
|
|
"40%",
|
|
"40"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_782.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is listed twice?",
|
|
"answer": "sea",
|
|
"gt_answers": [
|
|
"SEA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408188.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word that begins with \"M\"?",
|
|
"answer": "mine",
|
|
"gt_answers": [
|
|
"MIST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408188.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the black car?",
|
|
"answer": "sheriff",
|
|
"gt_answers": [
|
|
"Sheriff"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03977966_34862.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the biscotti cost?",
|
|
"answer": "$2 each",
|
|
"gt_answers": [
|
|
"0.6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000138025.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the mini krapfen cost?",
|
|
"answer": "it's free",
|
|
"gt_answers": [
|
|
"0.6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000138025.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store shown?",
|
|
"answer": "h&m",
|
|
"gt_answers": [
|
|
"swatch"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_220.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the country name in his shirt?",
|
|
"answer": "australia",
|
|
"gt_answers": [
|
|
"Columbia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341148.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many year?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"10 year"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000494297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phone number is listed on the box?",
|
|
"answer": "the number is listed on the box",
|
|
"gt_answers": [
|
|
"650-368-5908"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420363.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What road is Wegman's Nursery on?",
|
|
"answer": "wegman road",
|
|
"gt_answers": [
|
|
"Woodside Road"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420363.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the box above the web address?",
|
|
"answer": "daffodils in pots",
|
|
"gt_answers": [
|
|
"Open 7 Days a Week"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000420363.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word on the sign to the left of the image?",
|
|
"answer": "highway",
|
|
"gt_answers": [
|
|
"CAI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000315348.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street name does the signpost say?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"HAIGHT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337658.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the country on the orange sign?",
|
|
"answer": "south africa",
|
|
"gt_answers": [
|
|
"Malaysia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written at the top of the vehicle?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"School Bus"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001122.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bus company?",
|
|
"answer": "scottish citylink",
|
|
"gt_answers": [
|
|
"Arriva"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407415.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the big sign with white words and red background say?",
|
|
"answer": "sale",
|
|
"gt_answers": [
|
|
"Sale"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_390.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is title associated with Gloucester?",
|
|
"answer": "city of gloucester",
|
|
"gt_answers": [
|
|
"Duke"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362172.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "henry street",
|
|
"gt_answers": [
|
|
"S. HENRY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362172.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the two letters on the red sign?",
|
|
"answer": "bc",
|
|
"gt_answers": [
|
|
"DQ"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000442326.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is printed in the top right corner?",
|
|
"answer": "australia",
|
|
"gt_answers": [
|
|
"WALES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which words are place names?",
|
|
"answer": "london london",
|
|
"gt_answers": [
|
|
"CYMRU NY MUNCHEN BAYERN WALES",
|
|
"BAYERN WALES NY MUNCHEN CYMRU"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is printed on the square white sticker?",
|
|
"answer": "rome",
|
|
"gt_answers": [
|
|
"NY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many rolls come in a package?",
|
|
"answer": "12",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n15075141_13342.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the underlined white words?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"Brick Oven Pizza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357756.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in bold capitals on the top 2 lines?",
|
|
"answer": "toilet and sink",
|
|
"gt_answers": [
|
|
"DIRECTIONS FOR OPERATING BEFORE USING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000250607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What herb is on the Parmesan bagel?",
|
|
"answer": "basil",
|
|
"gt_answers": [
|
|
"Oregano"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329482.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written left side corner",
|
|
"answer": "a frisbee",
|
|
"gt_answers": [
|
|
"gonzalo salas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342132.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being advertised on the far left?",
|
|
"answer": "beer",
|
|
"gt_answers": [
|
|
"coffee"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340256.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the watermark in the bottom right corner of the picture say?",
|
|
"answer": "wwii aircraft",
|
|
"gt_answers": [
|
|
"Desrosiers Photo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346561.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom of the snowboard say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Ride",
|
|
"RIDE DH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000407178.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this traffic light located?",
|
|
"answer": "wall street",
|
|
"gt_answers": [
|
|
"Wall St"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336393.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word surrounded by red",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000563233.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the green motorcycle",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"383"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327718.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word can you see three (3) times?",
|
|
"answer": "bird",
|
|
"gt_answers": [
|
|
"rim rim rim",
|
|
"rim"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000260050.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is on the right?",
|
|
"answer": "subway",
|
|
"gt_answers": [
|
|
"Subway"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_69.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What restaurant is depicted in yellow and white text?",
|
|
"answer": "subway",
|
|
"gt_answers": [
|
|
"Subway"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_69.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of key is needed?",
|
|
"answer": "a product key is needed",
|
|
"gt_answers": [
|
|
"Drive Encryption"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015970.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name printed on the sign and the two spice bottles in the background?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PLANET DAILIES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the lower left corner of the placemat beneath the plate?",
|
|
"answer": "the name of the restaurant",
|
|
"gt_answers": [
|
|
"BEACH BESTS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed within a red background on the lower right corner of the placemat?",
|
|
"answer": "person's grill",
|
|
"gt_answers": [
|
|
"TUR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "public market farmers",
|
|
"gt_answers": [
|
|
"Public Market Farmers"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000538407.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the caera in the image?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Pentax"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04069434_12300.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is he playing",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the match timer say",
|
|
"answer": "the match timer shows the time of the match",
|
|
"gt_answers": [
|
|
"0:58"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many towels do you see?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"one"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word in red is on the bottom right of the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"superseded"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the shirt of the man in red?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"Dr Pepper"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285958.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is printed over the doorway?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"206"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/285958.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the writer of the book The Counterfeit Guest that is shown in the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Rose Melikan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000144812.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the book written by Nick McDonell that is shown>",
|
|
"answer": "we are beginning our desert",
|
|
"gt_answers": [
|
|
"Twelve"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000144812.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the book written by James Meek that is shown?",
|
|
"answer": "we are beginning our desert",
|
|
"gt_answers": [
|
|
"We are Now Beginning Our Descent"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000144812.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many miles is liverpool?",
|
|
"answer": "100 miles",
|
|
"gt_answers": [
|
|
"302"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3047.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is above ONTARIO?",
|
|
"answer": "street",
|
|
"gt_answers": [
|
|
"PLACE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341602.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is below RUE?",
|
|
"answer": "rue",
|
|
"gt_answers": [
|
|
"DU MUSEE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341602.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THIS COUNTRY?",
|
|
"answer": "india",
|
|
"gt_answers": [
|
|
"SINGAPORE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_191.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN THIS ADVERTISEMENT BOARD?",
|
|
"answer": "a movie poster",
|
|
"gt_answers": [
|
|
"ULTIMATE WILDLIFE EXPERIENCE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_191.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the team name on the uniform?",
|
|
"answer": "philadelphia phillies",
|
|
"gt_answers": [
|
|
"Jays"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the sponsors' letters in the light green box?",
|
|
"answer": "mlb",
|
|
"gt_answers": [
|
|
"TD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "m15",
|
|
"gt_answers": [
|
|
"6658"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402460.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the cans that the men are standing on?",
|
|
"answer": "red bull",
|
|
"gt_answers": [
|
|
"Red Bull"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354271.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed limit?",
|
|
"answer": "20",
|
|
"gt_answers": [
|
|
"20"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000497608.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT TYPE OF SPICE IS IT",
|
|
"answer": "ground ginger",
|
|
"gt_answers": [
|
|
"GROUND GINGER",
|
|
"GINGER"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008222.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHICH BRAND IS IT",
|
|
"answer": "ground ginger",
|
|
"gt_answers": [
|
|
"KROGER"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008222.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NET WT",
|
|
"answer": "ground ginger 1 oz",
|
|
"gt_answers": [
|
|
"1.5 OZ AND 42 G",
|
|
"1.5 OZ"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008222.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much of a discount is offered here?",
|
|
"answer": "50%",
|
|
"gt_answers": [
|
|
"Up to 50% off"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boat?",
|
|
"answer": "tokyo express",
|
|
"gt_answers": [
|
|
"Tokyo Express"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03095699_10479.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white capital letters on the front of the boat?",
|
|
"answer": "express",
|
|
"gt_answers": [
|
|
"Tokyo Express"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03095699_10479.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the building?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"fiori"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/963.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the four-letter word on the blue signs?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BOWL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000079152.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the first chart on the left say at the top?",
|
|
"answer": "the number of houses built in a year",
|
|
"gt_answers": [
|
|
"perkalian 1-200"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000055874.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the licence plate?",
|
|
"answer": "fair cars",
|
|
"gt_answers": [
|
|
"Volkswagen"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342109.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue part of the sign?",
|
|
"answer": "aracha",
|
|
"gt_answers": [
|
|
"A-1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341447.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the train?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"41"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332201.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter of the Los Angeles catcher's name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"J."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000163553.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the sponsor on the back wall?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TJ Maxx",
|
|
"T.J. MAXX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000498082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white on the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MAX MILLER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000158036.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of the blue bus?",
|
|
"answer": "toyota",
|
|
"gt_answers": [
|
|
"Higer"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406967.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store sells vries shirts?",
|
|
"answer": "vries shop",
|
|
"gt_answers": [
|
|
"vries"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000565360.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the magazine name on the table?",
|
|
"answer": "health",
|
|
"gt_answers": [
|
|
"Metro"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2006.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the manufacturing company that made this truck?",
|
|
"answer": "isuzu",
|
|
"gt_answers": [
|
|
"Isuzu"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03796401_5310.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what capital letters are written on the purple sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"nyu"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000224959.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red and white sign on the right read?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"NO STANDING ANYTIME"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1160130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in front of the train?",
|
|
"answer": "nyc",
|
|
"gt_answers": [
|
|
"OBB"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371066.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the display window?",
|
|
"answer": "ice cream",
|
|
"gt_answers": [
|
|
"THE NEW EXPLOSIVE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_528.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is shown for the train leaving to Uitgeest?",
|
|
"answer": "the time shown for the train leaving to uitgeest",
|
|
"gt_answers": [
|
|
"12:47"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328542.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the platform number?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"8a"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328542.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is the final destination of the train?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"Uitgeest"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328542.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the left neon sign say?",
|
|
"answer": "open",
|
|
"gt_answers": [
|
|
"OPEN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349620.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is listed as the copyright?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01632458_11130.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who submitted this picture?",
|
|
"answer": "daniel",
|
|
"gt_answers": [
|
|
"Michael F. Benard"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01632458_11130.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this airline?",
|
|
"answer": "klm",
|
|
"gt_answers": [
|
|
"KLM ASIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000221532.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of this train?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"2893"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372076.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What model number is shown after the word \"Vocalist\"?",
|
|
"answer": "yamaha dsp vocalist",
|
|
"gt_answers": [
|
|
"VL3D"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018207.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red sign?",
|
|
"answer": "railway street",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000195860.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the dog's harness?",
|
|
"answer": "ski",
|
|
"gt_answers": [
|
|
"Mini"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02109525_37925.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is on the bottle?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000204529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes after pinot on the bottle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BLANC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000204529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store shown?",
|
|
"answer": "sephora",
|
|
"gt_answers": [
|
|
"Sephora"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the name of the Kiosk?",
|
|
"answer": "american express",
|
|
"gt_answers": [
|
|
"American Express"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_783.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number shown on the back of the batter's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"9"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000040995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name shown on the back of the batter's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"johnson"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000040995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this photo from?",
|
|
"answer": "australia",
|
|
"gt_answers": [
|
|
"dreamstime.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03935335_13903.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are playing frisbee?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Three",
|
|
"THREE PEOPLE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000142229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the organization on the sign?",
|
|
"answer": "fortuna mountain rescue team",
|
|
"gt_answers": [
|
|
"BORDER ROADS"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7777.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who made this laptop?",
|
|
"answer": "dell",
|
|
"gt_answers": [
|
|
"Dell"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter on the screen?",
|
|
"answer": "t",
|
|
"gt_answers": [
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When can you stop here?",
|
|
"answer": "when the light turns green",
|
|
"gt_answers": [
|
|
"NO STOPPING ANYTIME"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000010566.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the queens name that is on this boat?",
|
|
"answer": "victoria",
|
|
"gt_answers": [
|
|
"QUEEN ELIZABETH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355707.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what popular internet search engine is advertised here",
|
|
"answer": "google",
|
|
"gt_answers": [
|
|
"YAHOO!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411346.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the black text on the license plate?",
|
|
"answer": "les b\u00eates",
|
|
"gt_answers": [
|
|
"CL 637 AP"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4146.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of soda does this machine vend?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"coke"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04525305_10428.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the jersey number of the player to the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"32"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000528117.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the front of the jersey on the player to the right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"orioles"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000528117.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name printed on the back of the player to the left's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"KIETERS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000528117.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the city name in image?",
|
|
"answer": "miami",
|
|
"gt_answers": [
|
|
"sydney"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359419.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "x1",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000290168.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two readable letters are being reflected in his glasses?",
|
|
"answer": "donuts",
|
|
"gt_answers": [
|
|
"LU"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400931.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website address is in the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"mhbaker.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000208907.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Michael R. Hicks"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000343857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bottom sign?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408075.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is on the sign?",
|
|
"answer": "hong kong",
|
|
"gt_answers": [
|
|
"Hong Kong"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369365.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of 1st board refer",
|
|
"answer": "1st board refer is the sign of 1st board refer",
|
|
"gt_answers": [
|
|
"firehol",
|
|
"FIREHOU"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352662.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of 2nd board refer",
|
|
"answer": "2nd board refer is the sign of 2nd board refer",
|
|
"gt_answers": [
|
|
"inspiration"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352662.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company of the train?",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"Javelin"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376799.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is located below three triangles red, whit and blue?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Majestic"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000336077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the batter's jersey?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000336077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word can be found on the wall in the bull pit, above the photographer with the white camera?",
|
|
"answer": "bull",
|
|
"gt_answers": [
|
|
"new"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000336077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "tah",
|
|
"gt_answers": [
|
|
"W302 NTX"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402601.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is this plane for?",
|
|
"answer": "ryanair",
|
|
"gt_answers": [
|
|
"Ryanair"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000432218.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the school bus?",
|
|
"answer": "the number of the bus",
|
|
"gt_answers": [
|
|
"21"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001089.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the breed of dog that is shown in the photo?",
|
|
"answer": "jack russell terrier",
|
|
"gt_answers": [
|
|
"Jack Russell Terrier",
|
|
"TERRIER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335241.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the average life expectancy for this breed of dog?",
|
|
"answer": "8 to 10 years",
|
|
"gt_answers": [
|
|
"13-24 years",
|
|
"13-14"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335241.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is he practicing?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What colour is he wearing?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of sauce is the jar?",
|
|
"answer": "tomato sauce",
|
|
"gt_answers": [
|
|
"Vegetarian"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is with the sauce?",
|
|
"answer": "tomato sauce",
|
|
"gt_answers": [
|
|
"Soy"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the net weight in g?",
|
|
"answer": "450 g",
|
|
"gt_answers": [
|
|
"680",
|
|
"680G"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Do the letters JSD appear on this image?",
|
|
"answer": "no",
|
|
"gt_answers": [
|
|
"JSD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000092197.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much do blood orange doughnuts cost?",
|
|
"answer": "$2 each",
|
|
"gt_answers": [
|
|
"$2.00"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337555.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the place that made this wine?",
|
|
"answer": "becker vineyard",
|
|
"gt_answers": [
|
|
"BECKER VINEYARDS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409865.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the bus?",
|
|
"answer": "dmc 005",
|
|
"gt_answers": [
|
|
"X59IUSC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000060458.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the bus?",
|
|
"answer": "dmc 005",
|
|
"gt_answers": [
|
|
"X59IUSC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000060458.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of dog is this?",
|
|
"answer": "basset hound",
|
|
"gt_answers": [
|
|
"Basset Hound"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02088238_8110.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the fruit?",
|
|
"answer": "banana",
|
|
"gt_answers": [
|
|
"Banana"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348731.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the spread in the container?",
|
|
"answer": "nutella",
|
|
"gt_answers": [
|
|
"Nutella"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348731.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sticker on the front of the truck say?",
|
|
"answer": "i am a soldier",
|
|
"gt_answers": [
|
|
"Lucky Lizard"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000295657.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company owning this plane",
|
|
"answer": "united airlines",
|
|
"gt_answers": [
|
|
"UNITED"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344305.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website advertised?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"usopen.org"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is advertised behind the player?",
|
|
"answer": "adidas",
|
|
"gt_answers": [
|
|
"OLYMPUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is this plate from?",
|
|
"answer": "mexico",
|
|
"gt_answers": [
|
|
"Mexico"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Licence plate number?",
|
|
"answer": "rl 9",
|
|
"gt_answers": [
|
|
"508 RL 9",
|
|
"508 RL-9"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is displayed in this picture?",
|
|
"answer": "sandwich",
|
|
"gt_answers": [
|
|
"sandwich"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which direction can cars not turn?",
|
|
"answer": "right",
|
|
"gt_answers": [
|
|
"Right",
|
|
"right turn"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325024.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON THE TAIL OF THE WHITE PLANE?",
|
|
"answer": "qantas",
|
|
"gt_answers": [
|
|
"KLM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369343.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON THE RED TAIL OF THE SECOND PLANE?",
|
|
"answer": "qantas",
|
|
"gt_answers": [
|
|
"VIRGIN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369343.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this brand?",
|
|
"answer": "r e d",
|
|
"gt_answers": [
|
|
"beb",
|
|
"bcb"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03690938_793.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the front of the train?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"73"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410729.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of resturant is behind them?",
|
|
"answer": "fast food",
|
|
"gt_answers": [
|
|
"Steakhouse",
|
|
"STEAKHOUSE CHICAGO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159291.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is printed on the green shirt?",
|
|
"answer": "peace",
|
|
"gt_answers": [
|
|
"FROM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the jersey?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"99"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065518.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What animal is this?",
|
|
"answer": "eastern diamondback rattlesnake",
|
|
"gt_answers": [
|
|
"Eastern diamondback rattle snake"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01755581_12982.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company made the accordian?",
|
|
"answer": "hohner",
|
|
"gt_answers": [
|
|
"Hohner"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02672831_17131.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is the picture copyright?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338128.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of camera is being used?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03314780_12192.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the players number?",
|
|
"answer": "23",
|
|
"gt_answers": [
|
|
"34"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371802.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team is the pitcher with?",
|
|
"answer": "mets",
|
|
"gt_answers": [
|
|
"Mets"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371802.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of this product?",
|
|
"answer": "crock pot",
|
|
"gt_answers": [
|
|
"Crock-Pot",
|
|
"Crock Pot"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03133878_193.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written below the logo?",
|
|
"answer": "crock pot",
|
|
"gt_answers": [
|
|
"Little Dipper"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03133878_193.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the player's jersey number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"38"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000539167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on his shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"38"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000539167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time was this photo take?",
|
|
"answer": "at night",
|
|
"gt_answers": [
|
|
"8:47 am",
|
|
"8:47"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000014653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Was this photo taken during am or pm?",
|
|
"answer": "am",
|
|
"gt_answers": [
|
|
"am"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000014653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the guy's shirt say?",
|
|
"answer": "he is a snowboarder",
|
|
"gt_answers": [
|
|
"devil inside"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is on the building sign?",
|
|
"answer": "morgan records",
|
|
"gt_answers": [
|
|
"Mother Redcaps"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3453.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the destination of bus route 450?",
|
|
"answer": "san francisco international airport",
|
|
"gt_answers": [
|
|
"ARTESIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000001720.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the measurement unit?",
|
|
"answer": "cm",
|
|
"gt_answers": [
|
|
"Inches"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04118776_34781.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the vegetable that has its price listed on the lower shelf?",
|
|
"answer": "carrots",
|
|
"gt_answers": [
|
|
"CARROTS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000277788.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What should you watch?",
|
|
"answer": "your back",
|
|
"gt_answers": [
|
|
"your back"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_32397.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is sweet?",
|
|
"answer": "revenge",
|
|
"gt_answers": [
|
|
"revenge"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_32397.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the side of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Ricksha Bus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000455333.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "yankee way",
|
|
"gt_answers": [
|
|
"Yawkey Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359343.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is being played in the image?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Baseball"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405007.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team in the player with the bat playing for?",
|
|
"answer": "texas rangers",
|
|
"gt_answers": [
|
|
"Texas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405007.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red portion of the sign say?",
|
|
"answer": "nyc subway station",
|
|
"gt_answers": [
|
|
"exit"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254081.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street is this subway stop for?",
|
|
"answer": "avenue of the Americas",
|
|
"gt_answers": [
|
|
"25 street"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254081.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What avenue is this subway stop for?",
|
|
"answer": "avenue of the Americas",
|
|
"gt_answers": [
|
|
"8 avenue"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254081.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is available in the office?",
|
|
"answer": "there is a kitchen in the office",
|
|
"gt_answers": [
|
|
"OVEN TRAYS DISHES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365927.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word does the bus sign say below the icon of the bus?",
|
|
"answer": "bus",
|
|
"gt_answers": [
|
|
"Terminal"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4907.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What the name of the restaurant",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"DOUGHNUT plant"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000474550.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say in the top of the middle image?",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"The Amazing Everyday"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many persons appear in the picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02403003_5442.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many animals are pulling the cart?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02403003_5442.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal red sign say?",
|
|
"answer": "st mary's church",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000395007.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what video game console is being played? A: Wii",
|
|
"answer": "a wii",
|
|
"gt_answers": [
|
|
"Wii"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000452806.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the top sign?",
|
|
"answer": "thomas path",
|
|
"gt_answers": [
|
|
"Museum"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345669.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TOWING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000463859.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Ski patrol"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000071122.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline does this plane belong to?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"KLM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000072895.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are written on the tail of the plane?",
|
|
"answer": "klm",
|
|
"gt_answers": [
|
|
"KLM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000072895.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters represent on the aircraft represent Koninklijke Luchtvaart Maatschappij airline?",
|
|
"answer": "klm",
|
|
"gt_answers": [
|
|
"KLM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000072895.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign above the stove say?",
|
|
"answer": "texas",
|
|
"gt_answers": [
|
|
"Aggie Xing"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000357898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this man's hat say?",
|
|
"answer": "i am a man",
|
|
"gt_answers": [
|
|
"Milwaukee"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/286062.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the black boarder of the photo booth?",
|
|
"answer": "photo booth",
|
|
"gt_answers": [
|
|
"Photosmart Studio"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1245.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the black sticker say.",
|
|
"answer": "bike and destroy",
|
|
"gt_answers": [
|
|
"Bike and Destroy"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401065.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top sign say?",
|
|
"answer": "winchester avenue",
|
|
"gt_answers": [
|
|
"n main st"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000026266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bottom sign say?",
|
|
"answer": "winchester avenue",
|
|
"gt_answers": [
|
|
"winchester ave"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000026266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which sign says 'ave'?",
|
|
"answer": "winchester ave",
|
|
"gt_answers": [
|
|
"winchester"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000026266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue backbord",
|
|
"answer": "jc penney",
|
|
"gt_answers": [
|
|
"JPMorgan"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369030.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is listed on the clock?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"London"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000172947.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the only readable word on the man's shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Rebellion"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000065588.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is does the sign below the clock say in bold letters?",
|
|
"answer": "sam's club",
|
|
"gt_answers": [
|
|
"CLOCK TOWER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357647.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this subway station?",
|
|
"answer": "camden town station",
|
|
"gt_answers": [
|
|
"Camden Town Station"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the car?",
|
|
"answer": "24",
|
|
"gt_answers": [
|
|
"22"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04037443_36548.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the largest name located on the restaurant sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Cane's"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000075493.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name listed on the top of the two street signs?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"trust",
|
|
"Trust Dr"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000075493.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name listed at the bottom of the two street signs?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Corporate",
|
|
"Corporate Blvd"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000075493.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what sport is being performed in this picture?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"baseball",
|
|
"Baseball."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city team is it?",
|
|
"answer": "royals",
|
|
"gt_answers": [
|
|
"Kansas City",
|
|
"Kansas City."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant at the top of the image?",
|
|
"answer": "spicy sweet restaurant",
|
|
"gt_answers": [
|
|
"SPICY & SWEET RESTAURANT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07565083_10724.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What telephone number is listed at the bottom of the menu?",
|
|
"answer": "the phone number for spicy and sweet restaurant",
|
|
"gt_answers": [
|
|
"416-651-6363"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07565083_10724.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number that is printed on the bus window?",
|
|
"answer": "77",
|
|
"gt_answers": [
|
|
"US141"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356663.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is shown here?",
|
|
"answer": "canada",
|
|
"gt_answers": [
|
|
"Canada"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000260407.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word to the right of the copyright logo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"luca"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000103935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does is say on this buses blue sign in white capital letters?",
|
|
"answer": "lincoln city centre",
|
|
"gt_answers": [
|
|
"GERTCHA SELF DOWN TO LONDON BY TRAIN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365410.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the red traffic sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357432.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is SALE! written in?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001014.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is around SALE!?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Purple"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001014.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many letters are in the picture?",
|
|
"answer": "five",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001014.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photographer that took this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Andreas",
|
|
"ANDREAS SCHAUER-VILLANUEVA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000164944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"463"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000123935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"278849"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000123935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What amusement park is mentioned on the banner in the stands?",
|
|
"answer": "amusement park",
|
|
"gt_answers": [
|
|
"Disneyland"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000550655.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What isnt allowed in the upcoming area?",
|
|
"answer": "vehicles bicycles skateboards glass containers animals reptiles",
|
|
"gt_answers": [
|
|
"Vehicles,bicycles,skates,skateboards,glass containers,animals/reptiles"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the train?",
|
|
"answer": "polson logging co",
|
|
"gt_answers": [
|
|
"Polson Logging co"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341912.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does this sign in front of this building read?",
|
|
"answer": "delhi high court",
|
|
"gt_answers": [
|
|
"DELHI HIGH COURT"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000502.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the taxi cab?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"2431"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000566395.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which country is this used in?",
|
|
"answer": "china",
|
|
"gt_answers": [
|
|
"China"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03417042_17865.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many windshield wipers are on the vehicle?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03417042_17865.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of business is on the corner of the street?",
|
|
"answer": "bank",
|
|
"gt_answers": [
|
|
"Bank"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369773.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the Monticello Convention held?",
|
|
"answer": "1787",
|
|
"gt_answers": [
|
|
"1852"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04355338_2362.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the train?",
|
|
"answer": "transit",
|
|
"gt_answers": [
|
|
"TRANSIT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000211381.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the yellow shop?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Babos"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000483692.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the surf board?",
|
|
"answer": "hawaii",
|
|
"gt_answers": [
|
|
"Lifestyle"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329658.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are showing at the top of the image?",
|
|
"answer": "fire iron way",
|
|
"gt_answers": [
|
|
"FI"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371483.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the largest word on the plane say?",
|
|
"answer": "canada",
|
|
"gt_answers": [
|
|
"express"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328061.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the poster?",
|
|
"answer": "free",
|
|
"gt_answers": [
|
|
"Cats"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_692.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "id",
|
|
"gt_answers": [
|
|
"178"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/286028.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the shop called?",
|
|
"answer": "james murphy",
|
|
"gt_answers": [
|
|
"P.T. MURPHY",
|
|
"P.T.MURPHY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2347.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the Airline on the plane",
|
|
"answer": "british airways",
|
|
"gt_answers": [
|
|
"British Airways"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000231572.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the distance of the two roads merging ?",
|
|
"answer": "100 meters",
|
|
"gt_answers": [
|
|
"100m",
|
|
"100 m"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356846.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "During what year was this image copyrighted?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000061992.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What station is this train stopped at?",
|
|
"answer": "london bridge",
|
|
"gt_answers": [
|
|
"COCKFOSTERS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331217.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the wings of the jet fighter planes?",
|
|
"answer": "the united states of america",
|
|
"gt_answers": [
|
|
"USAF"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000170451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which airline is this?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3908.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color shirt is the man ordering tickets wearing?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Purple"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3908.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color tie is the man on the left wearing?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Red"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3908.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of remote is in the packaging?",
|
|
"answer": "universal remote",
|
|
"gt_answers": [
|
|
"UNIVERSAL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000137547.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on this stocking?",
|
|
"answer": "william",
|
|
"gt_answers": [
|
|
"William"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03026506_3500.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many shower caps are in this package?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04209133_10027.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "reserve bank of india",
|
|
"gt_answers": [
|
|
"Reserve Bank of India"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000993.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the skier out in front in the picture?",
|
|
"answer": "one",
|
|
"gt_answers": [
|
|
"33"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368096.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the plane?",
|
|
"answer": "korea",
|
|
"gt_answers": [
|
|
"KOREAN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000493251.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the middle word on the plane?",
|
|
"answer": "air",
|
|
"gt_answers": [
|
|
"air"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000493251.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the side of the plane?",
|
|
"answer": "korea",
|
|
"gt_answers": [
|
|
"cargo"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000493251.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what name is in the logo on the man's shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"opper"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000171321.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what company name appears on the man's hat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Topper"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000171321.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what letters appear on the lower right corner of the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"KT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000171321.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is in the photo?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"42"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000224942.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the car's license plate?",
|
|
"answer": "ct",
|
|
"gt_answers": [
|
|
"DL9553"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333273.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on the umbrellas?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Singha"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000167027.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the plaza on the sign?",
|
|
"answer": "person plaza",
|
|
"gt_answers": [
|
|
"Thailand Plaza",
|
|
"THAILAND"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000167027.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written in white at the bottom of the bus?",
|
|
"answer": "bus",
|
|
"gt_answers": [
|
|
"City"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357817.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company building?",
|
|
"answer": "nokia",
|
|
"gt_answers": [
|
|
"Nokia"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000866.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the magazine?",
|
|
"answer": "vampire",
|
|
"gt_answers": [
|
|
"Vampirella"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_13599.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on ths sign in this picture?",
|
|
"answer": "electronics",
|
|
"gt_answers": [
|
|
"SAMSUNG"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_512.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of gum?",
|
|
"answer": "chewy",
|
|
"gt_answers": [
|
|
"Doublemint"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000549409.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the front of the bus?",
|
|
"answer": "300",
|
|
"gt_answers": [
|
|
"300"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406968.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What port is the bus parked in front of?",
|
|
"answer": "st peter port",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406968.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the rear section of the airplane\ufffd\ufffds wing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"X-48B"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000403286.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text on the shirt say?",
|
|
"answer": "hot dog",
|
|
"gt_answers": [
|
|
"Digard"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403483.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What food is pictured here?",
|
|
"answer": "hot dogs",
|
|
"gt_answers": [
|
|
"Hot dog"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403483.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is this bus headed?",
|
|
"answer": "birmingham city centre",
|
|
"gt_answers": [
|
|
"Ecclesfield"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000223091.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what company runs this bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"stagecoach stagecoach",
|
|
"STAGECOACH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000223091.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the back left of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000220390.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When is the morning rubbish collection?",
|
|
"answer": "the morning rubbish collection is at 7am",
|
|
"gt_answers": [
|
|
"10.30am to noon",
|
|
"1030AM TO NOON"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355545.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When is the evening rubbish collection?",
|
|
"answer": "on tuesday and thursday",
|
|
"gt_answers": [
|
|
"10.00pm to midnight",
|
|
"1000PM TO MIDNIGHT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355545.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green sticker on this sign say?",
|
|
"answer": "use of car horn here",
|
|
"gt_answers": [
|
|
"SOLVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355388.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photo?",
|
|
"answer": "daniel",
|
|
"gt_answers": [
|
|
"Ivan Bustamante"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416344.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is referenced on the man's shirt?",
|
|
"answer": "chicago",
|
|
"gt_answers": [
|
|
"New York"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000197001.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in green on the side of the bus?",
|
|
"answer": "dog daycare",
|
|
"gt_answers": [
|
|
"Dog Daycare"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the baby care system?",
|
|
"answer": "pacifier dispenser",
|
|
"gt_answers": [
|
|
"KOALA KARE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000027298.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many bicycles are there?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Two",
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319317.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the hot air balloon advertising?",
|
|
"answer": "lavans",
|
|
"gt_answers": [
|
|
"Lavans"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02782093_39116.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is sponsoring the hot air balloon?",
|
|
"answer": "lavans",
|
|
"gt_answers": [
|
|
"Lavans"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02782093_39116.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the blue printed words on the hot air ballon?",
|
|
"answer": "lavans",
|
|
"gt_answers": [
|
|
"Lavans"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02782093_39116.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who holds the copyright?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Tony Northrup"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02444819_308.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the train operator?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Fret"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000027504.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what street is shown?",
|
|
"answer": "a street",
|
|
"gt_answers": [
|
|
"18th",
|
|
"18TH STREET"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406204.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign refer",
|
|
"answer": "refer",
|
|
"gt_answers": [
|
|
"amy581014"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03131574_1780.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does CSA stand for?",
|
|
"answer": "community supported agriculture",
|
|
"gt_answers": [
|
|
"Community Supported Agriculture"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405515.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text in white say?",
|
|
"answer": "mcdonald's ice cream",
|
|
"gt_answers": [
|
|
"Jack Pot"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_4482.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is being played?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Baseball"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337648.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is this a child or adult game?",
|
|
"answer": "child",
|
|
"gt_answers": [
|
|
"Child"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337648.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is their uniform?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"Green and yellow",
|
|
"Green & White"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000337648.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is on the bottom of the product?",
|
|
"answer": "swiss made",
|
|
"gt_answers": [
|
|
"Super"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02910353_10857.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the monument sign say?",
|
|
"answer": "st mary's cathedral",
|
|
"gt_answers": [
|
|
"Homo-monument"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000276311.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what team is on the tshirt?",
|
|
"answer": "atlanta braves",
|
|
"gt_answers": [
|
|
"Braves"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358109.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word seen on the display?",
|
|
"answer": "liquor",
|
|
"gt_answers": [
|
|
"Liquor"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04525305_11287.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Roger Price"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000028503.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this picture taken?",
|
|
"answer": "antique and vintage art gallery",
|
|
"gt_answers": [
|
|
"ANTIQUE & FINE ARTS AUCTIONS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000523571.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the sail?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"159"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04612504_15248.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is near the top of the sail?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Y"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04612504_15248.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fuel does the truck run on?",
|
|
"answer": "diesel",
|
|
"gt_answers": [
|
|
"natural gas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414566.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is timestamped on the photograph?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414566.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What agency captured this picture?",
|
|
"answer": "wyoming game and fish department",
|
|
"gt_answers": [
|
|
"Wyoming Game and Fish Department"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02443484_1894.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company owns this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Wyoming Game and Fish Department"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02443484_1894.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website listed on the bottom of the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"http://www.chuqui.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000496118.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the player wearing?",
|
|
"answer": "11",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414770.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is the tram travelling to?",
|
|
"answer": "the seafront",
|
|
"gt_answers": [
|
|
"STARR GATE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04335435_24175.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which country's Government is referred to in this image?",
|
|
"answer": "canada",
|
|
"gt_answers": [
|
|
"Canada"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000155981.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name for the silver object in the image?",
|
|
"answer": "plane iron",
|
|
"gt_answers": [
|
|
"Blade Or Iron"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03954731_40761.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant",
|
|
"answer": "silk",
|
|
"gt_answers": [
|
|
"Thai at Silk"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1359.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is in red?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"A"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000029116.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the smallest text say?",
|
|
"answer": "a skateboarder in the air",
|
|
"gt_answers": [
|
|
"images"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000029116.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is in pink?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"A"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000029116.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this plane's license number?",
|
|
"answer": "it is a private plane",
|
|
"gt_answers": [
|
|
"PH-DEZ"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352809.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top sign?",
|
|
"answer": "danforth ave",
|
|
"gt_answers": [
|
|
"DANFORTH AVE. 479"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331979.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the signs say?",
|
|
"answer": "no parking on the street",
|
|
"gt_answers": [
|
|
"Regardez",
|
|
"REGARDEZ A GAUCHE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000561735.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the truck's license plate number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"G81083"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000068777.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the web URL shown?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"WWW.MAVERIX.ORG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000123711.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the red sign read?",
|
|
"answer": "park road east",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332282.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is the bus",
|
|
"answer": "8",
|
|
"gt_answers": [
|
|
"704"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406821.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in the top left corner?",
|
|
"answer": "bath",
|
|
"gt_answers": [
|
|
"corbis"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04553703_20667.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What booth are the guests visiting?",
|
|
"answer": "vlaser",
|
|
"gt_answers": [
|
|
"viLaser"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349174.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number",
|
|
"answer": "the train number is person",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000223288.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the words on the bus?",
|
|
"answer": "london red bus",
|
|
"gt_answers": [
|
|
"another red bus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000003988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red vehicle in the photo?",
|
|
"answer": "bus",
|
|
"gt_answers": [
|
|
"bus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000003988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this buses number?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"24"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04146614_14916.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which number is a part of an add on the side of the bus?",
|
|
"answer": "999",
|
|
"gt_answers": [
|
|
"1.58"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04146614_14916.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is this?",
|
|
"answer": "papa johns",
|
|
"gt_answers": [
|
|
"Papa Johns"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1133.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the sign in this picture?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375982.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the text on the sign?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375982.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the largest word on the back of the blue shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Super"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000246719.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the front of the train?",
|
|
"answer": "73",
|
|
"gt_answers": [
|
|
"159106"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000443533.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 5 letters at the very top of the image?",
|
|
"answer": "afes",
|
|
"gt_answers": [
|
|
"AAFES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03425413_12685.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of gas is found under the 4 stars?",
|
|
"answer": "premium gas",
|
|
"gt_answers": [
|
|
"UNLEADED"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03425413_12685.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is this?",
|
|
"answer": "chicago",
|
|
"gt_answers": [
|
|
"Memphis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342646.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the letters on the blue shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"jcrew"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000145189.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this product?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SUSE LINUX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000563270.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What pain should you stop?",
|
|
"answer": "back pain",
|
|
"gt_answers": [
|
|
"Foot, Knee & Back"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_493.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the building labelled by the sign?",
|
|
"answer": "sydney railway station",
|
|
"gt_answers": [
|
|
"Fire Station"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000239040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT COLOR IS THE BUS",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"RED"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHERE IS IT GOING TO",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"PADDINGTON"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE BUS NUMBER",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"36"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4430.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photo tag?",
|
|
"answer": "chimpanzee",
|
|
"gt_answers": [
|
|
"Tony Northrup",
|
|
"\ufffd\ufffd TONY NORTHRUP"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02481823_4165.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What products does this store sell?",
|
|
"answer": "antiques",
|
|
"gt_answers": [
|
|
"ANTIQUES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06874185_29995.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the green boat?",
|
|
"answer": "numbers",
|
|
"gt_answers": [
|
|
"869SE5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407889.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the blue boat?",
|
|
"answer": "numbers",
|
|
"gt_answers": [
|
|
"4057SE5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407889.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo copyrighted?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403544.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign in the bus say?",
|
|
"answer": "london bus map",
|
|
"gt_answers": [
|
|
"Stopping"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000182104.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is on the van?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"FedEx"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_334.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of that truck?",
|
|
"answer": "scania",
|
|
"gt_answers": [
|
|
"Scania"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345032.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the model of the truck?",
|
|
"answer": "scania",
|
|
"gt_answers": [
|
|
"R440"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345032.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the truck?",
|
|
"answer": "md md",
|
|
"gt_answers": [
|
|
"FJ60LGK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345032.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this store?",
|
|
"answer": "the fountain",
|
|
"gt_answers": [
|
|
"The Fountain?",
|
|
"The Fountain"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What service is provided while you wait?",
|
|
"answer": "an umbrella",
|
|
"gt_answers": [
|
|
"Shoe Repair"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boat in the picture?",
|
|
"answer": "thunderchuck",
|
|
"gt_answers": [
|
|
"Thunderchick"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355027.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is displayed on the front of the building?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"1900"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000473503.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the place listed above the witch character?",
|
|
"answer": "miami",
|
|
"gt_answers": [
|
|
"Moomin Valley"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04560804_16073.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the copyrights to this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"2013 FRANZ VENHAUS",
|
|
"c 2013 franz venhaus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000236176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company name of the bus?",
|
|
"answer": "city bus",
|
|
"gt_answers": [
|
|
"RapidRide"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4298.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what kind on vehicle is this ?",
|
|
"answer": "ambulance",
|
|
"gt_answers": [
|
|
"Rode Kruis"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03769881_2599.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the four yellow signs say?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"JACO, OROTINA, ZARCERO, BAGACES",
|
|
"jaco orotina zarcero bagaces"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357671.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bike?",
|
|
"answer": "harley davidson",
|
|
"gt_answers": [
|
|
"Indian"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is carried by the blue truck?",
|
|
"answer": "corona extra",
|
|
"gt_answers": [
|
|
"Corona"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand off the white car",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Nissan"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4898.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the biggest letters on this bus say?",
|
|
"answer": "the party bus",
|
|
"gt_answers": [
|
|
"The Honesty Shop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/150374.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is on the bag?",
|
|
"answer": "nestle",
|
|
"gt_answers": [
|
|
"Nestle"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000836.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much protein does the food contain per 100 grams?",
|
|
"answer": "20 g",
|
|
"gt_answers": [
|
|
"9.2 grams"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000836.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company plane is this?",
|
|
"answer": "delta airlines",
|
|
"gt_answers": [
|
|
"Delta"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363863.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the gate number?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"A12"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363863.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the street signs read?",
|
|
"answer": "illinois and 2nd street",
|
|
"gt_answers": [
|
|
"1100 ILLINOIS 600 22ND ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322321.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Brand of the Computer to the far right?",
|
|
"answer": "acer",
|
|
"gt_answers": [
|
|
"sony"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000305501.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the weekly newsmagazine",
|
|
"answer": "outlook",
|
|
"gt_answers": [
|
|
"Outlook"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329122.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What cream cheese is there?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PHILADELPHIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000064389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word that is displayed inside the red circle?",
|
|
"answer": "pay",
|
|
"gt_answers": [
|
|
"FUR"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03902125_15097.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of phone is this?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"Siemens"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001141.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top street sign say?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"JOEY RAMONE PLACE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370496.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which street sign is higher than the other?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"TERRACE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the logo in the image say?",
|
|
"answer": "skateboards",
|
|
"gt_answers": [
|
|
"MyProfe"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362783.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bench say?",
|
|
"answer": "city of london",
|
|
"gt_answers": [
|
|
"City of London"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000103583.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how is the food seasoned?",
|
|
"answer": "lemon garlic",
|
|
"gt_answers": [
|
|
"lemon garlic"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001124.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words present in the banner",
|
|
"answer": "hello world",
|
|
"gt_answers": [
|
|
"Merry X-mas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350317.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which Australian region offers food similar to what you might find at this restaurant?",
|
|
"answer": "south australia",
|
|
"gt_answers": [
|
|
"OUTBACK"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the location on the map?",
|
|
"answer": "zell am see",
|
|
"gt_answers": [
|
|
"Zeilitzheim"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317055.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the blue sign?",
|
|
"answer": "map",
|
|
"gt_answers": [
|
|
"Raiffeisenbank"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317055.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "This red sign displays which brand name?",
|
|
"answer": "vodafone",
|
|
"gt_answers": [
|
|
"Vodafone"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001255.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two actions are not allowed in this specific location?",
|
|
"answer": "riding bicycles or skateboards",
|
|
"gt_answers": [
|
|
"TIPPING & DUMPING"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_15.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text at the bottom of the back of the vehicle",
|
|
"answer": "it says sweden",
|
|
"gt_answers": [
|
|
"Faymonville"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365194.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the sticker on the left-backside of the truck",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"60"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365194.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the punching bags?",
|
|
"answer": "dubat",
|
|
"gt_answers": [
|
|
"Tabo"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04023962_16417.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"55"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000324852.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE WORD WRITTEN ON THE CHAIR",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Coca-Cola"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000427555.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the top of the train?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"90251"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373267.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the front of the train say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Amtrak"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373267.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the bus",
|
|
"answer": "liberty liner",
|
|
"gt_answers": [
|
|
"VICTORY LINER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367237.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS NUMBER of THE BUS",
|
|
"answer": "number of the bus",
|
|
"gt_answers": [
|
|
"2119"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367237.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is letter written behind the bus",
|
|
"answer": "l",
|
|
"gt_answers": [
|
|
"p"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367237.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the train?",
|
|
"answer": "mta train",
|
|
"gt_answers": [
|
|
"7850"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team is on his shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Red Sox",
|
|
"SOX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000450684.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in green on the white food truck ?",
|
|
"answer": "mx taco",
|
|
"gt_answers": [
|
|
"Maki Taco"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336452.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store in the background?",
|
|
"answer": "walmart",
|
|
"gt_answers": [
|
|
"Carphone Warehouse"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410379.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is displayed on the bottom right?",
|
|
"answer": "rainbow loom",
|
|
"gt_answers": [
|
|
"estrelaaa Paulinha"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03476684_13918.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is seen as heading on the blue book?",
|
|
"answer": "i am a computer",
|
|
"gt_answers": [
|
|
"Mountain Biking"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000062053.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photographers name?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Jordan Light"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03956157_7164.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first blue letter on the plane?",
|
|
"answer": "u",
|
|
"gt_answers": [
|
|
"n"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000166086.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is displayed on front of the bobsled?",
|
|
"answer": "canada",
|
|
"gt_answers": [
|
|
"Whelen"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02860847_21360.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "A king is married to a what?",
|
|
"answer": "queen",
|
|
"gt_answers": [
|
|
"Queen"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015245.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of drink is there?",
|
|
"answer": "soda",
|
|
"gt_answers": [
|
|
"Arizona Fruit Punch."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000437801.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is inside the brown case?",
|
|
"answer": "a guitar",
|
|
"gt_answers": [
|
|
"A guitar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324862.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is printed in red?",
|
|
"answer": "darkness",
|
|
"gt_answers": [
|
|
"Devil"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_6165.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who's name is in the title?",
|
|
"answer": "miss jones",
|
|
"gt_answers": [
|
|
"Miss Jones"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_6165.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name on the tag??",
|
|
"answer": "john",
|
|
"gt_answers": [
|
|
"Nick"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01735189_5072.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What car manufacturer name is printed in the car?",
|
|
"answer": "toyota",
|
|
"gt_answers": [
|
|
"TOYOTA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341057.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the food truck?",
|
|
"answer": "havana food truck",
|
|
"gt_answers": [
|
|
"Caravan Crepes"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358974.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word shown in the blue neon sign box?",
|
|
"answer": "car",
|
|
"gt_answers": [
|
|
"open"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2042.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where was the wine bottled?",
|
|
"answer": "in argentina",
|
|
"gt_answers": [
|
|
"Spain"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018960.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the red sign?",
|
|
"answer": "a stop sign",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000288021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What movie is advertised on the window ?",
|
|
"answer": "exodus",
|
|
"gt_answers": [
|
|
"Exodus: Gods and Kings",
|
|
"EXODUS"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_82.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "london street",
|
|
"gt_answers": [
|
|
"London Street W2",
|
|
"LONDON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000103390.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which city is it?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"Westminster"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000103390.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who sponsers this event?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"KIA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321697.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes after Madison Square?",
|
|
"answer": "new",
|
|
"gt_answers": [
|
|
"Garden"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is on the black sign?",
|
|
"answer": "food selection",
|
|
"gt_answers": [
|
|
"Hotels"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_203.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text is on the red sign?",
|
|
"answer": "food selection",
|
|
"gt_answers": [
|
|
"Food directory"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_203.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in block lettes on the bike?",
|
|
"answer": "police",
|
|
"gt_answers": [
|
|
"FORT LAUDERDALE POLICE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is that?",
|
|
"answer": "beauty store",
|
|
"gt_answers": [
|
|
"Aveda"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_563.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of Aveda store is that?",
|
|
"answer": "a retail store",
|
|
"gt_answers": [
|
|
"Team Salon Aveda",
|
|
"TEAM SALON"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_563.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE LICENSE PLATE OF THE BLUE BUS?",
|
|
"answer": "md md",
|
|
"gt_answers": [
|
|
"YNO3NCF",
|
|
"YNO3 NCF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN FRONT OF THE BLUE BUS",
|
|
"answer": "london bus",
|
|
"gt_answers": [
|
|
"ARRIVA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349683.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the remote?",
|
|
"answer": "channel",
|
|
"gt_answers": [
|
|
"One"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415871.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the tractor?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"130"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339412.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this animal?",
|
|
"answer": "cat",
|
|
"gt_answers": [
|
|
"cat"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000050794.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was this picture taken?",
|
|
"answer": "indian",
|
|
"gt_answers": [
|
|
"2010/02/09"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375052.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the bus?",
|
|
"answer": "a map of madrid",
|
|
"gt_answers": [
|
|
"MADRID CITY TOUR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000487167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the house being sold by?",
|
|
"answer": "owner",
|
|
"gt_answers": [
|
|
"Owner"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the sign?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"2590105",
|
|
"259 0105"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What text message is written on the pink phone?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"I love you"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000106438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the tennis ball?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"yellow",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325333.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT NAME IN THE FIGURE?",
|
|
"answer": "cauliflower",
|
|
"gt_answers": [
|
|
"OLEKSIY MAKSYMENKO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07715103_8561.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What model car is sold at this location?",
|
|
"answer": "honda civic",
|
|
"gt_answers": [
|
|
"Honda"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000624.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer?",
|
|
"answer": "rajesh kumar",
|
|
"gt_answers": [
|
|
"Sanmoy Photography",
|
|
"SANMOY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/6599.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the warning sign?",
|
|
"answer": "zone",
|
|
"gt_answers": [
|
|
"ZONE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2407077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the train in the foreground?",
|
|
"answer": "050",
|
|
"gt_answers": [
|
|
"444 007"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000312541.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What drink brand is advertised on the green umbrella?",
|
|
"answer": "pepsi",
|
|
"gt_answers": [
|
|
"Heineken"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404959.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is listed on the white street sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000143749.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the building?",
|
|
"answer": "emporis",
|
|
"gt_answers": [
|
|
"Emporis"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Please stop unless cave trained",
|
|
"PLEASE STOP UNLESS CAVE TRAINED We Care!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317658.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many childrens on there",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"1",
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372120.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what the children wear on head",
|
|
"answer": "helmet",
|
|
"gt_answers": [
|
|
"helmet"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372120.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many peoples on there",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"4",
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372120.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "In what city is this fire station?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Dallas"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000514454.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the station number on the building?",
|
|
"answer": "10",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000514454.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photo copyrighted to?",
|
|
"answer": "daniel mcdonald",
|
|
"gt_answers": [
|
|
"Nick Scobel"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01632458_702.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo copyrighted?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01632458_702.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the front of the bus?",
|
|
"answer": "bus",
|
|
"gt_answers": [
|
|
"Garage"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373266.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the tool set?",
|
|
"answer": "torc",
|
|
"gt_answers": [
|
|
"Royal Craft"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04154565_26257.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the neon sign say?",
|
|
"answer": "schaffy",
|
|
"gt_answers": [
|
|
"SCHLAFLY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340101.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the airline displayed on the tail fin of the airplane?",
|
|
"answer": "southwest airlines",
|
|
"gt_answers": [
|
|
"southwest"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000262492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the clock brand being advertised in the background?",
|
|
"answer": "seiko",
|
|
"gt_answers": [
|
|
"Rolex"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320304.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which country is the castle located?",
|
|
"answer": "scotland",
|
|
"gt_answers": [
|
|
"Scotland"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02980441_30054.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written inside the octagon?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414585.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What food will she travel for?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"Vegan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000559836.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first 3 numbers on the taxi's license plate?",
|
|
"answer": "123",
|
|
"gt_answers": [
|
|
"389"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the sticker on the taxi front door?",
|
|
"answer": "cab driver",
|
|
"gt_answers": [
|
|
"Taxi Fare"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329280.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does word is on the red sign?",
|
|
"answer": "alto",
|
|
"gt_answers": [
|
|
"ALTO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000559755.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which street is in the 1000 direction?",
|
|
"answer": "schrader",
|
|
"gt_answers": [
|
|
"Schrader"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400399.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which street is in the 150 direction?",
|
|
"answer": "schrader",
|
|
"gt_answers": [
|
|
"Carl"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2400399.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the bus?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"573"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317389.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letter on the player's hats?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"D",
|
|
"B"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000242074.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What product is shown in the image?",
|
|
"answer": "shea cashmere body cream",
|
|
"gt_answers": [
|
|
"Body Cream"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014404.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much does the product weigh?",
|
|
"answer": "1 oz",
|
|
"gt_answers": [
|
|
"198 g"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014404.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the big word on the white sign?",
|
|
"answer": "insead",
|
|
"gt_answers": [
|
|
"INSEAD"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_30.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is on the building?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Yates Wine Lodge",
|
|
"YATISS WINE LODGE"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/168.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business?",
|
|
"answer": "frutta fresca",
|
|
"gt_answers": [
|
|
"Frutta fresca"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329380.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building?\"",
|
|
"answer": "train station",
|
|
"gt_answers": [
|
|
"Garibaldi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359592.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the train?",
|
|
"answer": "the train has no number",
|
|
"gt_answers": [
|
|
"90"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359592.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of car is on the sign?",
|
|
"answer": "saab",
|
|
"gt_answers": [
|
|
"Saab"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000367335.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "The building is for the reserve bank of which country?",
|
|
"answer": "india",
|
|
"gt_answers": [
|
|
"India"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000994.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title given to this photo?",
|
|
"answer": "infinite danger",
|
|
"gt_answers": [
|
|
"imminent danger"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000151259.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where was the photo taken?",
|
|
"answer": "tokyo",
|
|
"gt_answers": [
|
|
"zeros cape",
|
|
"zeros - cape"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000151259.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the soccer club shown on the board?",
|
|
"answer": "manchester united",
|
|
"gt_answers": [
|
|
"Real Madrid CF",
|
|
"RED MADRID CF"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04149813_9247.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what store in on the right?",
|
|
"answer": "dillards",
|
|
"gt_answers": [
|
|
"adidas"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_679.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of fuel should only be used?",
|
|
"answer": "unleaded gasoline",
|
|
"gt_answers": [
|
|
"Unleaded"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03841143_28510.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is inside of the heart?",
|
|
"answer": "one",
|
|
"gt_answers": [
|
|
"350"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000103575.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the sign?",
|
|
"answer": "no bicycles allowed",
|
|
"gt_answers": [
|
|
"NO NEEN NEIN PAS DE VTT DANS LE HAMEAU"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/5671.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the items?",
|
|
"answer": "shanghai luoyang swimwear factory",
|
|
"gt_answers": [
|
|
"Lipotes"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02807133_8098.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is displayed?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"alibaba.com.cn"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02807133_8098.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bookstore?",
|
|
"answer": "oxam",
|
|
"gt_answers": [
|
|
"Oxfam Bookshop"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02871525_19356.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who are they looking to hire?",
|
|
"answer": "a manager",
|
|
"gt_answers": [
|
|
"Volunteers"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02871525_19356.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the side of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Bruckmann"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000557272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What business is the man with the phone in front of?",
|
|
"answer": "a restaurant",
|
|
"gt_answers": [
|
|
"marine"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409772.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this bus going?",
|
|
"answer": "to pick up passengers",
|
|
"gt_answers": [
|
|
"Fullerton"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344564.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is before the word Fullerton?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"43"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344564.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is in the window of the bus?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"4311"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344564.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photo?",
|
|
"answer": "me",
|
|
"gt_answers": [
|
|
"Michael Hanscom"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000578387.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are written on the wall in red?",
|
|
"answer": "tiw",
|
|
"gt_answers": [
|
|
"TNW"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/286032.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the car model name displayed across the top?",
|
|
"answer": "ford model t road trip",
|
|
"gt_answers": [
|
|
"Model T"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03777568_17058.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the blue text displayed across the top say?",
|
|
"answer": "model t road trip",
|
|
"gt_answers": [
|
|
"Road Trip"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03777568_17058.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What abbreviated letters are written on the license plate?",
|
|
"answer": "rv",
|
|
"gt_answers": [
|
|
"BS DUDES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04065272_19543.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is printed on the red sign?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"Slow"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_5.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who wrote The human fly",
|
|
"answer": "robert harris",
|
|
"gt_answers": [
|
|
"T.C. BOYLE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000286009.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the light blue banner read?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Olympus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000023015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the worker holding?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"Stop sign"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340007.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is on the signs?",
|
|
"answer": "pakistan",
|
|
"gt_answers": [
|
|
"Pakistan"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green sign say?",
|
|
"answer": "road closed",
|
|
"gt_answers": [
|
|
"EASY STREET"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315943.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the blue sign say?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"HEADER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315943.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What medical company is advertised on the light blue sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"EMPIRE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000104328.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What tv company is advertised on the red sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"bravia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000104328.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does W.B.Mason sell?",
|
|
"answer": "baseball bats",
|
|
"gt_answers": [
|
|
"OFFICE supplies"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000104328.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the yellow tarp?",
|
|
"answer": "rain",
|
|
"gt_answers": [
|
|
"Dispatch"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign number for Marina Bay?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_54.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the sign number for Dhoby Ghaut?",
|
|
"answer": "ghaut",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_54.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the month during which this competition is taking place?",
|
|
"answer": "august",
|
|
"gt_answers": [
|
|
"February"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02107574_2717.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the year during which this competition is taking place?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2004"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02107574_2717.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the club that is hosting this competition?",
|
|
"answer": "swiss mountain dog club",
|
|
"gt_answers": [
|
|
"Colorado Kennel Club"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02107574_2717.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the right-most remote?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"telefunken"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000152070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bus company?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CHI BOY",
|
|
"CHI-BOY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000021253.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the building?",
|
|
"answer": "a hospital",
|
|
"gt_answers": [
|
|
"Graduate student center",
|
|
"RICHARD A. AUHLL GRADUATE STUDENT CENTER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/136.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of monster is it?",
|
|
"answer": "gila monster",
|
|
"gt_answers": [
|
|
"Gila",
|
|
"GILA MONSTER"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01692333_1235.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many calories per serving does this product have?",
|
|
"answer": "100 calories per serving",
|
|
"gt_answers": [
|
|
"90"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006885.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the listed serving size on the label?",
|
|
"answer": "1 cup",
|
|
"gt_answers": [
|
|
"1/2 cup"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006885.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is displayed on the black sign?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"880"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000118150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store behind all these dipshits?",
|
|
"answer": "pam",
|
|
"gt_answers": [
|
|
"Pam"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413770.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is WERTH wearing?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"28"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000013362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT COLOUR IS THEWALL",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"YELLOW"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN THE PICTURE",
|
|
"answer": "it is very dirty japanese bathroom",
|
|
"gt_answers": [
|
|
"IT IS VERY DIRTY JAPANESE BATHROOM",
|
|
"it is very dirty Japanese bathroom."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335226.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the abbreviated day of the week?",
|
|
"answer": "d",
|
|
"gt_answers": [
|
|
"TUE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03706229_5078.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the maker of the watch?",
|
|
"answer": "casio",
|
|
"gt_answers": [
|
|
"CASIO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03706229_5078.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which word appears on the yellow sign on the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Square"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000236224.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the avenue on the bottom street sign?",
|
|
"answer": "purdue avenue",
|
|
"gt_answers": [
|
|
"Purdue",
|
|
"PURDUE AVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366148.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the route displayed at the top of the bus?",
|
|
"answer": "the route is displayed on the bus",
|
|
"gt_answers": [
|
|
"L SHUTTLE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360164.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name written at the bottom?",
|
|
"answer": "mario",
|
|
"gt_answers": [
|
|
"Maria"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372381.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of ovens are in the store?",
|
|
"answer": "pizza ovens",
|
|
"gt_answers": [
|
|
"pizza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what brand of appliances are available?",
|
|
"answer": "kenmore",
|
|
"gt_answers": [
|
|
"bari"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What cartoon character is on the mug?",
|
|
"answer": "peanuts",
|
|
"gt_answers": [
|
|
"Snoopy"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000458748.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what you seen text in scat board stage?",
|
|
"answer": "scat board stage",
|
|
"gt_answers": [
|
|
"STANCE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713863.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What movie is advertised on the bus?",
|
|
"answer": "star wars",
|
|
"gt_answers": [
|
|
"Final Destination"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342146.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is advertised below the clock?",
|
|
"answer": "a sailboat",
|
|
"gt_answers": [
|
|
"Colgate"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318221.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the player in #22 Jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CHAMBLESS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000535669.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the warning sign ask you to do?",
|
|
"answer": "slow down",
|
|
"gt_answers": [
|
|
"slow down"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much euors are the Pendant watches?",
|
|
"answer": "200 eu",
|
|
"gt_answers": [
|
|
"six"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3107.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the bigger sign say?",
|
|
"answer": "no photography",
|
|
"gt_answers": [
|
|
"no photos"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3107.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the building",
|
|
"answer": "super 8 motel",
|
|
"gt_answers": [
|
|
"Super 8 Motel"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who won?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"YOU"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000308180.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the yellow word on the red sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"War"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000559235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word in the website tag?",
|
|
"answer": "rv",
|
|
"gt_answers": [
|
|
"Motorhomes"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04065272_27360.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What vintage poster is on the wall?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Coca Cola"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000109145.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what website do i go to?",
|
|
"answer": "youtube",
|
|
"gt_answers": [
|
|
"creativecommons.org"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02787622_5276.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the banjo label say?",
|
|
"answer": "some rights reserved",
|
|
"gt_answers": [
|
|
"Some rights reserved"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02787622_5276.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the needle pointing closest to?",
|
|
"answer": "12",
|
|
"gt_answers": [
|
|
"1000"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02794156_872.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price listed on the \"Special Offer\" sign?",
|
|
"answer": "\u00a3",
|
|
"gt_answers": [
|
|
"$4.99"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_962.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what type of bra is advertised?",
|
|
"answer": "strapless",
|
|
"gt_answers": [
|
|
"strapless"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892767_18140.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of bra is shown?",
|
|
"answer": "strapless",
|
|
"gt_answers": [
|
|
"strapless"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892767_18140.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Wonder View Dr"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000455010.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the street sign?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"3500"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000455010.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the bottom of the snowboard?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FORUM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000005916.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many vehicles are in this picture?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001182.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What amusement park is written above the large sphere?",
|
|
"answer": "epcot",
|
|
"gt_answers": [
|
|
"Epcot"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03388043_788.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is next to the arrow pointing left?",
|
|
"answer": "ski",
|
|
"gt_answers": [
|
|
"Whitetail"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is next to the arrow pointing right?",
|
|
"answer": "ski",
|
|
"gt_answers": [
|
|
"Redhawk"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405121.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word after the box that is marked?",
|
|
"answer": "guest",
|
|
"gt_answers": [
|
|
"Incredible"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015002.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the backpack?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"JONAS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000298692.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is located on the bus tire?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GOOD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000298692.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the phone number for the food truck?",
|
|
"answer": "888-deli-america",
|
|
"gt_answers": [
|
|
"571-458-9732"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368432.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of food is served from the food truck?",
|
|
"answer": "fast food",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368432.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is number on white shirt",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"41"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000278710.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many sides are on the STOP sign",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331442.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is depicted here?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Coca Cola",
|
|
"COCA-COLA"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percentage of your daily fiber needs does this food supply?",
|
|
"answer": "9 %",
|
|
"gt_answers": [
|
|
"46%"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000560.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is the airline from?",
|
|
"answer": "japan",
|
|
"gt_answers": [
|
|
"JAPAN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000559438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which company is this plane working for?",
|
|
"answer": "japan airlines",
|
|
"gt_answers": [
|
|
"JAPAN airlines"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000559438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in the red bird on the planes tail?",
|
|
"answer": "japan",
|
|
"gt_answers": [
|
|
"jal"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000559438.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the words say on the front of the bus in green?",
|
|
"answer": "sydney sightseeing",
|
|
"gt_answers": [
|
|
"Not in service"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359579.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many wheels of the bus are visible?",
|
|
"answer": "four",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359579.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on the egg carton?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"OUR FARM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000338988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many eggs are in the egg carton as shown on its lid?",
|
|
"answer": "12",
|
|
"gt_answers": [
|
|
"ONE DOZEN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000338988.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the black train's number?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"73",
|
|
"SEVENTY THREE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348294.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is mini?",
|
|
"answer": "dog",
|
|
"gt_answers": [
|
|
"STORAGE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369089.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which word is shown at the top of the watch?",
|
|
"answer": "protection",
|
|
"gt_answers": [
|
|
"Protection"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03197337_4275.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is shown on the watch?",
|
|
"answer": "digital time",
|
|
"gt_answers": [
|
|
"13:01:14",
|
|
"13:01 14"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03197337_4275.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date shown on the watch?",
|
|
"answer": "the date is shown on the dial",
|
|
"gt_answers": [
|
|
"8-24",
|
|
"FR 8-24"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03197337_4275.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company makes this product?",
|
|
"answer": "the william hill company",
|
|
"gt_answers": [
|
|
"MELBOURN BR"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823428_1695.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What flavor is this product?",
|
|
"answer": "apple",
|
|
"gt_answers": [
|
|
"APRICOT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823428_1695.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white banner say?",
|
|
"answer": "dominion",
|
|
"gt_answers": [
|
|
"Stage Sponsor"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03721384_9895.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the blue banner say?",
|
|
"answer": "dominion",
|
|
"gt_answers": [
|
|
"Dominion It all starts here."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03721384_9895.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company?",
|
|
"answer": "tipper's",
|
|
"gt_answers": [
|
|
"Tippers"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000102004.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What locations are shown in the upper left portion of the picture?",
|
|
"answer": "paris france",
|
|
"gt_answers": [
|
|
"PARIS VINCENNES"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000566839.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What other URL is shown on the banner with the blue background in the upper left portion of the photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CASAQLIGUE.FR"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000566839.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is available for purchase in the building across the street?",
|
|
"answer": "bikes",
|
|
"gt_answers": [
|
|
"FOOD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000102506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the model number of the street light?",
|
|
"answer": "the model number of the street light is sml",
|
|
"gt_answers": [
|
|
"K1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000102506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal sign direct you to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359907.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business that operates out of this building?",
|
|
"answer": "a restaurant",
|
|
"gt_answers": [
|
|
"La Piramibe"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4912.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the photographer who shot this photograph?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Versaci"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4912.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company branded this toy?",
|
|
"answer": "toyota",
|
|
"gt_answers": [
|
|
"Mac Tools",
|
|
"MAL TOOLS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02966687_4242.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What dish is the sauce for?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"pizza"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000381945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word of the brand name of the can?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"muir"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000381945.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the red sign?",
|
|
"answer": "ferrari",
|
|
"gt_answers": [
|
|
"Santander"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417794.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many cats are there?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343208.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text say?",
|
|
"answer": "a view of a river and mountains",
|
|
"gt_answers": [
|
|
"HoangLong photo"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/5590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bus' tour company?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"big bus tours"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000388935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where does the tour take place?",
|
|
"answer": "in london",
|
|
"gt_answers": [
|
|
"london"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000388935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company in the building on the right",
|
|
"answer": "adobe",
|
|
"gt_answers": [
|
|
"Topshelf Clothing"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2711.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of business is North Moore?",
|
|
"answer": "industrial",
|
|
"gt_answers": [
|
|
"Catering"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/2711.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team does the boys with yellow shirts play for?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ISB"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000447292.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the inn?",
|
|
"answer": "the fox",
|
|
"gt_answers": [
|
|
"The Pickerel",
|
|
"THE PICKEREL INN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2851.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the side of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"tindo"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000293074.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the watermark written on the lower left hand corner of the image?",
|
|
"answer": "a city",
|
|
"gt_answers": [
|
|
"AMZ Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374854.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Q: What brand of jeans is the skater wearing? A: element",
|
|
"answer": "long answer a element",
|
|
"gt_answers": [
|
|
"element"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000162654.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of this product?",
|
|
"answer": "therapy systems emergency treatment cream",
|
|
"gt_answers": [
|
|
"Therapy Systems"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04357314_1828.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in red in the black ring around the bottom?",
|
|
"answer": "therapy systems emergency treatment cream",
|
|
"gt_answers": [
|
|
"Emergency Treatment Cream"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04357314_1828.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of board reference?",
|
|
"answer": "stop look and listen",
|
|
"gt_answers": [
|
|
"railroad crossing stop look and listen"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352003.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the boat",
|
|
"answer": "the name of the boat",
|
|
"gt_answers": [
|
|
"Miss Grace"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/965.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What six numbers are visible?",
|
|
"answer": "the train number",
|
|
"gt_answers": [
|
|
"222003"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316423.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the train headed to?",
|
|
"answer": "a city",
|
|
"gt_answers": [
|
|
"Sheffield"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316423.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what word is printed on the from of the dirt bike?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CARAS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000329469.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sports team is the boy's t-shirt advertising?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Miami Eagles"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000443082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the tail of this airliner have on it?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"AA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000281262.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say in the tower?",
|
|
"answer": "train station",
|
|
"gt_answers": [
|
|
"MUNCHEN HBF",
|
|
"MUNCHEN-HBF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375817.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the crockpot?",
|
|
"answer": "crock pot",
|
|
"gt_answers": [
|
|
"Rival"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03133878_1242.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word after the copyright symbol?",
|
|
"answer": "coral",
|
|
"gt_answers": [
|
|
"Jeff"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02607072_5165.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word after the copyright symbol?",
|
|
"answer": "coral",
|
|
"gt_answers": [
|
|
"Williams"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02607072_5165.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the full URL listed?",
|
|
"answer": "http://www",
|
|
"gt_answers": [
|
|
"JeffWilliamsNeptunesArt.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02607072_5165.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many tracks are at the railway crossing?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2 tracks",
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the beand written in the front of the locomotive?",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"metronom"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number written on the locomotive?",
|
|
"answer": "nr",
|
|
"gt_answers": [
|
|
"146 531-9"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the name of the hotel?",
|
|
"answer": "galaxy hotel",
|
|
"gt_answers": [
|
|
"Galaxy",
|
|
"Galaxy Hotel"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000465.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the shirt say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SURF",
|
|
"SURF THE FALL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000369362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company took the picture?",
|
|
"answer": "scott scott photography",
|
|
"gt_answers": [
|
|
"Purple Martini",
|
|
"PURPLE MARTINI WEDDING PHOTOGRAPHY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7507.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of photography does Purple Martini do?",
|
|
"answer": "wedding photography",
|
|
"gt_answers": [
|
|
"Wedding Photography",
|
|
"WEDDING"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7507.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "carousel",
|
|
"gt_answers": [
|
|
"Tickets"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02966193_31844.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What clothing company typed on the woman's shirt?",
|
|
"answer": "american apparel",
|
|
"gt_answers": [
|
|
"VANS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000418185.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334638.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the bottom sign?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"OPPOSING TRAFFIC DOES NOT STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334638.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is being sold at the counter?",
|
|
"answer": "meat",
|
|
"gt_answers": [
|
|
"Meat"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_963.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the building on the right?",
|
|
"answer": "surf in the city",
|
|
"gt_answers": [
|
|
"UBS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592782.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the stripe on the blue shirts/",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"white",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406498.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many players are there?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"three",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406498.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top word on the sign?",
|
|
"answer": "place",
|
|
"gt_answers": [
|
|
"Place"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362798.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the side of the train?",
|
|
"answer": "c40-8",
|
|
"gt_answers": [
|
|
"8446"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325228.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign say on the motorcycle?",
|
|
"answer": "it says vintage motorcycle",
|
|
"gt_answers": [
|
|
"335"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330992.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the black sign say on the motorcycle?",
|
|
"answer": "i am a motorcycle",
|
|
"gt_answers": [
|
|
"FH2949"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330992.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the stop light?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000120077.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green banner say?",
|
|
"answer": "dream",
|
|
"gt_answers": [
|
|
"ITALIAN ICE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412365.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the beige sign say?",
|
|
"answer": "dream",
|
|
"gt_answers": [
|
|
"Cheese Dream"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412365.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name?",
|
|
"answer": "elm street",
|
|
"gt_answers": [
|
|
"22 3/4 ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334344.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which is the avenue?",
|
|
"answer": "the avenue",
|
|
"gt_answers": [
|
|
"It is 8th Ave/CTH D.",
|
|
"8TH AVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334344.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this Barleywine?",
|
|
"answer": "hamilton spot",
|
|
"gt_answers": [
|
|
"TORS HAMMER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336698.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is listed on the tail of the bottom plane?",
|
|
"answer": "space shuttle",
|
|
"gt_answers": [
|
|
"NASA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333654.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358198.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the lable",
|
|
"answer": "a banana",
|
|
"gt_answers": [
|
|
"ALVIN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000576517.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the ear tag number of this cow?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"9420"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000305431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the back of the plane?",
|
|
"answer": "space shuttle",
|
|
"gt_answers": [
|
|
"NASA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416592.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this function showcaseing?",
|
|
"answer": "a decorated toilet",
|
|
"gt_answers": [
|
|
"Decorated Thrones"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417795.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on the projector?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"BENQ"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04009552_13027.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bus",
|
|
"answer": "san francisco bay area transit",
|
|
"gt_answers": [
|
|
"1955"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000262442.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus location route",
|
|
"answer": "the bus is in downtown los angeles",
|
|
"gt_answers": [
|
|
"Old Pasadena",
|
|
"PASADENA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000262442.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of shoes are shown on the sign?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"converse"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000521652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this photograph?",
|
|
"answer": "julian photography",
|
|
"gt_answers": [
|
|
"Jason Milich"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338756.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written at the bottom of the image?",
|
|
"answer": "julian photography",
|
|
"gt_answers": [
|
|
"Jason Milich Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338756.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is visible on the digital display?",
|
|
"answer": "fahrenheit or celsius",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the switch currently set on?",
|
|
"answer": "on",
|
|
"gt_answers": [
|
|
"auto"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word appears below the word 'room'?",
|
|
"answer": "temperature",
|
|
"gt_answers": [
|
|
"set"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001370.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the box of paper?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"xerox"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000146700.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is in the image?",
|
|
"answer": "tiffany and co",
|
|
"gt_answers": [
|
|
"Asier Spring",
|
|
"ASTER SPRING"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_379.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which is the number written on the t-shirt?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355245.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What tow numbers come after 50?",
|
|
"answer": "60 and 70",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348045.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the man's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000236186.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of bus is shown?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"School Bus"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337678.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the tooth brush brand?",
|
|
"answer": "crest",
|
|
"gt_answers": [
|
|
"Oral-B"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369071.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the product in the green container?",
|
|
"answer": "activi",
|
|
"gt_answers": [
|
|
"activia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000105643.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What watermark is displayed?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"austin woods"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02870880_117.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What zone is in the green circle?",
|
|
"answer": "the green circle is zone 1",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000491440.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What stop is depicted?",
|
|
"answer": "wharf street",
|
|
"gt_answers": [
|
|
"156"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000491440.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of building is the right building?",
|
|
"answer": "apartment building",
|
|
"gt_answers": [
|
|
"Hotel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2688.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the back of the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"200"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000121535.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word written at the bottom of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Peter"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000520765.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What phrase is printed on the garbage container?",
|
|
"answer": "archaeology is rubbish",
|
|
"gt_answers": [
|
|
"ARCHAEOLOGY IS RUBBISH"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02747177_2230.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the quote on the trash bin?",
|
|
"answer": "archaeology is rubbish",
|
|
"gt_answers": [
|
|
"Archaeology is rubbish"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02747177_2230.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time does the clock display?",
|
|
"answer": "it displays the time",
|
|
"gt_answers": [
|
|
"11:38"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03196217_8034.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of vehicle?",
|
|
"answer": "tow truck",
|
|
"gt_answers": [
|
|
"HARRIS'S AMUSE MENTS",
|
|
"HARRIS'S AMUSEMENTS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332432.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What bold word is dark blue on the blue bag?",
|
|
"answer": "advance",
|
|
"gt_answers": [
|
|
"ADVANCE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02096294_1019.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are in the middle of the globe?",
|
|
"answer": "advance",
|
|
"gt_answers": [
|
|
"FCI"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02096294_1019.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cost of VERMUT?",
|
|
"answer": "it is sold by the litre",
|
|
"gt_answers": [
|
|
"3,70"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4573.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cost of VI MISSA?",
|
|
"answer": "the price of a bottle of vi missa varies depending on the vintage",
|
|
"gt_answers": [
|
|
"260"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4573.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the cost of BLANC?",
|
|
"answer": "the cost of blanc varies depending on where you buy it",
|
|
"gt_answers": [
|
|
"155"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4573.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the planes/",
|
|
"answer": "biplane",
|
|
"gt_answers": [
|
|
"Breitling"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359542.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is the stop sign?",
|
|
"answer": "in front of the building",
|
|
"gt_answers": [
|
|
"war"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000308579.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What the yellow words say?",
|
|
"answer": "the beach",
|
|
"gt_answers": [
|
|
"adi home stay"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000170464.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the left side of the cup?",
|
|
"answer": "coke",
|
|
"gt_answers": [
|
|
"Coke"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352647.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the right side of the cup?",
|
|
"answer": "coke",
|
|
"gt_answers": [
|
|
"Coca-Cola"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352647.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the runway marker?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"B 09-27"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000424481.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the library?",
|
|
"answer": "sydney central library",
|
|
"gt_answers": [
|
|
"Cockburn"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343018.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the traffic sign say?",
|
|
"answer": "dc law",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322004.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the front of the shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Marine"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000581181.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign with the arrow read?",
|
|
"answer": "one day",
|
|
"gt_answers": [
|
|
"One Day"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316646.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the higher jumper wearing?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04540053_7860.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the airplane?",
|
|
"answer": "san diego",
|
|
"gt_answers": [
|
|
"LY-LPG"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331856.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the side of the black box?",
|
|
"answer": "s",
|
|
"gt_answers": [
|
|
"P"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03891332_3736.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word written at the bottom of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"quyen",
|
|
"ouyen"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000505963.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the billboard says that starts with letter V",
|
|
"answer": "vietnam tourism",
|
|
"gt_answers": [
|
|
"VIECHA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000044017.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street was this image taken near?",
|
|
"answer": "greenock cut",
|
|
"gt_answers": [
|
|
"Patrick Street"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416621.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed limit?",
|
|
"answer": "35",
|
|
"gt_answers": [
|
|
"35"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000425439.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this street?",
|
|
"answer": "paris avenue",
|
|
"gt_answers": [
|
|
"paris Ave"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000171082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the traffic advice on the sign?",
|
|
"answer": "no through road",
|
|
"gt_answers": [
|
|
"no Through road"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000171082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE WHITE BUS NUMNER/",
|
|
"answer": "the white bus number/ short answer",
|
|
"gt_answers": [
|
|
"H-192"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "HOW MANY PEOPLE ARE STANDING OUTSIDE THE BUS?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN THE FRONT OF WHITE BUS?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"HOTARD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many mL can this container measure?",
|
|
"answer": "100 ml",
|
|
"gt_answers": [
|
|
"1000 mL",
|
|
"1000"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02815834_1187.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the bat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"113"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000550478.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What board game is being played?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Sequence"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000041252.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this shop?",
|
|
"answer": "starbucks coffee",
|
|
"gt_answers": [
|
|
"Starbucks Coffee"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000350.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this building called?",
|
|
"answer": "new york stock exchange",
|
|
"gt_answers": [
|
|
"NEW YORK STOCK EXCHANGE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000102899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What store is on the corner?",
|
|
"answer": "city sports",
|
|
"gt_answers": [
|
|
"City Sports"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1863.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What school is nearby?",
|
|
"answer": "city sports",
|
|
"gt_answers": [
|
|
"Lesley University"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1863.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text on the license plate?",
|
|
"answer": "san diego",
|
|
"gt_answers": [
|
|
"2DTW337"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358487.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the hotel?",
|
|
"answer": "wynn hotel",
|
|
"gt_answers": [
|
|
"CONGRESS HOTEL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372405.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336104.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of car is passing by?",
|
|
"answer": "taxi",
|
|
"gt_answers": [
|
|
"NYC TAXI",
|
|
"taxi"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416422.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is it?",
|
|
"answer": "the lock screen",
|
|
"gt_answers": [
|
|
"21:12"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019505.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What day of the week is it?",
|
|
"answer": "it's monday",
|
|
"gt_answers": [
|
|
"Saturday"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019505.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percentage is the battery at?",
|
|
"answer": "it's at 0 percent",
|
|
"gt_answers": [
|
|
"77%"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019505.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What part of the military does this plane belong to?",
|
|
"answer": "the royal air force",
|
|
"gt_answers": [
|
|
"ARMY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/892.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is in the center of the plane?",
|
|
"answer": "b",
|
|
"gt_answers": [
|
|
"S"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/892.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website written on the bottom left corner?",
|
|
"answer": "youtube",
|
|
"gt_answers": [
|
|
"FreakingNews.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02123159_9882.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is the Tavuk Doner on the food cart?",
|
|
"answer": "10 lira",
|
|
"gt_answers": [
|
|
"2",
|
|
"2.00 TL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000019236.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is below the eggplant in the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Sev"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000019236.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the horse rider?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"853"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343933.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website on the wall?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"aps.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343933.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is the manual from?",
|
|
"answer": "australia",
|
|
"gt_answers": [
|
|
"Japan"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04238763_22502.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What date is displayed in the picture at the bottom left hand corner?",
|
|
"answer": "january 1",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362948.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Brand name?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"iHome"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time is displayed on the clock?",
|
|
"answer": "current time",
|
|
"gt_answers": [
|
|
"3:24"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blender?",
|
|
"answer": "vitamix",
|
|
"gt_answers": [
|
|
"Osterizer"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403635.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is shown on the jersey?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"92"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159687.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of spice chai is this tea?",
|
|
"answer": "tiger spice chai",
|
|
"gt_answers": [
|
|
"Tiger Spice",
|
|
"Tiger"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the net weight in oz?",
|
|
"answer": "16 oz",
|
|
"gt_answers": [
|
|
"14"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2410324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of skis is the man wearing his sunglasses carrying?",
|
|
"answer": "k2",
|
|
"gt_answers": [
|
|
"fischer fischer",
|
|
"fischer"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000273951.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the guest house?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"SUNRISE GUEST HOUSE",
|
|
"SUNRISE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000017472.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters appear on the wall behind the cow?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"STD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000017472.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the course?",
|
|
"answer": "santa rita disc golf course",
|
|
"gt_answers": [
|
|
"Buena Vista Disc Golf Course"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415987.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is this Ambergate Carnival?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_16599.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where can you find the Salty Dawg Saloon?",
|
|
"answer": "atlanta, georgia",
|
|
"gt_answers": [
|
|
"Homer Spit Alaska"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360865.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was Rory's established?",
|
|
"answer": "1939",
|
|
"gt_answers": [
|
|
"1959"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3462.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of this product?",
|
|
"answer": "organic sugar",
|
|
"gt_answers": [
|
|
"Trader Joe's"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03871628_21957.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the label in the lower left hand corner indicate?",
|
|
"answer": "organic sugar",
|
|
"gt_answers": [
|
|
"USDA organic"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03871628_21957.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is Trader Joe's organic sugar made of?",
|
|
"answer": "a blend of cane and beet sugar",
|
|
"gt_answers": [
|
|
"Evaporated Cane Juice"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03871628_21957.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "x1",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414492.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where can the letters ALU be found in this photo?",
|
|
"answer": "alu alu",
|
|
"gt_answers": [
|
|
"alu"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000322445.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign at the top of the restaurant read?",
|
|
"answer": "asian market",
|
|
"gt_answers": [
|
|
"Sushi"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_965.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the magazine title?",
|
|
"answer": "wood",
|
|
"gt_answers": [
|
|
"Making"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03954731_10167.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last two words of the magazine title?",
|
|
"answer": "wood planes",
|
|
"gt_answers": [
|
|
"Wood Planes"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03954731_10167.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the television commercial for?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Liberty"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000215398.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bike?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"629"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000282015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What county is this bus from?",
|
|
"answer": "lee county",
|
|
"gt_answers": [
|
|
"Loudoun County",
|
|
"Loudoun"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411247.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letter written on the baseball caps?",
|
|
"answer": "j",
|
|
"gt_answers": [
|
|
"J"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000140940.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What movie are these donuts from?",
|
|
"answer": "toy story",
|
|
"gt_answers": [
|
|
"THE SIMPSONS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355097.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who's name is on the picture?",
|
|
"answer": "lauren f",
|
|
"gt_answers": [
|
|
"James S. Ferguson"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336947.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year would you say this picture was taken?",
|
|
"answer": "2007",
|
|
"gt_answers": [
|
|
"2007"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336947.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the complete text in the photo?",
|
|
"answer": "bananas growing on a tree",
|
|
"gt_answers": [
|
|
"James S. Ferguson 2007",
|
|
"\ufffd\ufffd JAMES S. FERGUSON 2007"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336947.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of board reference?",
|
|
"answer": "a board of directors",
|
|
"gt_answers": [
|
|
"jord"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357215.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text in the picture?",
|
|
"answer": "hamster",
|
|
"gt_answers": [
|
|
"hesedetang",
|
|
"\ufffd\ufffd HESEDETANG *"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02342885_11074.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the animal eating",
|
|
"answer": "broccoli",
|
|
"gt_answers": [
|
|
"broccoli"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02342885_11074.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed limit?",
|
|
"answer": "30 mph",
|
|
"gt_answers": [
|
|
"40"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_228.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the yellow street sign say?",
|
|
"answer": "attention college",
|
|
"gt_answers": [
|
|
"ATENCION COLEGIO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321704.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is above the bald mans head?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"11"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000006253.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is on the young mans shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"nasca"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000006253.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the building?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"36:6",
|
|
"36-6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341637.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the hair ionic system?",
|
|
"answer": "conair",
|
|
"gt_answers": [
|
|
"CONAIR"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03483316_46423.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many watts ionic system?",
|
|
"answer": "200 watts",
|
|
"gt_answers": [
|
|
"1875 watt Double ionic port system",
|
|
"1875"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03483316_46423.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white on the red bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Stagecoach"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000032458.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the street?",
|
|
"answer": "maple street",
|
|
"gt_answers": [
|
|
"main street"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333927.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the board displayed?",
|
|
"answer": "street sign",
|
|
"gt_answers": [
|
|
"old town temecula WiFi district",
|
|
"OLD TOW TEMECULA WI-FI DISTRICT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2333927.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was the photo taken?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2006"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03854065_8369.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is listed in yellow at the bottom?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2005"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01664065_5049.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the corner store?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"office depot"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/636.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is written on the white jersey?",
|
|
"answer": "indians",
|
|
"gt_answers": [
|
|
"Davis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329838.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the structure?",
|
|
"answer": "canada",
|
|
"gt_answers": [
|
|
"Canada"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000161796.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which year is this expo?",
|
|
"answer": "2015",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000161796.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is being displayed?",
|
|
"answer": "canada",
|
|
"gt_answers": [
|
|
"Canada"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000161796.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the website address that is listed?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"www.versaci.fr"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What man's first name is shown on the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Pascal"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on sign board?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713180.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the right wing?",
|
|
"answer": "antenna",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364613.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the word say?",
|
|
"answer": "doublet",
|
|
"gt_answers": [
|
|
"Doublet 10X-20mm"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_7349.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street on this sign?",
|
|
"answer": "cecilia street",
|
|
"gt_answers": [
|
|
"CECILIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000086831.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number block is displayed on the sign?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"800"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000086831.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the left word on the page?",
|
|
"answer": "horse",
|
|
"gt_answers": [
|
|
"stefano"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000375841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the image?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000375841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the capitalized word on the page?",
|
|
"answer": "horses",
|
|
"gt_answers": [
|
|
"Stefano Pertusati"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000375841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many meters are shown on the sign?",
|
|
"answer": "100",
|
|
"gt_answers": [
|
|
"50"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000550783.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the sign?",
|
|
"answer": "nothing can stop me now",
|
|
"gt_answers": [
|
|
"Nothing can stop me now.",
|
|
"NOTHING CAN STOP ME NOW"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368955.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of bus is this?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"School bus"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001106.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the building?",
|
|
"answer": "gen inc",
|
|
"gt_answers": [
|
|
"GENERIS INC. CATERERS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red text say?",
|
|
"answer": "gen inc",
|
|
"gt_answers": [
|
|
"GENERIS INC. CATERERS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375529.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top word on the big tire?",
|
|
"answer": "ford",
|
|
"gt_answers": [
|
|
"BEVEROL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04465501_11897.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bottom word on the big tire?",
|
|
"answer": "ford",
|
|
"gt_answers": [
|
|
"SCHROOT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04465501_11897.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the manufacturer of the motorcycle?",
|
|
"answer": "ducati",
|
|
"gt_answers": [
|
|
"Ducati"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000021151.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top left side of the front of the train?",
|
|
"answer": "mta",
|
|
"gt_answers": [
|
|
"VIRYF",
|
|
"UIRYF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in orange under the windsheild?",
|
|
"answer": "nyc subway",
|
|
"gt_answers": [
|
|
"GANA",
|
|
"GANZ"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written above the right windsheild?",
|
|
"answer": "amtrak",
|
|
"gt_answers": [
|
|
"JBGR",
|
|
"JBER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373431.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the on the timeline?",
|
|
"answer": "19th century",
|
|
"gt_answers": [
|
|
"14:56"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is writing on the blue board?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"Only1 Technology",
|
|
"Only 1 Technology"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the manufacturer of the espresso machine?",
|
|
"answer": "breville",
|
|
"gt_answers": [
|
|
"taurus"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03297495_1272.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Bawarchi"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000200.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the train?",
|
|
"answer": "aquarque",
|
|
"gt_answers": [
|
|
"Aguarague"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349702.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NUMBER OF THIS BUS?",
|
|
"answer": "no number on bus",
|
|
"gt_answers": [
|
|
"D2"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_24.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE NAME OF THIS BUS?",
|
|
"answer": "a bus",
|
|
"gt_answers": [
|
|
"NUS"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_24.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What model of Yamaha is this",
|
|
"answer": "yzf r6",
|
|
"gt_answers": [
|
|
"GTS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358649.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the skier on the right?",
|
|
"answer": "20",
|
|
"gt_answers": [
|
|
"200"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340799.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the big sign refer in bus?",
|
|
"answer": "it is a double decker bus",
|
|
"gt_answers": [
|
|
"partybus zakopane",
|
|
"PARTYBUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354064.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the big black letters on mouth wash read?",
|
|
"answer": "advil",
|
|
"gt_answers": [
|
|
"Antiseptic Oral Rinse"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000520072.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the top row left sign?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"1,000"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326723.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the top row right sign?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"1,200"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326723.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the bottom row left sign?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"1,300"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326723.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of tour is the bus taking you on?",
|
|
"answer": "sightseeing",
|
|
"gt_answers": [
|
|
"OPEN TOP SIGHTSEEING",
|
|
"WASHINGTON DC OPEN TOP SIGHTSEEING"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324588.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"STOP HUMP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324065.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on this cup?",
|
|
"answer": "for",
|
|
"gt_answers": [
|
|
"Fido"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03063599_3904.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "ny park",
|
|
"gt_answers": [
|
|
"51"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to the park",
|
|
"gt_answers": [
|
|
"Hyde Park"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362524.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the arrow pointing left?",
|
|
"answer": "turn left",
|
|
"gt_answers": [
|
|
"Einbahn"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_150.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time does the sign start?",
|
|
"answer": "8am",
|
|
"gt_answers": [
|
|
"8 am"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000436048.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the truck?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000216579.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the glove?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"R"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000367823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the watermark on the image?",
|
|
"answer": "it is a copyright watermark",
|
|
"gt_answers": [
|
|
"iStockphoto"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04467665_41038.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the product?",
|
|
"answer": "special lasagne",
|
|
"gt_answers": [
|
|
"Special Lasagne"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016902.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what letters are in the circle on the front of the plave",
|
|
"answer": "w h j",
|
|
"gt_answers": [
|
|
"TWA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339787.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the church?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"HANA",
|
|
"HANA CHURCH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000396528.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is affiliated with the name \"Hana?\"",
|
|
"answer": "hana is a surname",
|
|
"gt_answers": [
|
|
"CHURCH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000396528.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the title left to inbox say?",
|
|
"answer": "inbox",
|
|
"gt_answers": [
|
|
"next"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000466001.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the title of the green book?",
|
|
"answer": "a book of hours",
|
|
"gt_answers": [
|
|
"teams"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000466001.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the white sign?",
|
|
"answer": "florida",
|
|
"gt_answers": [
|
|
"Florida"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363832.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many calories is half a carton of this?",
|
|
"answer": "tomato pasta",
|
|
"gt_answers": [
|
|
"80"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the amount of grams of sugar in half a carton?",
|
|
"answer": "0 g 0 g",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say about containing saturates?",
|
|
"answer": "0 g saturates",
|
|
"gt_answers": [
|
|
"trace"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029015.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name of the gas?",
|
|
"answer": "shell",
|
|
"gt_answers": [
|
|
"Shell"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03425413_17111.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the train?",
|
|
"answer": "0 4 6",
|
|
"gt_answers": [
|
|
"45110"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000031666.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "On what day can you get 1/2 price burgers?",
|
|
"answer": "friday",
|
|
"gt_answers": [
|
|
"Tues Night",
|
|
"TUES"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time can you get 1/2 price burgers?",
|
|
"answer": "on tuesdays and wednesdays",
|
|
"gt_answers": [
|
|
"5PM-Close"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can you get for 1/2 Price on Tues Night?",
|
|
"answer": "a movie ticket",
|
|
"gt_answers": [
|
|
"Burgers"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the top right graffiti tag say?",
|
|
"answer": "nyc",
|
|
"gt_answers": [
|
|
"CHARM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000239593.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline owns the plane",
|
|
"answer": "ethiopian airlines",
|
|
"gt_answers": [
|
|
"Ethiopian"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02690373_17262.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the team name printed on the jersey?",
|
|
"answer": "miami heat",
|
|
"gt_answers": [
|
|
"Heat"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363604.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the jersey?",
|
|
"answer": "23",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2363604.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of candy is this?",
|
|
"answer": "gummy bears",
|
|
"gt_answers": [
|
|
"HERSHEY'S"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000026297.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the truck in the lower left corner?",
|
|
"answer": "u s airways",
|
|
"gt_answers": [
|
|
"860"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000475763.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on sale?`",
|
|
"answer": "cattle",
|
|
"gt_answers": [
|
|
"Computer"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000107353.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whose stocking is this?",
|
|
"answer": "mano",
|
|
"gt_answers": [
|
|
"Manoj"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03026506_1128.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this store?",
|
|
"answer": "denim store",
|
|
"gt_answers": [
|
|
"Bread & Butter"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_799.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the stop sign say?",
|
|
"answer": "crosswalk",
|
|
"gt_answers": [
|
|
"STOP FOR PEDESTRIAN IN CROSSWALK"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367176.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the license plate from?",
|
|
"answer": "new jersey",
|
|
"gt_answers": [
|
|
"ONTARIO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339542.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "tah",
|
|
"gt_answers": [
|
|
"J16 69L"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339542.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the blimb?",
|
|
"answer": "holden",
|
|
"gt_answers": [
|
|
"Holden"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02692877_9965.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who manufactured the can of fruit shown on the left side of this photo?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Del Monte"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000506045.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What white word appears at the top right of the image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"coffee"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000347405.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What black word appears below the jar?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ESPRESSO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000347405.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "whats the only full word written here?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"and"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000212070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the guy's hoodie?",
|
|
"answer": "i'm a nerd",
|
|
"gt_answers": [
|
|
"DRUG FREE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000446834.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the arrow?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/786.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter in white at the top left of the photo?",
|
|
"answer": "r",
|
|
"gt_answers": [
|
|
"r"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03843555_11276.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "be smart don't walk in the street",
|
|
"gt_answers": [
|
|
"BE SMART Don't Walk in the Streey",
|
|
"BE SMART"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409222.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What colour is the main motorcycle?",
|
|
"answer": "black and green",
|
|
"gt_answers": [
|
|
"Black and green",
|
|
"black"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What colour bike is the man riding?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"Red"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the white sign pointing to?",
|
|
"answer": "a building",
|
|
"gt_answers": [
|
|
"BELLE ISLE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331861.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the truck door?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GO USA",
|
|
"USA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000281652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What shape are the headlights on the truck?",
|
|
"answer": "square",
|
|
"gt_answers": [
|
|
"round"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000281652.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this computer?",
|
|
"answer": "dell",
|
|
"gt_answers": [
|
|
"DELL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000311395.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who made this computer?",
|
|
"answer": "dell",
|
|
"gt_answers": [
|
|
"DELL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000311395.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name written on the label of the bottle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"amstel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000458604.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the mayonnaise brand?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CAINS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000165684.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the racquet frame?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"90"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the wall say in big letters?",
|
|
"answer": "no smoking",
|
|
"gt_answers": [
|
|
"BUSINESS LOUNGE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322469.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Logan St.",
|
|
"Logan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000132123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the arrow demonstrate?",
|
|
"answer": "the direction of traffic",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000132123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the red sign?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000132123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is this bench?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"red bench"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2341781.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first letter listed on the end of the beam?",
|
|
"answer": "b",
|
|
"gt_answers": [
|
|
"C"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02777292_14309.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in gold letters on the sign attached to wooden posts?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MIURA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000491837.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What digits are written on the blue sign attached to the wooden post?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"30-19"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000491837.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is after Ginza",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is after the -?",
|
|
"answer": "street",
|
|
"gt_answers": [
|
|
"chome"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is before 6?",
|
|
"answer": "5",
|
|
"gt_answers": [
|
|
"Ginza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352827.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the soda brand in the silver box?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Pepsi"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000112459.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the product?",
|
|
"answer": "darigold chocolate milk",
|
|
"gt_answers": [
|
|
"DARIGOLD"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019180.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What vitamin does it contain?",
|
|
"answer": "vitamin d",
|
|
"gt_answers": [
|
|
"D"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019180.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company's brand logo is on the block?",
|
|
"answer": "monstrous",
|
|
"gt_answers": [
|
|
"movistar"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403290.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bottom word printed on the blue box?",
|
|
"answer": "solar",
|
|
"gt_answers": [
|
|
"SCHOOL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2318668.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the tag number on the car?",
|
|
"answer": "sc sc",
|
|
"gt_answers": [
|
|
"514 MFY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316376.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "|What is the number on the boys shirt inverted?|",
|
|
"answer": "inverted number on the boys shirt",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000155519.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "that number?",
|
|
"answer": "ten",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000486471.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text say on the side of the plain",
|
|
"answer": "air france",
|
|
"gt_answers": [
|
|
"Air France"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404724.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company makes the syringe?",
|
|
"answer": "kodak",
|
|
"gt_answers": [
|
|
"KOJAK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04376876_30832.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do you insert into the slot?",
|
|
"answer": "coins",
|
|
"gt_answers": [
|
|
"COINS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592082.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the first floor business, on the left side of this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"City Chain",
|
|
"CITY CHAIN PRIMO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000336063.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which is the next coming airport?",
|
|
"answer": "schoenfeld",
|
|
"gt_answers": [
|
|
"Schonefeld.",
|
|
"Schonefeld"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334528.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which brand is being advertised?",
|
|
"answer": "k2",
|
|
"gt_answers": [
|
|
"Burton"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2375355.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the clothing brand seen in this picture?",
|
|
"answer": "versace",
|
|
"gt_answers": [
|
|
"Versace"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_697.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which airlines is the plane from?",
|
|
"answer": "canadian airlines",
|
|
"gt_answers": [
|
|
"air canada"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326939.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "From where is this boat?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"Queen City Yacht Club"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03874293_14577.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which word has the largest font in the picture?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"TENNIS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000205648.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What clock is underneath Germany?",
|
|
"answer": "green good house",
|
|
"gt_answers": [
|
|
"U.S.A",
|
|
"U.S.A."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332545.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the boat?",
|
|
"answer": "lighthouse",
|
|
"gt_answers": [
|
|
"Pilot"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2401637.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date of the picture?",
|
|
"answer": "september 2008",
|
|
"gt_answers": [
|
|
"20 Oct 2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000401083.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What fruit is in the plastic bags?",
|
|
"answer": "oranges",
|
|
"gt_answers": [
|
|
"grapes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000401083.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the boat?",
|
|
"answer": "plats",
|
|
"gt_answers": [
|
|
"PILOTS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354714.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the monument named on this bus?",
|
|
"answer": "the monument",
|
|
"gt_answers": [
|
|
"Stonehenge"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326548.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is world famous ?",
|
|
"answer": "statue of liberty",
|
|
"gt_answers": [
|
|
"Pizza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where are they located ?",
|
|
"answer": "on the sidewalk",
|
|
"gt_answers": [
|
|
"San Diego"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331884.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is the jackpot?",
|
|
"answer": "$2 million",
|
|
"gt_answers": [
|
|
"15 million"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000848.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of dog do you see in the picture?",
|
|
"answer": "bloodhound",
|
|
"gt_answers": [
|
|
"bloodhound"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02088466_4715.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the printed text at the top say?",
|
|
"answer": "players' cigarettes",
|
|
"gt_answers": [
|
|
"PLAYER'S CIGARETTES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02088466_4715.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say in the man's blue shirt?",
|
|
"answer": "hot dog",
|
|
"gt_answers": [
|
|
"New York"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350153.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sign is above the door?",
|
|
"answer": "exit sign",
|
|
"gt_answers": [
|
|
"EXIT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/254.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the left side say?",
|
|
"answer": "mrt",
|
|
"gt_answers": [
|
|
"Blush!"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_881.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the man's chest?",
|
|
"answer": "one",
|
|
"gt_answers": [
|
|
"820"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the red circle?",
|
|
"answer": "claro",
|
|
"gt_answers": [
|
|
"Claro"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03777754_7428.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What meal is this?",
|
|
"answer": "lasagna",
|
|
"gt_answers": [
|
|
"Chile relleno"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012696.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are written by the number 21?",
|
|
"answer": "a, b, c, d, e, f, g, h, i, j, k",
|
|
"gt_answers": [
|
|
"MM"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_1132.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's written in bold letters in the center of the item on the right?",
|
|
"answer": "triplet",
|
|
"gt_answers": [
|
|
"TRIPLET"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03692522_1132.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many people are in the alley?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000329726.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say?",
|
|
"answer": "hollywood",
|
|
"gt_answers": [
|
|
"Hollywood"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000582.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the yellow shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Lobster"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355593.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the noun form of the word choose?",
|
|
"answer": "election",
|
|
"gt_answers": [
|
|
"Choice"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04023962_28195.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is listed on the image?",
|
|
"answer": "mapleimage com",
|
|
"gt_answers": [
|
|
"naplesimage.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04273569_25629.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the maximum percentage you can save at this Store?",
|
|
"answer": "70",
|
|
"gt_answers": [
|
|
"70%"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_392.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in pink/red?",
|
|
"answer": "glide",
|
|
"gt_answers": [
|
|
"glide"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_680.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What complete word is seen directly behind the running woman?",
|
|
"answer": "games",
|
|
"gt_answers": [
|
|
"Games"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334284.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the boat?",
|
|
"answer": "venice",
|
|
"gt_answers": [
|
|
"Accademia"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000056193.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo taken?",
|
|
"answer": "2009",
|
|
"gt_answers": [
|
|
"2009"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000536059.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which car manufacture is shown on the advertising boards?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Kia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361363.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this picture taken?",
|
|
"answer": "1984",
|
|
"gt_answers": [
|
|
"2013"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364355.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number on the vehicle shown?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"0257b"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000256576.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the word in blue say on the back of the vehicle?",
|
|
"answer": "police",
|
|
"gt_answers": [
|
|
"police"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000256576.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number located on the back right of the vehicle?",
|
|
"answer": "san francisco",
|
|
"gt_answers": [
|
|
"1877-4fps-411",
|
|
"1-877-4fps-411"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000256576.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is toward the left?",
|
|
"answer": "detour",
|
|
"gt_answers": [
|
|
"Detour"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2374603.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written next to the arrow on the black sign?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"Entrance"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342596.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the license plate?",
|
|
"answer": "london",
|
|
"gt_answers": [
|
|
"BGJ 115"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4400.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the top street sign?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"2000"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000143995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the weather currently like?",
|
|
"answer": "it is raining",
|
|
"gt_answers": [
|
|
"Stormy"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02794156_7077.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What day is it?",
|
|
"answer": "monday",
|
|
"gt_answers": [
|
|
"Day 14"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07697537_3932.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHICH PHOTOGRAPHY IS IT",
|
|
"answer": "it is a photograph of a bridge in a city",
|
|
"gt_answers": [
|
|
"MLECHUGA PHOTOGRAPHY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/5251.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name in the bottom left corner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Tracey Sanders"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000533308.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is given in the bottom left corner?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000533308.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street on the yellow sign?",
|
|
"answer": "elm street",
|
|
"gt_answers": [
|
|
"Main Street"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000417129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company uses the truck?",
|
|
"answer": "towing company",
|
|
"gt_answers": [
|
|
"Abbey Transport LTD",
|
|
"ABBEY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592233.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How high does a jackpot have to be to be paid for by the attendant?",
|
|
"answer": "the jackpot has to be at least $100",
|
|
"gt_answers": [
|
|
"1000"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_16210.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the web address given",
|
|
"answer": "www claas com",
|
|
"gt_answers": [
|
|
"freefoto.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03496892_39042.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which street sign is higher than the other?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"HIGH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411897.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word appears twice on the remotes?",
|
|
"answer": "control",
|
|
"gt_answers": [
|
|
"Fear"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04074963_1277.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the third remote?",
|
|
"answer": "remote",
|
|
"gt_answers": [
|
|
"dumb"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04074963_1277.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is on the first remote?",
|
|
"answer": "remote",
|
|
"gt_answers": [
|
|
"Hate"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04074963_1277.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in black text on the sign?",
|
|
"answer": "humped zebra crossing",
|
|
"gt_answers": [
|
|
"Humped Zebra Crossing"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414416.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the skaters t-shirt?",
|
|
"answer": "uncon",
|
|
"gt_answers": [
|
|
"bacon"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2376700.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the caption say on the photo?",
|
|
"answer": "splash of milk",
|
|
"gt_answers": [
|
|
"GOOD TO THE VERY LAST DROP"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07932039_9087.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written at front side of the train?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"First"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000407814.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white sign say?",
|
|
"answer": "road closed",
|
|
"gt_answers": [
|
|
"Road closed"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713303.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the orange sign say?",
|
|
"answer": "road closed",
|
|
"gt_answers": [
|
|
"Slow"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713303.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many traffic cones are in the picture?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713303.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the words on the green bar at the top of the box?",
|
|
"answer": "classic waffle maker",
|
|
"gt_answers": [
|
|
"WAFFLE MAKER"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04542943_6049.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What series is this waffle maker?",
|
|
"answer": "classic series",
|
|
"gt_answers": [
|
|
"Brushed Stainless Series",
|
|
"BRUSHED STAINLESS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04542943_6049.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many settings is on this waffle maker?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04542943_6049.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the watermark read?",
|
|
"answer": "harry potter",
|
|
"gt_answers": [
|
|
"Ilya Spirin Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2330269.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the large word on the front of the truck say?",
|
|
"answer": "co",
|
|
"gt_answers": [
|
|
"COPP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409185.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the flavor of this Healthy Choice meal?",
|
|
"answer": "tomato basil",
|
|
"gt_answers": [
|
|
"Fresh Mixers"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013596.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What flag is up?",
|
|
"answer": "american flag",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373784.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT TYPE OF BUS IS IT",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"SCHOOL BUS"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT COLOR IS THE BUS",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"YELLOW"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the storefront in red?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Snackbar"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000535113.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name written on top of the phone number on the trailer?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"WILLIAM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000525580.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is he playing?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"baseball"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000326628.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the biggest word printed in red on the blue sign?",
|
|
"answer": "travel",
|
|
"gt_answers": [
|
|
"Unlimited"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_525.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is written on the bus sign?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Orange"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322599.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the blue shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Legends"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is shown in the picture?",
|
|
"answer": "a tube of chapstick",
|
|
"gt_answers": [
|
|
"chapstick",
|
|
"Chap Stick"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012544.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the camera?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Kinaflex"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04069434_296.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What street?",
|
|
"answer": "50 street",
|
|
"gt_answers": [
|
|
"50"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324824.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in black?",
|
|
"answer": "horizon",
|
|
"gt_answers": [
|
|
"Horizon"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is address does the red text show?",
|
|
"answer": "http://www",
|
|
"gt_answers": [
|
|
"CVS.com"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002753.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue snowboard?",
|
|
"answer": "a name",
|
|
"gt_answers": [
|
|
"GMR AR 158-01"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317572.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boat?",
|
|
"answer": "dorset",
|
|
"gt_answers": [
|
|
"Doretta L"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331393.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who has copyright over this image?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Bridget Samuels"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415485.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was it copyrighted?",
|
|
"answer": "1976",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415485.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the business?",
|
|
"answer": "beers on call",
|
|
"gt_answers": [
|
|
"Geeks on call"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000433397.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this advertisement for?",
|
|
"answer": "waste management",
|
|
"gt_answers": [
|
|
"wastetech",
|
|
"PHS WASTETECH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000045494.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is phone number for Wasteetch?",
|
|
"answer": "030 333 0000",
|
|
"gt_answers": [
|
|
"0870 241 7491"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000045494.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who posted this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FANCYSHOTS.NL"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000127924.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What establishment name is written at the very top of the taller sign?",
|
|
"answer": "rafael street",
|
|
"gt_answers": [
|
|
"CENTRAL PARK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000545310.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What opportunity is indicated lower on the tall green sign?",
|
|
"answer": "free parking",
|
|
"gt_answers": [
|
|
"PARKING"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000545310.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name indicated on the lower sign with the white background, on the foreground face of that sign?",
|
|
"answer": " ",
|
|
"gt_answers": [
|
|
"BOGRASHOV"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000545310.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the white sign?",
|
|
"answer": "sams club",
|
|
"gt_answers": [
|
|
"Sale"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_154.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text inside the square?",
|
|
"answer": "up the pole",
|
|
"gt_answers": [
|
|
"Up the pole"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03976657_26826.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "which year given",
|
|
"answer": "2008",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02119789_16556.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What option on the dial is spelled out?",
|
|
"answer": "shuffle",
|
|
"gt_answers": [
|
|
"MENU"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03584254_244.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the house?;",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"5041"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what's street name ?",
|
|
"answer": "broadway",
|
|
"gt_answers": [
|
|
"Broadway",
|
|
"BROADWAY W 120 ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2357281.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the screen of the bus window?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000332684.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what two words are at the top of the black sign?",
|
|
"answer": "road closed",
|
|
"gt_answers": [
|
|
"area closed"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000110217.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what word is on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000110217.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "when is the area closed?",
|
|
"answer": "during hunting season",
|
|
"gt_answers": [
|
|
"SUNSET-8AM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000110217.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the bottom of the case?",
|
|
"answer": "happy holidays",
|
|
"gt_answers": [
|
|
"She can not swim, nor can I"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03908618_40842.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers on the bus?",
|
|
"answer": "the numbers on the bus are person",
|
|
"gt_answers": [
|
|
"792"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000551679.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"702",
|
|
"792"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000551679.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the plane?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MABLE CODY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000360352.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does Bell holding on her hand?",
|
|
"answer": "tennis racket",
|
|
"gt_answers": [
|
|
"bell"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000271282.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written at the bottom of the photo?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"J.KHONGHUNPHOTO"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03131574_5704.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the book shown?",
|
|
"answer": "eight cousins",
|
|
"gt_answers": [
|
|
"EIGHT COUSINS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07248320_98323.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What author is credited on the book cover?",
|
|
"answer": "louisa may alcott",
|
|
"gt_answers": [
|
|
"LOUISA MAY ALCOTT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07248320_98323.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the dug out?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"340"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000234234.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number written the most upper left on the sign behind the person?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"3071"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000353754.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written in the bottom right side?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"23/4-07"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02094114_1658.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name showing between the dog's paws?",
|
|
"answer": "alexander",
|
|
"gt_answers": [
|
|
"Asbjorn"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02094114_1658.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What district is the Police car from?",
|
|
"answer": "4th district",
|
|
"gt_answers": [
|
|
"3rd"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000107011.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the right side of the bumper of the Police car?",
|
|
"answer": "57",
|
|
"gt_answers": [
|
|
"3071"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000107011.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two name brands are printed on the wall?",
|
|
"answer": "person and person",
|
|
"gt_answers": [
|
|
"adidas FedEx"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000256057.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which airline is shown on the tail?",
|
|
"answer": "all nippon airlines",
|
|
"gt_answers": [
|
|
"ANA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000481049.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the plane's identifying number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"N7B7EX",
|
|
"N787EX"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000481049.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the TV station shown on the sign at the top?",
|
|
"answer": "fox sports",
|
|
"gt_answers": [
|
|
"FSN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317417.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of this bus?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319281.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the color of the line that the train on the left is on?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Orange line",
|
|
"ORANGE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345629.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the pink shield",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"LIVE THE LIFE YOU LOVE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_467.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many chicks are there?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7026.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the middle chick?",
|
|
"answer": "black",
|
|
"gt_answers": [
|
|
"Black"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/7026.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of candle is it?",
|
|
"answer": "a wedding candle",
|
|
"gt_answers": [
|
|
"Marry Candle"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02948072_11043.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word at the top?",
|
|
"answer": "crystal",
|
|
"gt_answers": [
|
|
"Cristal"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028574.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of board reference?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"500 atp world"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2320398.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the team name on the batter's jersey?",
|
|
"answer": "orioles",
|
|
"gt_answers": [
|
|
"orioles"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000382695.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this airline?",
|
|
"answer": "air canada",
|
|
"gt_answers": [
|
|
"Air Transat"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323834.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number above the bench?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"340"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373871.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "this is a toyota what?",
|
|
"answer": "4runner",
|
|
"gt_answers": [
|
|
"TUNDRA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000125377.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say under the blue house on the white remote?",
|
|
"answer": "dvr",
|
|
"gt_answers": [
|
|
"HOME"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000422343.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number in the red t-shirt player who is standing behind yellow t-shirt in the field?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317119.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What avenue is shown?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"IMIQTAVIK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000039926.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the orange text say?",
|
|
"answer": "a man standing in a flooded street",
|
|
"gt_answers": [
|
|
"Wells Fargo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342273.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the contact number on the to let board?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"01244 408200"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000058597.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the numerals written on either side of the clock?",
|
|
"answer": "roman numerals",
|
|
"gt_answers": [
|
|
"18 97"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000058597.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the top left oft he ad?",
|
|
"answer": "remove",
|
|
"gt_answers": [
|
|
"Remove"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03109150_8960.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is number listed on the front of the bike?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"67"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346985.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what's flight no.?",
|
|
"answer": "flight no",
|
|
"gt_answers": [
|
|
"N650KB"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337307.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the website on this photo?",
|
|
"answer": "huangshan photography",
|
|
"gt_answers": [
|
|
"www.bbker.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n09468604_16958.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the book with the house on the cover?",
|
|
"answer": "person's house",
|
|
"gt_answers": [
|
|
"Little Pink House"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000555339.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sign say?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop.",
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409400.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the speed limit?",
|
|
"answer": "50 mph",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160818.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first phrase on the screen/",
|
|
"answer": "psp emulator",
|
|
"gt_answers": [
|
|
"Please wait..."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03485407_6288.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What drink is on the sign?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Coca-cola",
|
|
"COCA COLA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1808.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much was the bet?",
|
|
"answer": "the bet was 100 credits",
|
|
"gt_answers": [
|
|
"$0.25"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_29962.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much did the person win?",
|
|
"answer": "$600",
|
|
"gt_answers": [
|
|
"$2.07"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_29962.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of the black car?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"HGX 6946"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000511301.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the Zipcar Street Team located?",
|
|
"answer": "the zipcar street team is located in new york city",
|
|
"gt_answers": [
|
|
"ON THE CORNER OF WILLIAM PENN PL AND 5TH AVE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000511301.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the car number?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"540425"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331542.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of soda is shown?",
|
|
"answer": "coke",
|
|
"gt_answers": [
|
|
"Diet cream soda"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405907.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who makes the soda?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"HyVee"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405907.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is doing the women?",
|
|
"answer": "dancing",
|
|
"gt_answers": [
|
|
"Singing"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339173.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where type of blackthorns are sold?",
|
|
"answer": "in shops",
|
|
"gt_answers": [
|
|
"Irish"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of crops are sold?",
|
|
"answer": "vegetables and fruits",
|
|
"gt_answers": [
|
|
"Riding"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last item listed on the right sign?",
|
|
"answer": "sticks",
|
|
"gt_answers": [
|
|
"Swordsticks"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355129.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the fender of the motorcycle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Dallas police"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000184397.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What might you earn instead of real money in this image if you followed the guidance?",
|
|
"answer": "points",
|
|
"gt_answers": [
|
|
"STARS"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the street sign in the middle say?",
|
|
"answer": "dan whitney street",
|
|
"gt_answers": [
|
|
"14th street"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343070.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which Canadian province is named on the awning?",
|
|
"answer": "quebec",
|
|
"gt_answers": [
|
|
"QUEBEC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000553700.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of company on the red sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Big Horn Brewing Co."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000136832.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is on the sign in blue?",
|
|
"answer": "'do not enter'",
|
|
"gt_answers": [
|
|
"T"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3886.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of one sponsor for this event?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"eastern daily press"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000494721.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say at the very top of the blue road sign?",
|
|
"answer": "bangkok international airport",
|
|
"gt_answers": [
|
|
"Rabindra Sadan"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000378155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the road heading left on the road sign?",
|
|
"answer": "bangalore road",
|
|
"gt_answers": [
|
|
"Lee Road"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000378155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the address refer by bus?",
|
|
"answer": "the address refer by bus",
|
|
"gt_answers": [
|
|
"73 ARMITAGE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367541.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the pillow?",
|
|
"answer": "peace",
|
|
"gt_answers": [
|
|
"Peace"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362220.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the yellow board",
|
|
"answer": "time for a pint",
|
|
"gt_answers": [
|
|
"Time NO TIES PLEASE For a Pint"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000246787.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text on the red train?",
|
|
"answer": "sc sc",
|
|
"gt_answers": [
|
|
"DB",
|
|
"DB 185 334-7"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000310108.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What European country is this in?",
|
|
"answer": "germany",
|
|
"gt_answers": [
|
|
"UK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02088094_4462.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters on the right car?",
|
|
"answer": "em",
|
|
"gt_answers": [
|
|
"SNCF"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336513.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "name on the banner with white background",
|
|
"answer": "on the banner with white background",
|
|
"gt_answers": [
|
|
"thermen lamer"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355724.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What district does this school belong to?",
|
|
"answer": "williamsport school district",
|
|
"gt_answers": [
|
|
"Sterling School District 2",
|
|
"STERLING SCHOOL"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008177.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the card?",
|
|
"answer": "john smith",
|
|
"gt_answers": [
|
|
"Oma"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/4796.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign on the right say?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"Duong pasteur"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2405748.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON BUS SHELTER ADVERTISEMENT BOARD?",
|
|
"answer": "nyc subway",
|
|
"gt_answers": [
|
|
"FREE MOVE? FOR REAL."
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000141200.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of match box?",
|
|
"answer": "safety matches",
|
|
"gt_answers": [
|
|
"Safety Matches"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03729826_10893.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many matches does this box contains?",
|
|
"answer": "50 matches",
|
|
"gt_answers": [
|
|
"50 MATCHES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03729826_10893.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the logo on match box?",
|
|
"answer": "safety matches",
|
|
"gt_answers": [
|
|
"HOME BRAND"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03729826_10893.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is in the middle of the street arch?",
|
|
"answer": "an arch",
|
|
"gt_answers": [
|
|
"ENCINITAS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361083.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who runs this bus?",
|
|
"answer": "philippine national airlines",
|
|
"gt_answers": [
|
|
"DAGUPAN BUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus's number?",
|
|
"answer": "no number",
|
|
"gt_answers": [
|
|
"71719"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362841.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the companies name?",
|
|
"answer": "pink penny",
|
|
"gt_answers": [
|
|
"pink penny"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_58785.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is this a promotion for?",
|
|
"answer": "pink penny",
|
|
"gt_answers": [
|
|
"A WEDDING ALBUM DESIGN BOUTIQUE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_58785.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what this in bus colour?",
|
|
"answer": "purple and white",
|
|
"gt_answers": [
|
|
"red",
|
|
"red & white"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347902.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many number of bus?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"1"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347902.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the bus number?",
|
|
"answer": "t2",
|
|
"gt_answers": [
|
|
"193"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347902.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the Ave?",
|
|
"answer": "westlake ave",
|
|
"gt_answers": [
|
|
"Westlake Ave",
|
|
"WESTLAKE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372895.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where did they buy the coffee?",
|
|
"answer": "the coffee shop",
|
|
"gt_answers": [
|
|
"Lebkov & Sons Good Food",
|
|
"LEBKOV"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07920052_15897.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant?",
|
|
"answer": "the stock restaurant",
|
|
"gt_answers": [
|
|
"THE SHACK RESTAURANT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406980.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what are the blue letters?",
|
|
"answer": "the name of the lake",
|
|
"gt_answers": [
|
|
"CMS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000431991.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what sign is shown below J F Kennedy Bl?",
|
|
"answer": "no parking sign",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359671.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what street is pictured?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"J F Kennedy Bl"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359671.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the shop in the photo?",
|
|
"answer": "no name",
|
|
"gt_answers": [
|
|
"the Piper",
|
|
"PIPER"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359671.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the top front of the white bus?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"Mounties"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2353651.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the boy's shirt?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red and white",
|
|
"RED"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2322401.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign on bus",
|
|
"answer": "mobile pizza",
|
|
"gt_answers": [
|
|
"lostbrothers"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370329.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number of pizzeria",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"908-333-8415"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370329.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the web address given",
|
|
"answer": "www.mobilepizza.com",
|
|
"gt_answers": [
|
|
"www.lostbrotherspizza.com"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2370329.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of crossing is this?",
|
|
"answer": "railroad crossing",
|
|
"gt_answers": [
|
|
"Railroad crossing",
|
|
"RAILROAD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000116732.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the hotdog?",
|
|
"answer": "ketchup and mustard",
|
|
"gt_answers": [
|
|
"Cheese"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354946.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much would a FETCH DOG cost?",
|
|
"answer": "$5",
|
|
"gt_answers": [
|
|
"$4.25"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2354946.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the baby's shirt say?",
|
|
"answer": "i love music",
|
|
"gt_answers": [
|
|
"baby girl"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02834397_24370.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is it going to be a baby boy or baby girl?",
|
|
"answer": "girl",
|
|
"gt_answers": [
|
|
"baby girl"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02834397_24370.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What percentage is the milk?",
|
|
"answer": "2%",
|
|
"gt_answers": [
|
|
"2%"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000317986.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of milk?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Darigold"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000317986.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What vitamins are in the milk?",
|
|
"answer": "vitamin d",
|
|
"gt_answers": [
|
|
"Vitamin A & D"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000317986.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is watermarked in the bottom left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Koen Brauwer",
|
|
"KOEN BROUWER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000226451.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is shown in the image?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_10301.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is represented in this picture?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_10301.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is INFANTE's jersey number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000537543.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who are the photographers?",
|
|
"answer": "arkive",
|
|
"gt_answers": [
|
|
"S. and A. Sailer",
|
|
"S. AND A.SAILER"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02487347_19051.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the person on the television?",
|
|
"answer": "obama",
|
|
"gt_answers": [
|
|
"Barack Obama"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000435584.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which animal is this designed for?",
|
|
"answer": "mouse",
|
|
"gt_answers": [
|
|
"Mouse"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03794056_13377.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company owns this plane?",
|
|
"answer": "fedex",
|
|
"gt_answers": [
|
|
"FedEx"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000118920.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this?",
|
|
"answer": "university of technology",
|
|
"gt_answers": [
|
|
"INDIAN INSTITUTE OF TECHNOLOGY"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000042.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what number is on the building?",
|
|
"answer": "201",
|
|
"gt_answers": [
|
|
"400"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1490.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is the building?",
|
|
"answer": "gray",
|
|
"gt_answers": [
|
|
"grey"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1490.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company on the window to the right?",
|
|
"answer": "jollibee",
|
|
"gt_answers": [
|
|
"Jollibean"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_89.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store in the picture?",
|
|
"answer": "jolly bean",
|
|
"gt_answers": [
|
|
"Jollibean"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_89.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word of the description at the bottom of the picture?",
|
|
"answer": "kitchen",
|
|
"gt_answers": [
|
|
"Back"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word of the description at the bottom of the picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"section"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the title of the photo?",
|
|
"answer": "the kitchen",
|
|
"gt_answers": [
|
|
"back portion of new section"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000160214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who provided the image?",
|
|
"answer": "nottingham vet school",
|
|
"gt_answers": [
|
|
"Nottingham Vet School"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000574471.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what mattress company is on the bears pajamas?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Dreams"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000011029.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the orange sign with the arrow pointing right?",
|
|
"answer": "detour",
|
|
"gt_answers": [
|
|
"DETOUR"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342993.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what large electrical brand can be seen on a white banner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"TOSHIBA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000191850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number of this car?",
|
|
"answer": "bc bc",
|
|
"gt_answers": [
|
|
"CC-738-VG"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4253.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate?",
|
|
"answer": "a cow",
|
|
"gt_answers": [
|
|
"RJ-27"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331927.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in the bottom corner?",
|
|
"answer": "ostrich",
|
|
"gt_answers": [
|
|
"Selga"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01518878_194.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the blue dustbin",
|
|
"answer": "swedish for skateboard",
|
|
"gt_answers": [
|
|
"esport"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000477428.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the third line of text on the shirt say?",
|
|
"answer": "i love frisbee",
|
|
"gt_answers": [
|
|
"I was there!"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000226294.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate on the white car?",
|
|
"answer": "md md",
|
|
"gt_answers": [
|
|
"1B-4967 OH",
|
|
"IB-4967-OH"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/839.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"255",
|
|
"218"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000289231.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number on the bus?",
|
|
"answer": "london ltd",
|
|
"gt_answers": [
|
|
"YX60 DWY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000289231.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number on the bus?",
|
|
"answer": "cbd 005",
|
|
"gt_answers": [
|
|
"yn03 ncf"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000251922.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to london bridge",
|
|
"gt_answers": [
|
|
"Bellfields"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000251922.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the byus?",
|
|
"answer": "byus",
|
|
"gt_answers": [
|
|
"Arriva"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000251922.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word of the yellow lettering?",
|
|
"answer": "al",
|
|
"gt_answers": [
|
|
"Copyrights"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03729826_4215.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where is the departure point of the yellow bus?",
|
|
"answer": "sydney harbour bridge",
|
|
"gt_answers": [
|
|
"Roosevelt Square"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000443764.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the phone number of River Ride?",
|
|
"answer": "the phone number of river ride is 030 222 0000",
|
|
"gt_answers": [
|
|
"33-22-555"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000443764.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the pink writing say on the black shirt?",
|
|
"answer": "drop",
|
|
"gt_answers": [
|
|
"Drop the bomb"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_6106.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number written near the front wheel of the vehicle?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"767"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347614.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the big word on the hoodie?",
|
|
"answer": "unification",
|
|
"gt_answers": [
|
|
"Unification"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04370456_6779.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business written with yellow?",
|
|
"answer": "pizzeria",
|
|
"gt_answers": [
|
|
"FERRAMENTA COLORI"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3921.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who took this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"RAMESH RASAIYAN"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000341985.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on this building?",
|
|
"answer": "hilton garden inn",
|
|
"gt_answers": [
|
|
"Marriott"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000758.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this location?",
|
|
"answer": "birmingham square",
|
|
"gt_answers": [
|
|
"Marina Square"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_107.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is displayed at the top of the truck in this picture?",
|
|
"answer": "drayage company",
|
|
"gt_answers": [
|
|
"Bender"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04467665_59160.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the top front of the truck?",
|
|
"answer": "big tex",
|
|
"gt_answers": [
|
|
"Bender"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04467665_59160.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the year printed on this photo?",
|
|
"answer": "1976",
|
|
"gt_answers": [
|
|
"2007"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2352861.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the second hand pointing to?",
|
|
"answer": "10",
|
|
"gt_answers": [
|
|
"9"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414439.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which number will the minute hand indicate next?",
|
|
"answer": "12",
|
|
"gt_answers": [
|
|
"7"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414439.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the upcoming hour?",
|
|
"answer": "a teddy bear and a clock",
|
|
"gt_answers": [
|
|
"6"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000414439.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How far away is Takeshiba Sanbashi?",
|
|
"answer": "it is a short distance away",
|
|
"gt_answers": [
|
|
"400m"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06794110_1142.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How far away is the Subway?",
|
|
"answer": "5 minutes",
|
|
"gt_answers": [
|
|
"300m"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06794110_1142.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How far away is Kyu Shiba-rikyu Gardens?",
|
|
"answer": "from kyoto station",
|
|
"gt_answers": [
|
|
"190m"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06794110_1142.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the bus?",
|
|
"answer": "passengers",
|
|
"gt_answers": [
|
|
"NYPD"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/630.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are above the red door?",
|
|
"answer": "t o p",
|
|
"gt_answers": [
|
|
"HSW"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367835.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is above the red door?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"17"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367835.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of FOOD is it?",
|
|
"answer": "fish and chips",
|
|
"gt_answers": [
|
|
"FAST",
|
|
"fast food"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367835.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the shirt say?",
|
|
"answer": "i'm a character",
|
|
"gt_answers": [
|
|
"I'M A CHARACTER @ #140CONF"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000136963.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the banana?",
|
|
"answer": "welcome home",
|
|
"gt_answers": [
|
|
"Welcome Home Derek",
|
|
"WELCOME HOME OERGY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366474.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "when was the photo taken?",
|
|
"answer": "the photo was taken",
|
|
"gt_answers": [
|
|
"08/03/2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04493381_10628.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the back of the black tshirt?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"Tuska"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336626.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "mk ii",
|
|
"gt_answers": [
|
|
"OTL 267"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4346.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is their team name",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"GV"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000296614.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the bold type in the ticket in the advertisement on the left say?",
|
|
"answer": "ice hockey",
|
|
"gt_answers": [
|
|
"Backyard Fun"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the back of the grey jersey of the hockey player on the right?",
|
|
"answer": "eddie giroux",
|
|
"gt_answers": [
|
|
"Wilson"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the grey jersey?",
|
|
"answer": "nhl",
|
|
"gt_answers": [
|
|
"Wilson"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the Black jersey?",
|
|
"answer": "nhl",
|
|
"gt_answers": [
|
|
"Alzner"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this bus going?",
|
|
"answer": "to the mountains",
|
|
"gt_answers": [
|
|
"Ottawa"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340248.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number fence is the horse jumping over?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"4"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000246345.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first name of the company hosting this event?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"DODSON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000246345.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is in bold letters on the paper in the picture?",
|
|
"answer": "star",
|
|
"gt_answers": [
|
|
"Carving"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02966687_6334.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the warning sign instruction?",
|
|
"answer": "do not cross tracks",
|
|
"gt_answers": [
|
|
"Use Extreme Caution"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316172.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red and white sign say?",
|
|
"answer": "bicycle tracks",
|
|
"gt_answers": [
|
|
"No Parking Any Time"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316172.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of roads are the signs depicting?",
|
|
"answer": "tracks",
|
|
"gt_answers": [
|
|
"One Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316172.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text at the bottom?",
|
|
"answer": "luxor temple",
|
|
"gt_answers": [
|
|
"Phistoria.net"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03788195_7835.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign say on the green pole?",
|
|
"answer": "stockholm metro",
|
|
"gt_answers": [
|
|
"0-32a"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000050366.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who owns the bus?",
|
|
"answer": "havit",
|
|
"gt_answers": [
|
|
"hyatt regency",
|
|
"HYATT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326553.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the green wall?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"WLMI 92.9"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411130.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the largest word on the computer screen say?",
|
|
"answer": "open",
|
|
"gt_answers": [
|
|
"Think"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347730.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the common phrase on all three pieces of clothing?",
|
|
"answer": "go team go",
|
|
"gt_answers": [
|
|
"Go Team Go"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_25.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on objects ?",
|
|
"answer": "go team go",
|
|
"gt_answers": [
|
|
"Go team go"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_25.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of animal is shown?",
|
|
"answer": "dog",
|
|
"gt_answers": [
|
|
"Dog"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02099601_8698.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first two words in the box?",
|
|
"answer": "black and white",
|
|
"gt_answers": [
|
|
"Loading Zoom"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04026417_9739.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two words are listed after the comma in the box?",
|
|
"answer": "black and white",
|
|
"gt_answers": [
|
|
"please wait"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04026417_9739.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE WEBSITE SHOWN IN THE IMAGE",
|
|
"answer": "taylormadejewelry",
|
|
"gt_answers": [
|
|
"www.islandgems.biz"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03814906_32536.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the photographer name",
|
|
"answer": "silvia tyler",
|
|
"gt_answers": [
|
|
"gladstone taylor"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03814906_32536.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate of the bus?",
|
|
"answer": "el santo",
|
|
"gt_answers": [
|
|
"BSL523",
|
|
"BSL 523"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the bus going?",
|
|
"answer": "to the next stop",
|
|
"gt_answers": [
|
|
"Flete-Far"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant?",
|
|
"answer": "chinese restaurant",
|
|
"gt_answers": [
|
|
"La Roca"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358850.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the book store?",
|
|
"answer": "the university bookstore",
|
|
"gt_answers": [
|
|
"Sussex University Bookshop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368229.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the train number?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"238"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000014925.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is labeled on the awning sign?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"XEROX"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328044.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is advertised with two x's in the name?",
|
|
"answer": "xtra",
|
|
"gt_answers": [
|
|
"XEROX"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328044.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on the plane",
|
|
"answer": "glider",
|
|
"gt_answers": [
|
|
"RN 0-1842"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367950.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can be found above the keyboard?",
|
|
"answer": "mouse",
|
|
"gt_answers": [
|
|
"user guide",
|
|
"IMAC USER GUIDE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000128074.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the brand of the vending machine on the left?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Coca Cola"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04525305_10434.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is one of the sponsors of this baseball field?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"mastercard"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000557944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on Ichird's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"51"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000557944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on #51's jersey?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ichird"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000557944.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name associated with the picture?",
|
|
"answer": "cobra snake",
|
|
"gt_answers": [
|
|
"Kees de Vries"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n01748264_7516.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written with white on the building",
|
|
"answer": "garbage",
|
|
"gt_answers": [
|
|
"Open Pantry"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334707.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are they selling?",
|
|
"answer": "shoes",
|
|
"gt_answers": [
|
|
"Shoes",
|
|
"VEGETARIAN SHOES"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04200800_20245.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the red sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3574.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What should you do when you come upon the Red Sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3574.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the locomotive?",
|
|
"answer": "gsf",
|
|
"gt_answers": [
|
|
"1566"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416332.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the octagonal red sign say?",
|
|
"answer": "all way stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2365012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which brand of car is this?",
|
|
"answer": "land rover",
|
|
"gt_answers": [
|
|
"Land Rover"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02814533_77309.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the automaker of this car?",
|
|
"answer": "land rover",
|
|
"gt_answers": [
|
|
"Land Rover"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02814533_77309.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the car plate of this car?",
|
|
"answer": "ldx 0",
|
|
"gt_answers": [
|
|
"Y591MJX"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02814533_77309.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the left side of the bus?",
|
|
"answer": "the number of the bus",
|
|
"gt_answers": [
|
|
"440"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000099844.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last two digits of the white truck's license plate?",
|
|
"answer": "28",
|
|
"gt_answers": [
|
|
"22"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000266914.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the tennis players shorts?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Green",
|
|
"Green Black"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347113.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the ball boys shirt?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Red",
|
|
"Red White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347113.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the main color is the tennis players shirt?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"White"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347113.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the black disc?",
|
|
"answer": "nikon",
|
|
"gt_answers": [
|
|
"Nikon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03657121_1520.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written in yellow?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355665.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written above the red rectangle?",
|
|
"answer": "a man giving a presentation",
|
|
"gt_answers": [
|
|
"Parent A"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360285.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written above the green rectangle?",
|
|
"answer": "a man giving a presentation",
|
|
"gt_answers": [
|
|
"Parent B"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360285.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the fitbess club?",
|
|
"answer": "fitbess club",
|
|
"gt_answers": [
|
|
"Elite",
|
|
"Elite Fitness Club"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant?",
|
|
"answer": "tasty",
|
|
"gt_answers": [
|
|
"ELITE restaurant"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3272.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word shown on the left-hand side?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Liberte"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159887.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text in the bottom right corner of the picture say?",
|
|
"answer": "brushing your teeth",
|
|
"gt_answers": [
|
|
"exposed Ismar Badzic by",
|
|
"EXPOSED BY ISMAR BADZIC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000284911.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers can be seen on the score board behind the player?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"5 3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000396534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What logo can be scene on the right side of the man's shits",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"UNI QLO",
|
|
"UNIQLO"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000396534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What can you get 6 of for $5?",
|
|
"answer": "donuts",
|
|
"gt_answers": [
|
|
"Donuts"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404764.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which traffic sign is this?",
|
|
"answer": "stop sign",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339855.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is this train?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"187 019-5"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412509.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is this plane for?",
|
|
"answer": "ukraine",
|
|
"gt_answers": [
|
|
"Ukraine"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350985.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand name on top of the speedometer?",
|
|
"answer": "adc",
|
|
"gt_answers": [
|
|
"Adecco"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2324464.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTENON THE WALL",
|
|
"answer": "first st metro",
|
|
"gt_answers": [
|
|
"FIRST ST METRO"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348016.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who owns the copyright for this picture?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"G. Frank Peterson"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406209.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What words are written in yellow on the pavement?",
|
|
"answer": "bus stop",
|
|
"gt_answers": [
|
|
"BUS STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000247346.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the game?",
|
|
"answer": "pompeii",
|
|
"gt_answers": [
|
|
"Pompeii"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04243546_24990.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this photo copyrighted?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2008"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02011460_1841.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last two numbers?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"94"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03947888_38031.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the 3 digit number written at the bottom of the memorial?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"346"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02892201_5769.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the third letter in the sign?",
|
|
"answer": "o",
|
|
"gt_answers": [
|
|
"O"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409078.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the address of the website listed?",
|
|
"answer": "com",
|
|
"gt_answers": [
|
|
"WWW.DX01.COM"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04204238_6992.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the first three numbers on the bottom row?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"577"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04204238_6992.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the last three numbers on the bottom row?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"582"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04204238_6992.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number on the red car?",
|
|
"answer": "st john",
|
|
"gt_answers": [
|
|
"GSV 820"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4348.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the airplane?",
|
|
"answer": "boeing",
|
|
"gt_answers": [
|
|
"continental"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000409020.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the word before noble?",
|
|
"answer": "noble",
|
|
"gt_answers": [
|
|
"barnes"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000241003.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word after Barnes?",
|
|
"answer": "books",
|
|
"gt_answers": [
|
|
"noble"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000241003.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is full name on boxes?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"barnes noble",
|
|
"BARNES & NOBLE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000241003.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the bus?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"43"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000352019.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What drink is included as an ingredient for this cake?",
|
|
"answer": "coffee liqueur",
|
|
"gt_answers": [
|
|
"Coffee"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344721.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word can you see in the picture?",
|
|
"answer": "mon",
|
|
"gt_answers": [
|
|
"MON"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03710193_279.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is this girl's hair long or short?",
|
|
"answer": "long",
|
|
"gt_answers": [
|
|
"short"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_12604.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the top line written on the blue sticker?",
|
|
"answer": "a parking meter",
|
|
"gt_answers": [
|
|
"Buy Parking Cards at"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358376.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the 3 digit number on the first line on the white sticker?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"363"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358376.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the 5 digit number on the second line on the white sticker?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"3370"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358376.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the street sign say?",
|
|
"answer": "king street",
|
|
"gt_answers": [
|
|
"Peterborough"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/164.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What shop is to the left of the picture?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"Skechers"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_368.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What level is shown in the picture?",
|
|
"answer": "second floor",
|
|
"gt_answers": [
|
|
"L3"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_368.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in the white oval on the left side?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"MyProfe"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048145.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last letter written in white in the center of an oval?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"e"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048145.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name present in the left back of the car",
|
|
"answer": "suzuki",
|
|
"gt_answers": [
|
|
"NISSAN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349249.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number in the plate",
|
|
"answer": "a c",
|
|
"gt_answers": [
|
|
"BETTXA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349249.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written on the green box?",
|
|
"answer": "computer",
|
|
"gt_answers": [
|
|
"Router"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000254787.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the drivers window?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"158793"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2329696.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the company name of the left plane?",
|
|
"answer": "batik air",
|
|
"gt_answers": [
|
|
"Kenya Airways"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323848.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What name is displayed on the right plane?",
|
|
"answer": "batik air",
|
|
"gt_answers": [
|
|
"Martinair Cargo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323848.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first blue letter?",
|
|
"answer": "a",
|
|
"gt_answers": [
|
|
"s"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415709.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name if the sign with a yellow background and red and white logo?",
|
|
"answer": "canadian tire",
|
|
"gt_answers": [
|
|
"HOME",
|
|
"HOME HARDWARE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355786.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of cars is sold at this dealership?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Honda"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000599.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which state appears on the hoodie?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"alaska"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000275556.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes next after salty?",
|
|
"answer": "sweet",
|
|
"gt_answers": [
|
|
"dawg"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000275556.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the neon orange sign?",
|
|
"answer": "k",
|
|
"gt_answers": [
|
|
"p"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000271862.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number of this person who is showing his back side?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000579179.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written on his T-shirt at back side <",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"WDMSC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000579179.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What should you do when you see these signs?",
|
|
"answer": "look at them",
|
|
"gt_answers": [
|
|
"stop"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000292058.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bus number?",
|
|
"answer": "nbsp",
|
|
"gt_answers": [
|
|
"2407",
|
|
"1407"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1592506.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What's the namen of the street sign?",
|
|
"answer": "garden street",
|
|
"gt_answers": [
|
|
"garden st"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000542695.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What hours on a Saturday allows for 2 hr parking?",
|
|
"answer": "8 am to 6 pm",
|
|
"gt_answers": [
|
|
"9AM-10PM"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2350138.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is mentioned in the image?",
|
|
"answer": "usa",
|
|
"gt_answers": [
|
|
"USA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03127925_4045.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is this electronic?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"Edirol"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000017342.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is in blue on the bank?",
|
|
"answer": "1989/90",
|
|
"gt_answers": [
|
|
"1878"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03935335_11776.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the girl's sweater say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Universita Roma"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000074515.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the horse?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"69"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355830.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who copyrighted this image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"katy wrathall"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355830.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "With what format can they edit uncompressed?",
|
|
"answer": "flv",
|
|
"gt_answers": [
|
|
"ProRes"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04404412_30664.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is pictured on the large television?",
|
|
"answer": "maker",
|
|
"gt_answers": [
|
|
"Make"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04404412_30664.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "The wines of which state are you asked to discover?",
|
|
"answer": "california",
|
|
"gt_answers": [
|
|
"California",
|
|
"CALIFORNIA WINES"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2331294.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What do the paper signs on the motorcycle say?",
|
|
"answer": "the teddy bears picnic",
|
|
"gt_answers": [
|
|
"No To the Bike Parking Tax"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000234720.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many men are standing by the train?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"2"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000206155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the mountain at that the presidential train ascending?",
|
|
"answer": "the summit of mount person",
|
|
"gt_answers": [
|
|
"Alarcon"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000206155.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the three digit number that forms part of the registration plate of the vehicle in this image?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"254"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000516189.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What follows Save NASA on the sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"IMPEACH OBAMA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000192539.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What court number are they playing on?",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"9"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2345447.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the batter?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"22"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2413839.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written below the stop sign?",
|
|
"answer": "no parking",
|
|
"gt_answers": [
|
|
"All Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2325110.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the route number displayed next to the bus door?",
|
|
"answer": "the bus number",
|
|
"gt_answers": [
|
|
"45"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000024489.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What 5 words are written in red script?",
|
|
"answer": "flying in an emergency",
|
|
"gt_answers": [
|
|
"Safe flying is no accident",
|
|
"Safe flying is no accident!"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000373596.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photgrapher",
|
|
"answer": "michael mccoy",
|
|
"gt_answers": [
|
|
"scott carpenter"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02018207_24340.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "To what airline does this plane belong?",
|
|
"answer": "austrian airlines",
|
|
"gt_answers": [
|
|
"Austrian Airlines",
|
|
"AUSTRIAN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2359137.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the white banner with red lettering?",
|
|
"answer": "person's farm",
|
|
"gt_answers": [
|
|
"KIDS MEALS SPECIALS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000446677.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the Blue baseball hat with the yellow bill?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CAT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000446677.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the bottle?",
|
|
"answer": "300",
|
|
"gt_answers": [
|
|
"300"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823428_8929.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the brewery?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"The Chiltern Brewery"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823428_8929.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word written in cursive?",
|
|
"answer": "beer",
|
|
"gt_answers": [
|
|
"Old Ale"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02823428_8929.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the photographer, as written on the bottom right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Hayward"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000294199.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of foods selling here?",
|
|
"answer": "fried foods",
|
|
"gt_answers": [
|
|
"Turkey legs, Baked potatoes, fresh pizza Spaghetti"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368696.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word in white is visible on the back left ski?",
|
|
"answer": "fs",
|
|
"gt_answers": [
|
|
"HEAD"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000458475.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "where do you find this chair?",
|
|
"answer": "barber shop",
|
|
"gt_answers": [
|
|
"barber"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02791124_7207.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of Chair is this",
|
|
"answer": "barber chair",
|
|
"gt_answers": [
|
|
"Barber Chair",
|
|
"BARBER"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02791124_7207.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this bus going?",
|
|
"answer": "to a store",
|
|
"gt_answers": [
|
|
"CROSSTOWN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2315803.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last word on the t-shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"IT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000226747.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the top plane?",
|
|
"answer": "number",
|
|
"gt_answers": [
|
|
"718"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000060953.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is written on the blue sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Reliance Mobile"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000406770.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the glass door?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"22"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411661.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is upside down in the circle on the box to the left?",
|
|
"answer": "lego",
|
|
"gt_answers": [
|
|
"longwave"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000334568.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the green sign say?",
|
|
"answer": "winter ville",
|
|
"gt_answers": [
|
|
"WINTER HOLLY LN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2414324.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the juice that the children are drinking in the photo?",
|
|
"answer": "apple juice",
|
|
"gt_answers": [
|
|
"HiC"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000119560.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What organization does this jet belong to?",
|
|
"answer": "us military",
|
|
"gt_answers": [
|
|
"NAVY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3890.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this grocery store?",
|
|
"answer": "market urban market",
|
|
"gt_answers": [
|
|
"Harvest urban market"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2403920.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the third poster on the right with the person say?",
|
|
"answer": "thank you",
|
|
"gt_answers": [
|
|
"Hyperion"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_468.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the soccer jersey?",
|
|
"answer": "the team name",
|
|
"gt_answers": [
|
|
"Malaysia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356514.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the train?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"375826"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349394.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the top shelf in green letters?",
|
|
"answer": "donuts",
|
|
"gt_answers": [
|
|
"Lee's Donuts"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2339169.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What Company is on present on the bus?",
|
|
"answer": "diamond bus",
|
|
"gt_answers": [
|
|
"The detroit bus co.",
|
|
"Detroit Bus Co."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is printed on the side of the green can?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Mountain Dew"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098355.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What location is printed at the top end of the green can?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"NEW YORK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000098355.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What six letter word is visible in this picture?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"LAWSON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000450362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What six letter word is visible in this picture?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Lawson"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000450362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What six letter word is visible in this picture?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"lawson"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000450362.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on white board",
|
|
"answer": "xerox",
|
|
"gt_answers": [
|
|
"xerox"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001325.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this building?",
|
|
"answer": "the yellow house",
|
|
"gt_answers": [
|
|
"Bar Ristorante Tana Della Volpe"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000277321.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is the building in the background a school or a hotel?",
|
|
"answer": "school",
|
|
"gt_answers": [
|
|
"Hotel"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000277321.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written inside the board",
|
|
"answer": "a street sign",
|
|
"gt_answers": [
|
|
"CARRER DE LA LLIBERTAT"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355954.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the restaurant",
|
|
"answer": "the truck is parked in front of a restaurant",
|
|
"gt_answers": [
|
|
"cafenineteen"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000336021.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the black and white sign say?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"ONE WAY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000276853.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the big red sign say?",
|
|
"answer": "one way",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000276853.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is this team located?",
|
|
"answer": "st louis, missouri",
|
|
"gt_answers": [
|
|
"Springfield"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000484355.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What country is this beach located in?",
|
|
"answer": "costa rica",
|
|
"gt_answers": [
|
|
"Costa Rica"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000192795.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is this beach near?",
|
|
"answer": "santa teresa",
|
|
"gt_answers": [
|
|
"Santa Teresa"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000192795.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the black book on the top shelf with red letters, to the left of the word Comics?",
|
|
"answer": "superman",
|
|
"gt_answers": [
|
|
"FROM HELL"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_19401.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word in yellow beside the book titled \"From Hell\"?",
|
|
"answer": "hell",
|
|
"gt_answers": [
|
|
"COMICS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06596364_19401.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the text in blue say?",
|
|
"answer": "family portrait",
|
|
"gt_answers": [
|
|
"FAMILY PORTRAIT",
|
|
"FAMILY PORTAIT"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02510455_14.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price for one sign piece?",
|
|
"answer": "one pound",
|
|
"gt_answers": [
|
|
"15"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the price for two sign pieces?",
|
|
"answer": "the price for two sign pieces",
|
|
"gt_answers": [
|
|
"27"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3167.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are to the left of the clock?",
|
|
"answer": "abc",
|
|
"gt_answers": [
|
|
"ABC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377181.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the right side of the clock?",
|
|
"answer": "serving central oregon",
|
|
"gt_answers": [
|
|
"1240 KC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377181.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of watches sold by the store on the left",
|
|
"answer": "swatch",
|
|
"gt_answers": [
|
|
"Swatch"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1900.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the board?",
|
|
"answer": "your dad is god",
|
|
"gt_answers": [
|
|
"Fathers Day",
|
|
"Your Dad Called And he told us what he wants For Fathers Day!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326897.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the wall?",
|
|
"answer": "cotton on",
|
|
"gt_answers": [
|
|
"Cotton On"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_857.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is printed on the plane?",
|
|
"answer": "go air",
|
|
"gt_answers": [
|
|
"Condor"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362856.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date shown on the picture?",
|
|
"answer": "january 1",
|
|
"gt_answers": [
|
|
"Jun 13 2010"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2340368.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word in english on the sign?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Advance"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000248263.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is one street in this intersection?",
|
|
"answer": "riverdale",
|
|
"gt_answers": [
|
|
"Riverdale Av.",
|
|
"RIVERDALE AV"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000437597.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is one street in this intersection?",
|
|
"answer": "riverdale",
|
|
"gt_answers": [
|
|
"Riverdale Av.",
|
|
"RIVERDALE AV"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000437597.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What instrument is this man playing?",
|
|
"answer": "harp",
|
|
"gt_answers": [
|
|
"Harp"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03495258_15256.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the driver taking the kids?",
|
|
"answer": "to school",
|
|
"gt_answers": [
|
|
"School"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_001080.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign of train refer",
|
|
"answer": "a train refers to a passenger train",
|
|
"gt_answers": [
|
|
"drs"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321177.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number given on train",
|
|
"answer": "number given on train",
|
|
"gt_answers": [
|
|
"37087"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2321177.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the side of the tents?",
|
|
"answer": "tents for sale",
|
|
"gt_answers": [
|
|
"Telefonica"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03792972_6150.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the sponsor for the tents?",
|
|
"answer": "samsung",
|
|
"gt_answers": [
|
|
"Telefonica"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03792972_6150.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Where is the white bus going?",
|
|
"answer": "to the city",
|
|
"gt_answers": [
|
|
"Stephanshorn",
|
|
"STEPHONSHORN"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04487081_17696.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What photography company is listed?",
|
|
"answer": "adobe stock",
|
|
"gt_answers": [
|
|
"NORTHLINE PHOTOGRAPHY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416729.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the word say inside the arrow pointing right?",
|
|
"answer": "chocolate",
|
|
"gt_answers": [
|
|
"Chocolate"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371284.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the white top sign say?",
|
|
"answer": "chocolate",
|
|
"gt_answers": [
|
|
"Herb Caen Way"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2371284.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the bus?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"23"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000403534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of bus is this?",
|
|
"answer": "school bus",
|
|
"gt_answers": [
|
|
"PRIVATE bus",
|
|
"PRIVATE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000403534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is this bus known as?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"23"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000403534.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What flavor are the captain's wafers pictured in the photo?",
|
|
"answer": "vanilla",
|
|
"gt_answers": [
|
|
"Sour cream and chives."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand are the crackers?",
|
|
"answer": "ritz",
|
|
"gt_answers": [
|
|
"Lance"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012918.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how man players are there",
|
|
"answer": "two",
|
|
"gt_answers": [
|
|
"one"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364854.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what it the name of the sponsor of this event?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"INTERSPORT"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000270123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "who is the sponsor in red on the skiers uniform?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"CC ATH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000270123.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "how many miles does the display view",
|
|
"answer": "how many miles does the display view",
|
|
"gt_answers": [
|
|
"10001"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03841143_24382.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What time was this photo taken?",
|
|
"answer": "10 am",
|
|
"gt_answers": [
|
|
"05:38"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000029687.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What month and day was this photo taken?",
|
|
"answer": "september 1st",
|
|
"gt_answers": [
|
|
"04/18"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000029687.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "In which year was this photo taken?",
|
|
"answer": "2012",
|
|
"gt_answers": [
|
|
"2012"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000029687.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the blue boat?",
|
|
"answer": "the blue boat",
|
|
"gt_answers": [
|
|
"El Pirate"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351012.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of potion is this?",
|
|
"answer": "love potion",
|
|
"gt_answers": [
|
|
"LOVE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03062245_3384.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What team does the red jersey belong to?",
|
|
"answer": "miami heat",
|
|
"gt_answers": [
|
|
"Heat"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03595614_23406.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street crossing Wall St?",
|
|
"answer": "wall street",
|
|
"gt_answers": [
|
|
"Broadway"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000428991.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street crossing Broadway St?",
|
|
"answer": "wall street",
|
|
"gt_answers": [
|
|
"Wall"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000428991.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the title of the watermark written on the lower right hand corner of the image?",
|
|
"answer": "concorde",
|
|
"gt_answers": [
|
|
"BLPhotography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2335334.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business?",
|
|
"answer": "mike's market",
|
|
"gt_answers": [
|
|
"Mike's Market"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2351663.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the key left of the missing ones?",
|
|
"answer": "microsoft",
|
|
"gt_answers": [
|
|
"Alt"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462224.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written next to the Windows symbol?",
|
|
"answer": "microsoft",
|
|
"gt_answers": [
|
|
"start"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462224.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number has the key above the star?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000462224.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is on the shirt?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"London"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3175.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the ball player's shirt?",
|
|
"answer": "2",
|
|
"gt_answers": [
|
|
"51"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367336.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is written in the stadium",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"KIA"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000313541.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word written under the puzzle?",
|
|
"answer": "bridge",
|
|
"gt_answers": [
|
|
"HEIDELBERG"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03598930_2773.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is yellow ship no.?",
|
|
"answer": "boat",
|
|
"gt_answers": [
|
|
"RX60"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is brown ship no.?",
|
|
"answer": "boat",
|
|
"gt_answers": [
|
|
"RX55"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/1159823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the red plate on the front of the train?",
|
|
"answer": "santa fe",
|
|
"gt_answers": [
|
|
"2472"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327621.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is on the can?",
|
|
"answer": "crush",
|
|
"gt_answers": [
|
|
"Crush Orange"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000483530.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the woman's shirt?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Pink"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159069.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the woman's skirt?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Brown"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159069.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the moped?",
|
|
"answer": "red",
|
|
"gt_answers": [
|
|
"Red"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000159069.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the three letters still visible on the street sign?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"TOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2366705.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What music studio is this?",
|
|
"answer": "universal music group",
|
|
"gt_answers": [
|
|
"Universal Music"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1302.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "universal Music is a subsidiary of what company?",
|
|
"answer": "universal entertainment group",
|
|
"gt_answers": [
|
|
"Universal"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1302.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who works in this build?",
|
|
"answer": "police officers",
|
|
"gt_answers": [
|
|
"Police"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000923.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say on the purple shirt?",
|
|
"answer": "citizen volunteers",
|
|
"gt_answers": [
|
|
"Citizen Volunteer"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2336642.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the visible text in the upper right corner say?",
|
|
"answer": "mercado de libros",
|
|
"gt_answers": [
|
|
"G3 BAZA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/3174.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the right side of the pizza box say?",
|
|
"answer": "pizza hut",
|
|
"gt_answers": [
|
|
"Enjoy your pizza"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000196480.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the top-left of the cake?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Last"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000213344.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the watermock text say?",
|
|
"answer": "the watermock text say",
|
|
"gt_answers": [
|
|
"shutterstock"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02669723_9686.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number at the bottom?",
|
|
"answer": "1",
|
|
"gt_answers": [
|
|
"36875461"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02669723_9686.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN ON THERE",
|
|
"answer": "built in 1946",
|
|
"gt_answers": [
|
|
"BUILT 1945 CREWE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343054.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHICH VEHICLE IS THERE",
|
|
"answer": "train",
|
|
"gt_answers": [
|
|
"TRAIN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2343054.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date shown in this picture?",
|
|
"answer": "christmas eve",
|
|
"gt_answers": [
|
|
"23 Dec"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338472.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the function of the vehicle?",
|
|
"answer": "ambulance",
|
|
"gt_answers": [
|
|
"AMBULANCE"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594945_11676.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the model name of the vehicle?",
|
|
"answer": "defender",
|
|
"gt_answers": [
|
|
"DEFENDER"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594945_11676.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top front of the bus?",
|
|
"answer": "puerto rico",
|
|
"gt_answers": [
|
|
"POLPERRO TRAM CO."
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2332331.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is written on the man's shirt?",
|
|
"answer": "3",
|
|
"gt_answers": [
|
|
"Three"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362748.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the man's head?",
|
|
"answer": "hat",
|
|
"gt_answers": [
|
|
"glasses"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362748.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Brand name?",
|
|
"answer": "fiesta",
|
|
"gt_answers": [
|
|
"Culinary Delights"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of Gumbo is in the package?",
|
|
"answer": "cajun",
|
|
"gt_answers": [
|
|
"SHRIMP & SAUSAGE"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What style selection is this product?",
|
|
"answer": "asian",
|
|
"gt_answers": [
|
|
"Cajun"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010995.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name on top of label",
|
|
"answer": "big house red",
|
|
"gt_answers": [
|
|
"big house red"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04591713_374.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does it say directly under the wind shield?",
|
|
"answer": "lincoln city centre",
|
|
"gt_answers": [
|
|
"Training bus"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000378821.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What city is the pitcher playing for?",
|
|
"answer": "new york city",
|
|
"gt_answers": [
|
|
"Toledo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348105.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers are visible on the red sign on the right?",
|
|
"answer": "0 0",
|
|
"gt_answers": [
|
|
"180",
|
|
"1-800"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348105.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two letters are visible on the sign on the left?",
|
|
"answer": "mlb",
|
|
"gt_answers": [
|
|
"as"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348105.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of injury is it used for?",
|
|
"answer": "traumatic brain injury",
|
|
"gt_answers": [
|
|
"Traumatic bleeding"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the dimensions?",
|
|
"answer": "x x inches",
|
|
"gt_answers": [
|
|
"3 IN X 4 YDS"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008590.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brands are advertised on the train?",
|
|
"answer": "swiss railways",
|
|
"gt_answers": [
|
|
"SAMSUNG"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000283309.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of electronic is being advertised?",
|
|
"answer": "smart phone",
|
|
"gt_answers": [
|
|
"SMART TV"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000283309.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the number of the vehicle?",
|
|
"answer": "8808",
|
|
"gt_answers": [
|
|
"82038"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317837.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the name of the vehicle?",
|
|
"answer": "paras",
|
|
"gt_answers": [
|
|
"PARTAS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317837.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the website?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"WingHouse"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n06359193_4947.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is on the plane?",
|
|
"answer": "passengers",
|
|
"gt_answers": [
|
|
"Malaysia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2412684.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is located at this next exit?",
|
|
"answer": "hidden worlds gentes park 2",
|
|
"gt_answers": [
|
|
"Hidden Worlds Cenotes Park."
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/4671.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who owns the Irish restaurant and bar?",
|
|
"answer": "the irish restaurant and bar is owned by the irish pub group",
|
|
"gt_answers": [
|
|
"Malones",
|
|
"MALONE'S"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_444.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the restaurant shown?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"Malone's Irish Restaurant & Bar",
|
|
"MALONE'S"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_444.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the product?",
|
|
"answer": "a speaker",
|
|
"gt_answers": [
|
|
"infinty"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03691459_48693.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the front of this train?",
|
|
"answer": "46",
|
|
"gt_answers": [
|
|
"485"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000335804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name labeled behind the number 485?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"north EAST express"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000335804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company on this train?",
|
|
"answer": "japan railways",
|
|
"gt_answers": [
|
|
"EAST JAPAN RAILWAY COMPANY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000335804.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the 6 letters visible on the front of this store?",
|
|
"answer": "sander",
|
|
"gt_answers": [
|
|
"Tander"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/713035.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is displayed on the truck",
|
|
"answer": "a crane",
|
|
"gt_answers": [
|
|
"roxu"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03126707_25027.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What numbers are on the front window?",
|
|
"answer": "the bus number",
|
|
"gt_answers": [
|
|
"1019"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2348477.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word can be read on the fire hydrant?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Fire"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000035972.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What follows the word no?",
|
|
"answer": "no smoking",
|
|
"gt_answers": [
|
|
"smoking"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000272040.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the green fence?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Poland Spring"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000566175.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What ad is displayed under numbers?",
|
|
"answer": "mcdonalds",
|
|
"gt_answers": [
|
|
"Samsung"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2421.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the lender above the tourist og sign?",
|
|
"answer": "citibank",
|
|
"gt_answers": [
|
|
"Prudential Financial"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2421.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the truck?",
|
|
"answer": "garbage",
|
|
"gt_answers": [
|
|
"todos somos orizaba",
|
|
"ORIZABA"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03417042_8367.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the airline of the plane?",
|
|
"answer": "air canada",
|
|
"gt_answers": [
|
|
"Air Canada"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404919.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the bran name on display?",
|
|
"answer": "minkin",
|
|
"gt_answers": [
|
|
"Minikin"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_375.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is advertised on the display window to the right in red writing?",
|
|
"answer": "minikin",
|
|
"gt_answers": [
|
|
"instinct takes over"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_375.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is this store called?",
|
|
"answer": "marie hubert",
|
|
"gt_answers": [
|
|
"Marie-Hubert"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04200800_4494.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number train is this?",
|
|
"answer": "a train",
|
|
"gt_answers": [
|
|
"82304"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404158.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two words are written directly after U.S. on the plane?",
|
|
"answer": "air force",
|
|
"gt_answers": [
|
|
"AIR FORCE"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000115862.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What four digit number is written directly under the window ?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"9211"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000115862.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the Name of the Ad?",
|
|
"answer": "paris",
|
|
"gt_answers": [
|
|
"Paribas"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349488.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of TV remote is this?",
|
|
"answer": "sony",
|
|
"gt_answers": [
|
|
"ARCHOS"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04074963_18951.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written at the bottom of the picture?",
|
|
"answer": "a man on a skateboard",
|
|
"gt_answers": [
|
|
"Foto: Leo Gomes"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355050.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of website being displayed in screen?",
|
|
"answer": "starbucks",
|
|
"gt_answers": [
|
|
"Starbucks coffee company"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the country name written on the doll?",
|
|
"answer": "brazil",
|
|
"gt_answers": [
|
|
"BRASIL"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2369607.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers written in red on the plane?",
|
|
"answer": "f-16",
|
|
"gt_answers": [
|
|
"211"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415778.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the door?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Blue"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327872.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what does the fire truck have written on it?",
|
|
"answer": "san diego fire department",
|
|
"gt_answers": [
|
|
"Rescue 9"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03345487_2906.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the text in the bottom left corner?",
|
|
"answer": "chevrolet silverado towing a boat",
|
|
"gt_answers": [
|
|
"1stpix"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000344998.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the occupation listed on the side of the green truck?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"PARK RANGER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000344998.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the photo tag?",
|
|
"answer": "mushrooms",
|
|
"gt_answers": [
|
|
"Raymond McNeil"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n13037406_478.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS THE LETTER WRITTEN",
|
|
"answer": "nec",
|
|
"gt_answers": [
|
|
"NEC"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04238763_19010.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT COLOUR IS ON BOTH SIDE",
|
|
"answer": "black and silver",
|
|
"gt_answers": [
|
|
"BLACK"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04238763_19010.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is shown on the closest boat in the frame to the camera?",
|
|
"answer": "33",
|
|
"gt_answers": [
|
|
"33"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the last name of the person who owns the copyright of this photo?",
|
|
"answer": "mcdonald",
|
|
"gt_answers": [
|
|
"Marbach"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which letter is seen to the left of the large \"33\" on the first boat?",
|
|
"answer": "e",
|
|
"gt_answers": [
|
|
"E"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2316102.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the ski trail?",
|
|
"answer": "black diamond",
|
|
"gt_answers": [
|
|
"Charlie Banks Trail",
|
|
"CHARLIE BANKS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the distance of the trail?",
|
|
"answer": "5 km",
|
|
"gt_answers": [
|
|
"3K"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What ski level is the trail rated?",
|
|
"answer": "intermediate",
|
|
"gt_answers": [
|
|
"Advanced-Intermediate"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2367935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the label of each bottle?",
|
|
"answer": "the name of the beard oil",
|
|
"gt_answers": [
|
|
"Feel Good"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03937543_1013.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what website is advertised here?",
|
|
"answer": "chester zoo",
|
|
"gt_answers": [
|
|
"ChesterZoo.org"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404232.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is in the image?",
|
|
"answer": "a jar of salsa",
|
|
"gt_answers": [
|
|
"Nutrition Facts"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013746.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many calories are in this package?",
|
|
"answer": "0 calories per serving",
|
|
"gt_answers": [
|
|
"10 calories",
|
|
"10"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013746.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of Indian Institute is this building?",
|
|
"answer": "institute of technology",
|
|
"gt_answers": [
|
|
"technology"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000059.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word comes after \"Indian Institute of\"?",
|
|
"answer": "engineering",
|
|
"gt_answers": [
|
|
"technology"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000059.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word on the sign above the arrow?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"wa"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000264456.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the word below \"ON' on the sign?",
|
|
"answer": "on",
|
|
"gt_answers": [
|
|
"wa"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000264456.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers under the street sign \"Fuller\"",
|
|
"answer": "the numbers under the street sign are the street names",
|
|
"gt_answers": [
|
|
"1600"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000264456.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the arrow's label read?",
|
|
"answer": "old man grinds",
|
|
"gt_answers": [
|
|
"old man grinds"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000095924.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What AVE. is this?",
|
|
"answer": "dead end street",
|
|
"gt_answers": [
|
|
"MOODY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2346768.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is this photo by?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"JUAN N ONLY"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000000109.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company name is printed on the guitar pick?",
|
|
"answer": "fender",
|
|
"gt_answers": [
|
|
"Fender"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03929660_17906.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written in all capital letters on the guitar pick?",
|
|
"answer": "rock",
|
|
"gt_answers": [
|
|
"MEDIUM"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03929660_17906.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the side of the fire truck?",
|
|
"answer": "9",
|
|
"gt_answers": [
|
|
"41"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000359695.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the shop at the top of the sign?",
|
|
"answer": "wool shop",
|
|
"gt_answers": [
|
|
"Fromagerie"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2355017.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the bus in the picture?",
|
|
"answer": "yellow cab",
|
|
"gt_answers": [
|
|
"City Sights NY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2427.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What sport is being played?",
|
|
"answer": "baseball",
|
|
"gt_answers": [
|
|
"Baseball"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2337458.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How many signs are there?",
|
|
"answer": "three",
|
|
"gt_answers": [
|
|
"4",
|
|
"three"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323113.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the blue hoodie?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"83"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000308470.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is on the player in yellow?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"13"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000308470.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is in red?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"3"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000308470.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is listed?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"FreakyFrugalite.com"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000491932.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name on the cake?",
|
|
"answer": "christopher",
|
|
"gt_answers": [
|
|
"Christopher"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2416833.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind type facts are on this can?",
|
|
"answer": "nutrition facts",
|
|
"gt_answers": [
|
|
"Nutrition"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000584.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of game system is this person playing with?",
|
|
"answer": "wii",
|
|
"gt_answers": [
|
|
"Wii"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402476.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters stated on the hat ?",
|
|
"answer": "babe",
|
|
"gt_answers": [
|
|
"N Y",
|
|
"NY"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2349876.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the numbers on the monitor?",
|
|
"answer": "the numbers on the monitor are the date and time",
|
|
"gt_answers": [
|
|
"10 11"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000383893.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in white on the sign?",
|
|
"answer": "art",
|
|
"gt_answers": [
|
|
"Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342774.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in yellow on the sign?",
|
|
"answer": "art",
|
|
"gt_answers": [
|
|
"Art"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2342774.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boat (written on white board) ?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"ternen gilleleje"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000164000.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the window?",
|
|
"answer": "11",
|
|
"gt_answers": [
|
|
"112"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02701002_10365.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the license plate number?",
|
|
"answer": "111",
|
|
"gt_answers": [
|
|
"SFA RK 352"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02701002_10365.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of car is shown in the image?",
|
|
"answer": "audi",
|
|
"gt_answers": [
|
|
"Ford"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2362899.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is on the guys shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Harley Davidson",
|
|
"HARLEY-DAVIDSON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000167644.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the top of the green wall?",
|
|
"answer": "fed cup",
|
|
"gt_answers": [
|
|
"Rolex"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2377304.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the boat?",
|
|
"answer": "no name",
|
|
"gt_answers": [
|
|
"Viking"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358259.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the letters in the circle?",
|
|
"answer": "a b c d",
|
|
"gt_answers": [
|
|
"abc"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03141823_20027.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the month date-stamped on the picture?",
|
|
"answer": "august",
|
|
"gt_answers": [
|
|
"06"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000515249.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is his jacket?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"Blue",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373518.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is his helmet?",
|
|
"answer": "blue",
|
|
"gt_answers": [
|
|
"silver",
|
|
"{}"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373518.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of this business?",
|
|
"answer": "la bottega del cafe",
|
|
"gt_answers": [
|
|
"La Bottega del Caffe"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1274.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number can be found on the sign?",
|
|
"answer": "5",
|
|
"gt_answers": [
|
|
"5"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/1274.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word appears on the top left corner?",
|
|
"answer": "i",
|
|
"gt_answers": [
|
|
"Nickeeth"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000044998.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "WHAT IS WRITTEN IN THIS IMAGE?",
|
|
"answer": "welcome to hope",
|
|
"gt_answers": [
|
|
"HOMAGE TO HAPA"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2323348.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is the sign on top refer",
|
|
"answer": "the sign on top refers",
|
|
"gt_answers": [
|
|
"2828 yes optus",
|
|
"YES OPTUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2338963.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Is there more coffee or pizza here?",
|
|
"answer": "coffee",
|
|
"gt_answers": [
|
|
"Coffee"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028068.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign tell you to do?",
|
|
"answer": "stop",
|
|
"gt_answers": [
|
|
"STOP"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328833.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the label on the drink cup?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Coca-Cola"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000509509.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the yellow sign say?",
|
|
"answer": "no smoking",
|
|
"gt_answers": [
|
|
"Low Prices Daily"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_606.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the store front read?",
|
|
"answer": "nike",
|
|
"gt_answers": [
|
|
"collectpoint"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_870.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store in the back?",
|
|
"answer": "zara",
|
|
"gt_answers": [
|
|
"Collectpoint"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_870.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the hat?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"C"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000435695.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What airline is this plane from?",
|
|
"answer": "united airlines",
|
|
"gt_answers": [
|
|
"UNITED"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417823.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is company name written on bus ?",
|
|
"answer": "brighton and hove",
|
|
"gt_answers": [
|
|
"BRIGHTON&HOVE"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2327873.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number in the green jersey facing away from the camera?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"8"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000048442.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written under STOP?",
|
|
"answer": "driving",
|
|
"gt_answers": [
|
|
"Driving"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2409386.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What color is the no-parking sign?",
|
|
"answer": "yellow",
|
|
"gt_answers": [
|
|
"Yellow"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2361163.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What car brand is represented in this photograph?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Honda"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000591.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter represents this store front's brand?",
|
|
"answer": "h",
|
|
"gt_answers": [
|
|
"H"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000591.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written in red in the photograph?",
|
|
"answer": "honda",
|
|
"gt_answers": [
|
|
"Honda"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000591.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the primary color of the parrot?",
|
|
"answer": "green",
|
|
"gt_answers": [
|
|
"Green"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000137420.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the second bus from the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"8257"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000515252.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What bank is in this picture?",
|
|
"answer": "bank of america",
|
|
"gt_answers": [
|
|
"Citizens Bank"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/1859.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what is present in top of the left corner",
|
|
"answer": "a basket of vegetables",
|
|
"gt_answers": [
|
|
"Ben spark"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000224759.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letters are in the bottom right of this label?",
|
|
"answer": "ef",
|
|
"gt_answers": [
|
|
"Z5"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002660.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what brand is this helmet?",
|
|
"answer": "ed hardy",
|
|
"gt_answers": [
|
|
"Doeri"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03127747_1420.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of lettuce is included in the package?",
|
|
"answer": "romaine lettuce",
|
|
"gt_answers": [
|
|
"Iceberg"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004881.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of the lotion?",
|
|
"answer": "body shop",
|
|
"gt_answers": [
|
|
"Bath & Body Works"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002192.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the scent of the lotion?",
|
|
"answer": "it smells like sweet peas",
|
|
"gt_answers": [
|
|
"Velvet Sugar"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002192.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of store is in the background?",
|
|
"answer": "comic book store",
|
|
"gt_answers": [
|
|
"Bank"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000295505.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is being advertised in the background?",
|
|
"answer": "lego",
|
|
"gt_answers": [
|
|
"Aegon"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2406522.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the owner's first love?",
|
|
"answer": "mom",
|
|
"gt_answers": [
|
|
"Mom"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2402502.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the auto show on the truck?",
|
|
"answer": "detroit water",
|
|
"gt_answers": [
|
|
"42 ANNUAL DETROIT AUTO SHOW"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360185.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the wooden panel say?",
|
|
"answer": "detroit water",
|
|
"gt_answers": [
|
|
"Quality motors since 1911"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2360185.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is after the word JOHN in this picture?",
|
|
"answer": "little",
|
|
"gt_answers": [
|
|
"LITTLE"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/img_658.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the street?",
|
|
"answer": "season street",
|
|
"gt_answers": [
|
|
"Sheldon St.",
|
|
"SHELDON ST"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the address of the street?",
|
|
"answer": "spencer street",
|
|
"gt_answers": [
|
|
"12400"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2344214.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the percentage of alcohol in the bottle?",
|
|
"answer": "12%",
|
|
"gt_answers": [
|
|
"14.5"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04591713_5298.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is she playing?",
|
|
"answer": "tennis",
|
|
"gt_answers": [
|
|
"Tennis"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2408817.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name?",
|
|
"answer": "swan street",
|
|
"gt_answers": [
|
|
"Swan",
|
|
"SWAN st"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000045335.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the upper sign say?",
|
|
"answer": "no outlet",
|
|
"gt_answers": [
|
|
"SWAN ST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000045335.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word is written up the side of the building?",
|
|
"answer": "elang",
|
|
"gt_answers": [
|
|
"Bang"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/3611.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand name is on the bag?",
|
|
"answer": "banquet",
|
|
"gt_answers": [
|
|
"Banquet"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018091.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company owns the train?",
|
|
"answer": "toronto transit commission",
|
|
"gt_answers": [
|
|
"ttc"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355054.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the photographer of this picture?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"christian flores"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355054.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is title of the photo?",
|
|
"answer": "subway train",
|
|
"gt_answers": [
|
|
"RYAN'S TTC PHOTOS"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000355054.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What metal are the snaps made out of?",
|
|
"answer": "hardened steel",
|
|
"gt_answers": [
|
|
"Steel"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02916936_7426.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the warship shown?",
|
|
"answer": "uss midway",
|
|
"gt_answers": [
|
|
"U.S.S. Midway CVB-41"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02687172_21055.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What company is advertised on the boards?",
|
|
"answer": "coca cola",
|
|
"gt_answers": [
|
|
"Bank of America"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2411834.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "How much is acorn squash?",
|
|
"answer": "acorn squash is a type of squash",
|
|
"gt_answers": [
|
|
"99 lb."
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07717410_11533.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What farm market is this at?",
|
|
"answer": "santa monica farmers market",
|
|
"gt_answers": [
|
|
"Burris Farm Market"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07717410_11533.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What kind of onion is being sold?",
|
|
"answer": "yellow onion",
|
|
"gt_answers": [
|
|
"Sweet",
|
|
"Sweet Onion"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n07717410_11533.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company on the sign?",
|
|
"answer": "google",
|
|
"gt_answers": [
|
|
"Google"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000481.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name printed on the cup in the foreground?",
|
|
"answer": "yelp",
|
|
"gt_answers": [
|
|
"Yotel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the letter shown on the building in the background?",
|
|
"answer": "y",
|
|
"gt_answers": [
|
|
"W"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2347935.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the bank?",
|
|
"answer": "first national bank",
|
|
"gt_answers": [
|
|
"First national",
|
|
"FIRST NATIONAL BANK"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000577723.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the orange drink ?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Fanta"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000310518.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was this picture taken?",
|
|
"answer": "in the spring",
|
|
"gt_answers": [
|
|
"11:08 AM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000357235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What date was this picture taken",
|
|
"answer": "1 july 2015",
|
|
"gt_answers": [
|
|
"06/01/2011"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000357235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the opposite to YOUNGEST?",
|
|
"answer": "oldest",
|
|
"gt_answers": [
|
|
"OLDEST"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000329134.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year was this picture taken?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2010"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02107908_11083.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of sauce is in the picture?",
|
|
"answer": "hot sauce",
|
|
"gt_answers": [
|
|
"Tabasco"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2373519.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what time are today's specials?",
|
|
"answer": "the traffic light",
|
|
"gt_answers": [
|
|
"9-2pm",
|
|
"9-2 pm"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000449066.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What year is written on the disc?",
|
|
"answer": "2002",
|
|
"gt_answers": [
|
|
"2002"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_11817.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What letter is on the tennis racquets?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"W"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000353653.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in red on the sign on the left?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"exit"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000047972.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the company that made this vehicle?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"Dodge"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000380171.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written in green on the plane?",
|
|
"answer": "air france",
|
|
"gt_answers": [
|
|
"transavia"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358084.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the registration of the plane?",
|
|
"answer": "msn 002",
|
|
"gt_answers": [
|
|
"F-GZNC",
|
|
"F-GZMC"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2358084.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word on the mans shirt?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"be"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000362184.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the mans shirt say?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"be the change"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000362184.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the sign represent ?",
|
|
"answer": "bus stop",
|
|
"gt_answers": [
|
|
"Bus Stop"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2404132.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red sticker say?",
|
|
"answer": "funding war",
|
|
"gt_answers": [
|
|
"FUNDING WAR!"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2417731.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What website is being shown in front of the computer screeen?",
|
|
"answer": "youtube",
|
|
"gt_answers": [
|
|
"www.bizsyscon.com"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04152593_5249.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is on the white banner?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"basic"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000514186.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the black emblem on the far right?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"BILLABON"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000514186.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What grain does this food contain?",
|
|
"answer": "wheat",
|
|
"gt_answers": [
|
|
"Whole grain",
|
|
"WHOLE"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the red push pin say?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"Child hunger ends here",
|
|
"CHILD HUNGER ENDS HERE."
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What product is this?",
|
|
"answer": "pizza",
|
|
"gt_answers": [
|
|
"Coffee steamers"
|
|
],
|
|
"image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007235.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand is the keyboard?",
|
|
"answer": "logitech",
|
|
"gt_answers": [
|
|
"Logitech"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334019.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What type of media desktop is this?",
|
|
"answer": "a computer desktop",
|
|
"gt_answers": [
|
|
"Dinovo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2334019.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who has the copyright on the photo?",
|
|
"answer": "the photographer",
|
|
"gt_answers": [
|
|
"Robert J. Steiner"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n02776631_24991.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed word on the white pot?",
|
|
"answer": "love",
|
|
"gt_answers": [
|
|
"Love"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2364598.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the brand of camera?",
|
|
"answer": "canon",
|
|
"gt_answers": [
|
|
"Canon"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04069434_4504.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number of the parking meter?",
|
|
"answer": "4",
|
|
"gt_answers": [
|
|
"303"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03891332_5430.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "what color is the hat?",
|
|
"answer": "white",
|
|
"gt_answers": [
|
|
"white"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2328765.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the number on the bumper of the school bus?",
|
|
"answer": "0",
|
|
"gt_answers": [
|
|
"253"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2372661.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the street name on the bottom sign?",
|
|
"answer": "wilson street",
|
|
"gt_answers": [
|
|
"Fun street",
|
|
"FUN"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/2/2415089.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written on the billboard closest to the stop sign?",
|
|
"answer": "it is a billboard",
|
|
"gt_answers": [
|
|
"American Apparel"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2326693.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Who is the green sponsor of the hockey team?",
|
|
"answer": "nhl",
|
|
"gt_answers": [
|
|
"Rasmussen college"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_59273.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What number is the player Makaroy?",
|
|
"answer": "17",
|
|
"gt_answers": [
|
|
"26"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_59273.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What beer brand sponsors this team?",
|
|
"answer": "bud light",
|
|
"gt_answers": [
|
|
"Bud light"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n04019541_59273.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the date and time written in the notebook?",
|
|
"answer": "the date and time is written in the notebook",
|
|
"gt_answers": [
|
|
"8/23/10 12:55 PM"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365879.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What language is being studied?",
|
|
"answer": "german",
|
|
"gt_answers": [
|
|
"SPANISH"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000365879.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What are the words printed in the top area, describing the brand associated with the toy?",
|
|
"answer": "doctor who",
|
|
"gt_answers": [
|
|
"Doctor Who"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317268.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the message in white lettering, at the bottom of the toy's packaging?",
|
|
"answer": "do not drink and drive",
|
|
"gt_answers": [
|
|
"Try Me"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2317268.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is written at the bottom of the poster on teh right?",
|
|
"answer": "i love you",
|
|
"gt_answers": [
|
|
"SHORTBUS"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2356182.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is printed on the sweatshirt?",
|
|
"answer": "shinobi",
|
|
"gt_answers": [
|
|
"Shiamak"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368645.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the first word after the copyright symbol?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"Majomo"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368645.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the second word after the copyright symbol?",
|
|
"answer": "",
|
|
"gt_answers": [
|
|
"Photography"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2368645.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the store?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"COLDCORNER"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000069486.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "When was the photo copyrighted?",
|
|
"answer": "2010",
|
|
"gt_answers": [
|
|
"2011"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319670.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the photographer?",
|
|
"answer": "michael keller",
|
|
"gt_answers": [
|
|
"Miguel Navaza"
|
|
],
|
|
"image_path": "./data/STVQA/VisualGenome/1/2319670.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the network of hospitals advertised?",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"AM+Ri"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000029909.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What two words are in red letters?",
|
|
"answer": "coco rabbit",
|
|
"gt_answers": [
|
|
"Coco Rabbit"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594734_45012.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Which word is the name of an animal?",
|
|
"answer": "rabbit",
|
|
"gt_answers": [
|
|
"Rabbit"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594734_45012.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What word has four letters?",
|
|
"answer": "rabbit",
|
|
"gt_answers": [
|
|
"Coco"
|
|
],
|
|
"image_path": "./data/STVQA/imageNet/n03594734_45012.JPEG",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What brand of airline is this?",
|
|
"answer": "american airlines",
|
|
"gt_answers": [
|
|
"American"
|
|
],
|
|
"image_path": "./data/STVQA/IIIT_text/img_000096.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the movie advertised on the bus?",
|
|
"answer": "kingsman",
|
|
"gt_answers": [
|
|
"prisoners"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000114204.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "Whats the first name on they cubical",
|
|
"answer": "person",
|
|
"gt_answers": [
|
|
"barbara"
|
|
],
|
|
"image_path": "./data/STVQA/coco-text/COCO_train2014_000000371295.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What does the business to the left sell?",
|
|
"answer": "beer",
|
|
"gt_answers": [
|
|
"Chocolate"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_244.jpg",
|
|
"model_name": "BLIP2"
|
|
},
|
|
{
|
|
"question": "What is the name of the business?",
|
|
"answer": "tax brewers",
|
|
"gt_answers": [
|
|
"AX BRENNER CHOCOLATE BAR"
|
|
],
|
|
"image_path": "./data/STVQA/icdar/test_img_244.jpg",
|
|
"model_name": "BLIP2"
|
|
}
|
|
] |