from PIL import Image
import requests
# Count how many image links in a sample of the training TSV cannot be opened.
#
# Each line of the TSV is split on tabs; field 0 is stored as the value and
# field 1 (the link) as the key of `train_map`. Every distinct link is then
# fetched and handed to PIL; links that fail are printed and counted.
train_file = 'data/Train%2FGCC-training.tsv'  # train file
SAMPLE_SIZE = 10000  # number of leading lines of the file to check
REQUEST_TIMEOUT = 10  # seconds; without a timeout a dead host can block forever

with open(train_file, 'r') as f:
    train_read = f.readlines()
sample_train = train_read[:SAMPLE_SIZE]

# Map link -> first field. The newline is stripped from the whole line rather
# than slicing off the last character of the link, so a final line without a
# trailing newline (or a row with extra columns) does not lose a character.
train_map = {}
for line in sample_train:
    first_field, link = line.rstrip("\n").split("\t")[:2]
    train_map[link] = first_field

# Dict keys are unique, so duplicate links are only checked once.
links = list(train_map)

not_read = 0  # keep a count of images that were not possible to read
# loop over the links and read whichever possible
for link in links:
    try:
        # The response is used as a context manager so the streamed
        # connection is released after each attempt.
        with requests.get(link, stream=True, timeout=REQUEST_TIMEOUT) as response:
            # NOTE(review): Image.open is documented as lazy, so this only
            # identifies the image and does not decode pixel data; `im`
            # should not be used after the response is closed.
            im = Image.open(response.raw)
    except Exception:
        # Best-effort check: any network or decoding failure counts as
        # "not read". Unlike a bare `except:`, this lets Ctrl-C
        # (KeyboardInterrupt) and SystemExit stop the run.
        print(link)
        not_read += 1
Here are some of the links that did not work.
In a sample of 10,000 links, I found at least 51 that did not work.
Looking forward to hearing more from you guys.
Thanks!