# USAGE # python search_bing_api.py --query "alan grant" --output dataset/alan_grant # python search_bing_api.py --query "ian malcolm" --output dataset/ian_malcolm # python search_bing_api.py --query "ellie sattler" --output dataset/ellie_sattler # python search_bing_api.py --query "john hammond jurassic park" --output dataset/john_hammond # python search_bing_api.py --query "owen grady jurassic world" --output dataset/owen_grady # python search_bing_api.py --query "claire dearing jurassic world" --output dataset/claire_dearing # import the necessary packages from requests import exceptions import argparse import requests import cv2 import os # construct the argument parser and parse the arguments ap = argparse.ArgumentParser() ap.add_argument("-q", "--query", required=True, help="search query to search Bing Image API for") ap.add_argument("-o", "--output", required=True, help="path to output directory of images") args = vars(ap.parse_args()) # set your Microsoft Cognitive Services API key along with (1) the # maximum number of results for a given search and (2) the group size # for results (maximum of 50 per request) API_KEY = "INSERT_YOUR_API_KEY_HERE" MAX_RESULTS = 100 GROUP_SIZE = 50 # set the endpoint API URL URL = "https://api.cognitive.microsoft.com/bing/v7.0/images/search" # when attemping to download images from the web both the Python # programming language and the requests library have a number of # exceptions that can be thrown so let's build a list of them now # so we can filter on them EXCEPTIONS = set([IOError, FileNotFoundError, exceptions.RequestException, exceptions.HTTPError, exceptions.ConnectionError, exceptions.Timeout]) # store the search term in a convenience variable then set the # headers and search parameters term = args["query"] headers = {"Ocp-Apim-Subscription-Key" : API_KEY} params = {"q": term, "offset": 0, "count": GROUP_SIZE} # make the search print("[INFO] searching Bing API for '{}'".format(term)) search = requests.get(URL, headers=headers, params=params) search.raise_for_status() # grab the results from the search, including the total number of # estimated results returned by the Bing API results = search.json() estNumResults = min(results["totalEstimatedMatches"], MAX_RESULTS) print("[INFO] {} total results for '{}'".format(estNumResults, term)) # initialize the total number of images downloaded thus far total = 0 # loop over the estimated number of results in `GROUP_SIZE` groups for offset in range(0, estNumResults, GROUP_SIZE): # update the search parameters using the current offset, then # make the request to fetch the results print("[INFO] making request for group {}-{} of {}...".format( offset, offset + GROUP_SIZE, estNumResults)) params["offset"] = offset search = requests.get(URL, headers=headers, params=params) search.raise_for_status() results = search.json() print("[INFO] saving images for group {}-{} of {}...".format( offset, offset + GROUP_SIZE, estNumResults)) # loop over the results for v in results["value"]: # try to download the image try: # make a request to download the image print("[INFO] fetching: {}".format(v["contentUrl"])) r = requests.get(v["contentUrl"], timeout=30) # build the path to the output image ext = v["contentUrl"][v["contentUrl"].rfind("."):] p = os.path.sep.join([args["output"], "{}{}".format( str(total).zfill(8), ext)]) # write the image to disk f = open(p, "wb") f.write(r.content) f.close() # catch any errors that would not unable us to download the # image except Exception as e: # check to see if our exception is in our list of # exceptions to check for if type(e) in EXCEPTIONS: print("[INFO] skipping: {}".format(v["contentUrl"])) continue # try to load the image from disk image = cv2.imread(p) # if the image is `None` then we could not properly load the # image from disk (so it should be ignored) if image is None: print("[INFO] deleting: {}".format(p)) os.remove(p) continue # update the counter total += 1