def bing(request):
    """
    Search bing using a paid API.
    ---
    type:
      translated:
        type: string
    parameters:
        - name: query
          description: search query
          required: true
          type: string
          paramType: form
        - name: tx
          description: Transaction Id (proof of payment)
          type: string
          paramType: query
    """
    # Reject requests that do not carry a search term at all.
    if 'query' not in request.data:
        return Response(
            {"error": "Must provide a 'query' parameter."},
            status=status.HTTP_400_BAD_REQUEST,
        )
    search_client = BingSearchAPI(settings.AZURE_MARKETPLACE_KEY)
    outcome = search_client.search_web(request.data['query'],
                                       payload={'$format': 'json'})
    # Propagate Bing's own error body on failure; otherwise relay the raw text.
    if not outcome.ok:
        return Response({"error": outcome.text},
                        status=status.HTTP_400_BAD_REQUEST)
    return Response({"results": outcome.text})
def query(query_string):
    """Return the URLs of the web hits Bing gives for *query_string*."""
    search_params = {
        'ImageFilters': '"Face:Face"',
        '$format': 'json',
        '$top': 10,
        '$skip': 0,
    }
    client = BingSearchAPI(my_key)
    payload = client.search('web', query_string, search_params).json()  # requests 1.0+
    # First bucket of the OData response holds the web results.
    urls = []
    for hit in payload['d']['results'][0]['Web']:
        urls.append(hit['Url'])
    return urls
def get_actor_url(actor_name):
    """Return the thumbnail URL of the top Bing image hit for an actor's name."""
    search_params = {
        'ImageFilters': '"Face:Face"',  # restrict hits to images containing a face
        '$format': 'json',
        '$top': 1,
        '$skip': 0,
    }
    client = BingSearchAPI(BING_KEY)
    encoded_name = actor_name.encode('utf-8')
    payload = client.search('image', encoded_name, search_params).json()
    top_hit = payload['d']['results'][0]['Image'][0]
    return top_hit['Thumbnail']['MediaUrl']
def bing_search_total(_verbose, _search_phrase, _bing_api_key):
    """Return the total number of Bing web results for an exact-phrase search.

    :param _verbose: when truthy, echo the phrase and its hit count / errors.
    :param _search_phrase: phrase to search for (spaces become '+').
    :param _bing_api_key: Bing / Azure DataMarket API key.
    :return: total result count as an int, or 0 on any search failure.
    """
    # %22 acts as quotes, facilitating a phrase search.
    _search_phrase_parsed = "%22" + _search_phrase.replace(' ', '+').strip(' ') + "%22"
    _bing_search = BingSearchAPI(_bing_api_key)
    _bing_parameters = {'$format': 'json', '$top': 2}
    try:
        res = _bing_search.search('web', _search_phrase_parsed, _bing_parameters).json()
        total = int(res["d"]["results"][0]["WebTotal"])
        if _verbose:
            # BUG FIX: the original concatenated the int total onto a str,
            # raising TypeError and silently diverting every verbose call
            # into the except branch (returning 0).
            print('\t' + _search_phrase_parsed.replace('+', ' ').replace('%22', '')
                  + str(total))
        return total
    except Exception as e:
        if _verbose:
            print('\tERROR: in bing.search() - search total\n\t' + str(e))
            # BUG FIX: only OSError-like exceptions carry errno/strerror; the
            # original line raised AttributeError inside the handler otherwise.
            if hasattr(e, 'errno') and hasattr(e, 'strerror'):
                print("[Errno {0}] {1}".format(e.errno, e.strerror))
        return 0
from bing_search_api import BingSearchAPI
import json

# Bing API credentials and the term we want news for.
my_key = "8jhH8TwVCHdDiWxXYgC5KqyEmChYTKW0kkFngbVYnH8"
query_string = "Sony"

bing = BingSearchAPI(my_key)
params = {'$format': 'json', '$top': 10, '$skip': 0}

# Fetch the news bucket once and print each of the ten items in it.
news = bing.search('news', query_string, params).json()
for item_index in range(10):
    print(news['d']['results'][0]['News'][item_index])
#news = json.loads(bing.search('news', query_string, params).json())
from secret_config import secret_config
from bing_search_api import BingSearchAPI

# One module-level client, built from the configured API key.
search_api = BingSearchAPI(secret_config['BING_API_KEY'])


def query(q, sources='web'):
    """Run a Bing search for *q* against *sources* and return the parsed JSON."""
    request_params = {"$format": "json", "$top": 20}
    return search_api.search(sources, q.encode('utf-8'), request_params).json()
def request_image(window, keyword, num_of_try=0, translate=True):
    """
    Queries Bing for images and retries up to 5 times if the randomly selected
    image could not be accessed
    :param window: opaque handle passed through unchanged on retries
    :param keyword: string which specifies the image content
    :param num_of_try: internal parameter that increases if the selected image
                       could not be retrieved (e.g. Forbidden Error)
    :param translate: Should the keyword be translated to english before the
                      search? (may increase result size)
    :return: The image data in bytes, or None if no keyword / no image found
    """
    if keyword is None:
        return None
    if translate:
        # BUG FIX: key files were opened via open(...).read() and never closed.
        # NOTE(review): key paths are relative to the CWD — confirm callers run
        # from the expected working directory.
        with open('../ms.key') as key_file:
            ms_key = key_file.read()
        trans = Translator('__RealTimeStoryIllustrator__', ms_key)
        translatedkw = trans.translate(keyword, lang_from='de', lang_to='en')
        print("IMAGE SERVICE: Getting image for " + str(keyword) +
              ". Searched for the english translation '" + str(translatedkw) + "'.")
    else:
        translatedkw = keyword
        print("IMAGE SERVICE: Getting image for " + str(keyword) + ".")
    if num_of_try > 5:
        # no images were found
        logger.error(
            "IMAGE SERVICE: Could not find an image after 5 tries for " +
            str(translatedkw) + ".")
        return None
    try:
        params = {
            '$format': 'json',
            '$top': 10,
            'ImageFilters': '\'Size:Small\''
        }
        with open('../bing.key') as key_file:
            bing_key = key_file.read()
        api = BingSearchAPI(bing_key)
        result = api.search_image(
            str(translatedkw + '+AND+(illustration+OR+clipart)'), params)
        hits = result.json()['d']['results']
        # Pick a random hit; an empty hit list makes randint raise, which is
        # caught below and triggers a retry like any other fetch failure.
        img_num = random.randint(0, len(hits) - 1)
        data = urllib.request.urlopen(hits[img_num]['MediaUrl'],
                                      timeout=2).read()
        return data
    except Exception as e:
        # have to catch everything since socket exceptions seem to be broken
        print("ERROR in IMAGE SERVICE: Trying again, request was denied " + str(e))
        return request_image(window, keyword, num_of_try + 1, translate=translate)
def Collocations_Method_2(_bing_api_key, _n_grams_from_input_text_file, _input_file_path,
                          _apply_POS_restrictions, _verbose):
    """Classify n-grams as collocations via Bing "define" phrase searches.

    For each n-gram, the top 10 Bing web hits are scanned; a hit whose title or
    URL mentions a dictionary-like site (wikipedia, thesaurus, ...) is fetched,
    and the n-gram counts as a collocation when it appears in that page's
    <title> or in the digit-stripped URL. Results are written to
    'collocations_title_url.txt' / 'not_collocations_title_url.txt' beside the
    input file.

    :param _bing_api_key: Bing / Azure DataMarket API key.
    :param _n_grams_from_input_text_file: iterable of POS-tagged n-gram strings.
    :param _input_file_path: path to the input file; outputs go next to it.
    :param _apply_POS_restrictions: skip n-grams carrying none of NN/VB/RB/JJ.
    :param _verbose: append a detailed trace to 'verbose.txt' beside the input.
    :return: (collocations, non_collocations) lists of n-gram strings.
    """
    if _verbose:
        # A file to save the verbose output of the program.
        _output_file_verbose = str(_input_file_path).replace(_input_file_path.split('/')[-1], 'verbose.txt')
        _output_file_verbose = open(_output_file_verbose, 'a')
        print("\n--------------------------------------------------------------------------", file=_output_file_verbose)
        print("\tMethod-2: Title-Url - Extracting collocations:", file=_output_file_verbose)
        print("--------------------------------------------------------------------------\n\n", file=_output_file_verbose)
    print("\tMethod-2: Title-Url - Extracting collocations ...")
    # N-gram phrases judged to be / not to be collocations.
    title_url_collocations = []
    n_grams_not_collocations = []
    # Retained from the (disabled) stemmed-match variant of the title check.
    stemmer = snowballstemmer.stemmer('english')
    _bing_search = BingSearchAPI(_bing_api_key)
    _bing_search_parameters = {'$format': 'json', '$top': 10}  # Top 10 search results
    # Words synonymous with 'Wikipedia', 'dictionary', 'definition'.
    # BUG FIX: the original list was missing commas after 'gazetteer' and
    # 'investorwords'; implicit string concatenation silently produced
    # 'gazetteerspellchecker' and 'investorwordsinvestopedia', dropping four
    # keywords from the match set.
    _list_of_synonymous_words = ['dictionary', 'lexicon', 'definition', 'meaning', 'unabridged', 'gazetteer',
                                 'spellchecker', 'spellingchecker', 'thesaurus', 'synonymfinder', 'wordfinder',
                                 'wikipedia', 'investorwords', 'investopedia', 'wiktionary']
    for _n_gram in _n_grams_from_input_text_file:
        if _verbose:
            print("\n%s:" % (_n_gram), file=_output_file_verbose)
        if _n_gram in title_url_collocations or _n_gram in n_grams_not_collocations:
            # Already classified earlier in this run; skip duplicates.
            continue
        else:
            if _apply_POS_restrictions:
                # Require at least one valid tag: Noun, Verb, Adverb, Adjective.
                valid_POS_tags = ['NN', 'VB', 'RB', 'JJ']
                _valid_POS_tag_counter = 0
                for _pos_tag in valid_POS_tags:
                    if _pos_tag in _n_gram:
                        _valid_POS_tag_counter += 1
                if _valid_POS_tag_counter == 0:
                    # No valid POS tag: not a collocation under POS restrictions.
                    n_grams_not_collocations.append(_n_gram)
                    if _verbose:
                        print("\t'%s' does not have valid POS tags\n\tMoving on to the next phrase ..." % (_n_gram),
                              file=_output_file_verbose)
                    continue  # We move on to the next phrase
            # Lower-case and strip the '_TAG' POS suffixes from the phrase.
            _n_gram_lower = _n_gram.lower() + ' '
            _n_gram_lower = re.sub(r'_.*? ', ' ', _n_gram_lower).rstrip(' ')
            _n_gram_lower_search_phrase = 'define "%s"' % (_n_gram_lower)  # Bing - Phrase search
            try:
                _search_results = _bing_search.search('web', _n_gram_lower_search_phrase,
                                                      _bing_search_parameters).json()
                _search_result_count = len(_search_results["d"]["results"][0]["Web"])
            except Exception as e:
                if _verbose:
                    print("\tERROR: Method-2 - Bing search - Title-Url\n%s" % (str(e)), file=_output_file_verbose)
                print("\tERROR: Method-2 - Bing search - Title-Url\n%s" % (str(e)))
                continue
            # Collect titles and URLs of the top hits (ASCII-normalised).
            _search_titles = []
            _search_urls = []
            for x in xrange(0, _search_result_count):
                _url = _search_results["d"]["results"][0]["Web"][x]["Url"]
                _title = _search_results["d"]["results"][0]["Web"][x]["Title"]
                _title = unicodedata.normalize('NFKD', _title).encode('ascii', 'ignore')
                _url = unicodedata.normalize('NFKD', _url).encode('ascii', 'ignore')
                _search_titles.append(_title)
                _search_urls.append(_url)
            # Keyword with punctuation, special characters and spaces removed.
            _n_gram_lower_no_spaces = ''.join(_char for _char in _n_gram_lower if _char.isalnum())
            _n_gram_lower_no_spaces = _n_gram_lower_no_spaces.replace(' ', '')
            _number_of_search_results_returned = len(_search_urls)  # No. of search urls = titles
            # Counts of titles / urls that match the search phrase.
            _number_of_valid_titles = 0
            _number_of_valid_urls = 0
            for x in xrange(0, _number_of_search_results_returned):
                _search_title = _search_titles[x]
                _search_title_lower_case = _search_title.lower()
                _search_title_lower_case_no_spaces = "".join(
                    _char for _char in _search_title_lower_case if _char.isalnum())
                _search_url = _search_urls[x]
                _search_url_lower_case = _search_url.lower()
                _search_url_lower_case_no_spaces = "".join(
                    _char for _char in _search_url_lower_case if _char.isalnum())
                if _verbose:
                    print("\t%d:\n\tSearch title: %s\n\tSearch Url: %s" % (x + 1, _search_title, _search_url),
                          file=_output_file_verbose)
                for _synonym in _list_of_synonymous_words:
                    _synonym_match = False
                    # Check if _synonym is present in the title / the url.
                    _title_match = re.search(_synonym, _search_title_lower_case_no_spaces)
                    _url_match = re.search(_synonym, _search_url_lower_case_no_spaces)
                    # A match in either means the hit looks dictionary-like;
                    # fetch the page and inspect its <title>.
                    if _title_match:
                        _synonym_match = True
                    elif _url_match:
                        _synonym_match = True
                    else:
                        continue
                    if _synonym_match:
                        try:
                            # httplib2 with a local '.cache' directory replaces
                            # the earlier urllib2.urlopen() call.
                            http = httplib2.Http(".cache")
                            resp, _url_response = http.request(_search_url, "GET")
                            _html = _url_response
                            _beautiful_html = BeautifulSoup(_html, "lxml")
                        except Exception as e:
                            # BUG FIX: a failed fetch used to leave _beautiful_html
                            # stale (previous page) or undefined, yet the title
                            # check below still ran on it.
                            _beautiful_html = None
                            if _verbose:
                                print("\tException - Method-2 - Reading HTML\n%s" % (str(e)),
                                      file=_output_file_verbose)
                            print("\tException - Method-2 - Reading HTML\n%s" % (str(e)))
                            print("-----------------\n" + _search_url + "\n---------------\n")
                        try:
                            # Extract the page <title> text, keep ASCII only, lower-case.
                            _text_from_title = _beautiful_html.title.string
                            _text_from_title_ascii_only = "".join(
                                _char for _char in _text_from_title if ord(_char) < 128)
                            _text_from_title_ascii_only = _text_from_title_ascii_only.lower()
                        except Exception:
                            # Failed to extract a usable <title>.
                            _text_from_title_ascii_only = ""
                        # ------------ Exact title match -------------
                        if _verbose:
                            print("\t\tSearch TITLE processed: %s\n\t\tPhrase processed: %s" % (
                                _text_from_title_ascii_only, _n_gram_lower), file=_output_file_verbose)
                        if _n_gram_lower in _text_from_title_ascii_only:
                            _number_of_valid_titles += 1
                            if _verbose:
                                print("\t\t\tMatch", file=_output_file_verbose)
                        else:
                            if _verbose:
                                print("\t\t\tNot a match", file=_output_file_verbose)
                        # Strip digits from the squashed URL and look for the phrase in it.
                        _search_url_lower_case_no_spaces_no_punctuation = "".join(
                            [_char for _char in _search_url_lower_case_no_spaces if not _char.isdigit()])
                        if _verbose:
                            print("\t\tSearch URL processed: %s\n\t\tPhrase processed: %s" % (
                                _search_url_lower_case_no_spaces_no_punctuation, _n_gram_lower_no_spaces),
                                file=_output_file_verbose)
                        if _n_gram_lower_no_spaces in _search_url_lower_case_no_spaces_no_punctuation:
                            _number_of_valid_urls += 1
                            if _verbose:
                                print("\t\t\tMatch", file=_output_file_verbose)
                        else:
                            if _verbose:
                                print("\t\t\tNot a match", file=_output_file_verbose)
                        break
                    else:
                        continue
            if _number_of_valid_titles > 0 or _number_of_valid_urls > 0:
                title_url_collocations.append(_n_gram)
                if _verbose:
                    print("\n\tTotal number of valid titles: %d\n\tTotal number of valid urls: %d\n\t- Collocation -\n"
                          % (_number_of_valid_titles, _number_of_valid_urls), file=_output_file_verbose)
            else:
                n_grams_not_collocations.append(_n_gram)
                if _verbose:
                    print("\t- Not a collocation -\n", file=_output_file_verbose)
    # Output text file to save collocations.
    _output_file_path_title_url_collocations = str(_input_file_path).replace(_input_file_path.split('/')[-1],
                                                                             'collocations_title_url.txt')
    _output_file_title_url_collocations = open(_output_file_path_title_url_collocations, 'w')
    for _collocation in title_url_collocations:
        _output_file_title_url_collocations.write(_collocation + '\n')
    _output_file_title_url_collocations.close()
    if _verbose:
        print("\nMethod-2: Title-Url - Collocations are written to the file:\n%s" % (
            _output_file_path_title_url_collocations), file=_output_file_verbose)
    # Output text file to save n-grams that are not collocations.
    _output_file_path_title_url_not_collocations = str(_input_file_path).replace(_input_file_path.split('/')[-1],
                                                                                 'not_collocations_title_url.txt')
    _output_file_title_url_not_collocations = open(_output_file_path_title_url_not_collocations, 'w')
    for _n_gram in n_grams_not_collocations:
        _output_file_title_url_not_collocations.write(_n_gram + '\n')
    _output_file_title_url_not_collocations.close()
    if _verbose:
        print("Method-2: Title-Url - N-grams that are not collocations are written to the file:\n%s" % (
            _output_file_path_title_url_not_collocations), file=_output_file_verbose)
    if _verbose:
        print("\n--------------------------------------------------------------------------", file=_output_file_verbose)
        print("\tMethod-2: Title-Url - Extracting collocations - Complete", file=_output_file_verbose)
        print("--------------------------------------------------------------------------\n\n", file=_output_file_verbose)
        # BUG FIX: the verbose log file was opened in append mode but never closed.
        _output_file_verbose.close()
    # Returning n-grams that are collocations and n-grams that are not.
    if _verbose:
        print("\t\tMethod-2: Collocation extraction successful")
    return title_url_collocations, n_grams_not_collocations
# NOTE(review): the three statements below are the tail of an image-search
# function whose `def` line lies above this chunk; they are not top-level code.
    result = bing.search('image', query_string, params).json()
    # `random_index` is presumably chosen in the unseen part of the function — verify.
    image_url = result['d']['results'][0]['Image'][random_index]['MediaUrl']
    return image_url


def getAmazonURL(item):
    """ A utility for retrieving the Amazon search results URL for some item. """
    # quote_plus percent-encodes the item so it is safe inside a query string.
    return "http://www.amazon.com/s?field-keywords={0}".format(urllib.quote_plus(item))


#
# SETUP
# Module-level singletons: one client per backing service, plus the Bing client.
bing_key = 'api_key_here'  # placeholder key — presumably replaced before deployment
angelListAPI = AngelListAPI()
genderGuesserAPI = GenderGuesserAPI()
googleBooksAPI = GoogleBooksAPI()
googleResultsAPI = GoogleResultsAPI()
thesaurusAPI = ThesaurusAPI()
tweetSentimentAPI = TweetSentimentAPI()
bing = BingSearchAPI(bing_key)
# Every API except Bing is aggregated into the Calculator.
APIS = [angelListAPI, genderGuesserAPI, googleBooksAPI, googleResultsAPI,
        thesaurusAPI, tweetSentimentAPI]
CALCULATOR = Calculator(APIS)
import json
from bing_search_api import BingSearchAPI

# Read the top keywords (usually 1-3) and generate the search keyword parameter.
k = []
with open("topkeywords.txt") as f:
    for line in f:
        k.append(line.strip())
s = ' '.join(k)

n = 1000  # search result limit
my_key = "uAZ6dYNEodLuQxx1W3UKkLegY+Uj8y7e1E3AxPwqtmM"  # API key
query_string = s  # the query string. currently only has keyword parameter.

bing = BingSearchAPI(my_key)  # initialize search request
params = {'$format': 'json'}  # response format as json

# Output file for the collected result URLs.
f = open("bingresults.txt", "w")

# Get first 50 results from Bing.
for obj in bing.search('web', query_string, params).json()['d']['results']:
    for lnk in obj['Web']:
        f.write(lnk['Url'])
        f.write('\n')

i = 50
# Get the rest of the results in pages; the loop body continues below this chunk.
while i < n:
    params = {'$format': 'json', '$skip': i}  # skip first i results