def __init__(self, config):
    """Keep the scan configuration and set up the Bing client."""
    self.config = config
    self.bs = PyBingWebSearch("YOUR ID")
    # Throttling / paging parameters come straight from the config dict.
    self.cooldown = self.config["p_googlesleep"]
    self.results_per_page = int(self.config["p_results_per_query"])
    if self.config["p_skippages"] > 0:
        print("Bing Scanner will skip the first %d pages..." % (self.config["p_skippages"]))
def write_text(): with open(riots_file) as listofriots: for i, l in enumerate(listofriots): if i < 0: # to pass the ones we already have continue try: print "looking at riot %d" % i name = l.strip() # name = l.split("-")[1].strip().split('(')[0].strip() bing_news = PyBingWebSearch(api_key, name) news = bing_news.search(limit=5, format='json') time.sleep(3) try: shutil.rmtree("riots_bing/riot_%02d" % i) except OSError: pass os.mkdir("riots_bing/riot_%02d" % i) for j, new in enumerate(news): with open('riots_bing/riot_%02d/%02d.txt' % (i, j), 'w') as riot_file: riot_file.write( new.title.encode('utf-8').strip() + '\n') riot_file.write( new.description.encode('utf-8').strip()) except: continue
def searchBing(text):
    """Run a Bing web search for *text* and wrap each hit as a Result."""
    auth_key = '1KN2M8IdjS+AsXS7+s9NXFRw1vIcHO/awnbyF1+WjEs'
    client = PyBingWebSearch(auth_key, text)
    hits = client.search(limit=10, format='json')  # results 1-10
    title_urls = []
    for hit in hits:
        title_urls.append(Result(hit.title, hit.url))
    return title_urls
def getSeedPagesFromBing(self):
    """Return up to ten Bing results for ``self.searchTerm``.

    On any failure the error is logged and None is returned implicitly.
    """
    try:
        client = PyBingWebSearch(self.apiKey, self.searchTerm)
        return client.search(limit=10, format="json")
    except Exception as e:
        crawlerLogger.error("Failed to get seeds. Error:" + str(e))
def searchBing(): search_term = "site:linkedin.com instreamset:(url):\"/pub/\" -instreamset:(url):\"/dir/\" && (\"at %s\" || \"at %s\")" % ( orgname, orgname.lower()) bing_web = PyBingWebSearch(B_API_KEY, search_term) result = bing_web.search(limit=50, format='json') counter = 0 while counter < 50: try: regex_string = "'\.\s([\w\s]*\sat\s%s)'" % (orgname) if debug: print "DEBUG: Bing[", counter, "] first regex_string is: ", regex_string m = re.search('\.\s([\w\s]*\sat\sReliaQuest)', result[counter].description, re.IGNORECASE) if debug: print "DEBUG: Bing[", counter, "] raw results:" print "title: ", result[ counter].title, " description: ", result[ counter].description if m == None: if debug: print "DEBUG: Bing[", counter, "] first regex returned 'None'" regex_string = "'^.*at\s%s\.'" % (orgname) if debug: print "DEBUG: Bing[", counter, "] second regex_string is: ", regex_string m = re.search('^.*at\sReliaQuest\.', result[counter].description, re.IGNORECASE) if m == None: if debug: print "DEBUG: Bing[", counter, "] second regex returned 'None'" counter += 1 continue else: pass if debug: print "DEBUG: Bing [", counter, "] full regex match: ", str( m.group()) stdout.write('Name: ') stdout.write( str(re.sub(' \| LinkedIn', ',', result[counter].title))) stdout.write(' Role: ') try: stdout.write(str(m.group(1))) except IndexError: stdout.write(str(m.group())) pass if verbose: stdout.write(' VERBOSE_URL: ') stdout.write(result[counter].url) stdout.write("\n") counter += 1 except IndexError as e: if verbose: print "INFO: No additional Bing Search Results available" break except Exception as e: print "Bing_ERROR: Something strange happened, printing error: " print e exit()
def extract_snippet(prompt):
    """Return the description snippets of the top 50 Bing hits for *prompt*."""
    API_KEY = "0nNf/RGQhw/62syJrJGDRbm4BUx4fwkyDYpiFLBobCo"
    searcher = PyBingWebSearch(API_KEY, prompt, web_only=False)
    hits = searcher.search(limit=50, format='json')
    return [hit.description for hit in hits]
def searchBing():
    """Scrape "<name> at <orgname>" role strings from LinkedIn-scoped Bing hits.

    Writes matched name/role pairs to stdout.
    Reads globals: orgname, B_API_KEY, debug, verbose.
    NOTE(review): indentation reconstructed from a collapsed source line;
    the nesting of the trailing newline/URL writes is inferred -- confirm.
    """
    search_term = "site:linkedin.com instreamset:(url):\"/pub/\" -instreamset:(url):\"/dir/\" && (\"at %s\" || \"at %s\")" % (orgname,orgname.lower())
    bing_web = PyBingWebSearch(B_API_KEY, search_term)
    result = bing_web.search(limit=50, format='json')
    counter = 0
    while counter < 50:
        try:
            # NOTE(review): regex_string is built from orgname but only used
            # for the debug print; the actual re.search below is hard-coded
            # to 'ReliaQuest' -- almost certainly a bug.
            regex_string = "'\.\s([\w\s]*\sat\s%s)'" % (orgname)
            if debug:
                print "DEBUG: Bing[", counter,"] first regex_string is: ", regex_string
            m = re.search('\.\s([\w\s]*\sat\sReliaQuest)', result[counter].description, re.IGNORECASE)
            if debug:
                print "DEBUG: Bing[", counter,"] raw results:"
                print "title: ", result[counter].title, " description: ", result[counter].description
            if m == None:
                if debug:
                    print "DEBUG: Bing[", counter,"] first regex returned 'None'"
                # Second pass: "... at <org>." anchored at the start.
                regex_string = "'^.*at\s%s\.'" % (orgname)
                if debug:
                    print "DEBUG: Bing[", counter,"] second regex_string is: ", regex_string
                m = re.search('^.*at\sReliaQuest\.', result[counter].description, re.IGNORECASE)
                if m == None:
                    # Neither pattern matched; move to the next result.
                    if debug:
                        print "DEBUG: Bing[", counter,"] second regex returned 'None'"
                    counter+=1
                    continue
                else:
                    pass
            if debug:
                print "DEBUG: Bing [", counter, "] full regex match: ", str(m.group())
            stdout.write('Name: ')
            stdout.write(str(re.sub(' \| LinkedIn', ',', result[counter].title)))
            stdout.write(' Role: ')
            try:
                stdout.write(str(m.group(1)))
            except IndexError:
                # The second pattern has no capture group; print the whole match.
                stdout.write(str(m.group()))
                pass
            if verbose:
                stdout.write(' VERBOSE_URL: ')
                stdout.write(result[counter].url)
            stdout.write("\n")
            counter+=1
        except IndexError as e:
            # Ran past the last available result.
            if verbose:
                print "INFO: No additional Bing Search Results available"
            break
        except Exception as e:
            print "Bing_ERROR: Something strange happened, printing error: "
            print e
            exit()
def getTopTen(query):
    """Collect up to the top ten candidate URLs for *query* via Bing.

    Relies on the globals ``pages`` (page counter, presumably advanced by
    checkUrl) and ``checkUrl`` to filter/append urls.
    """
    top_ten_urls = []  # seed urls gathered so far
    engine = PyBingWebSearch('mMlCxUd5qmU5uDJ1w1VLbDkobVK905A9cZZhYkfqGHg=', query, web_only=False)
    hits = engine.search(limit=20, format='json')  # results 1-50
    counter = pages  # snapshot of the page counter before this batch
    for hit in hits:
        checkUrl(hit.url, top_ten_urls)
        if pages - counter >= 10:  # only care about top 10
            break
    return top_ten_urls
def get_results_ids_fb(query):
    """Collect Facebook event ids from Bing results for *query*."""
    bing_client = PyBingWebSearch(API_KEY, query + " site:facebook.com/events")
    page = bing_client.search(limit=50, format='json')
    ids = set()
    while page:
        print(len(page))
        ids = extract_pages(ids, page)
        # A short page means Bing is exhausted; otherwise fetch the next batch.
        page = [] if len(page) < 50 else bing_client.search(limit=50, format='json')
    return list(ids)
def webQuery(self, query, result_num=10):
    """Query Bing for *query* and return the first *result_num* JSON results."""
    # Spaces become '+' per the OData-style URL format Bing expects.
    plus_query = query.replace(' ', '+')
    logging.debug('Sending following URL query: ' + plus_query)
    print('%-20s= %s' % ("URL", plus_query))
    searcher = PyBingWebSearch(self.__i_accountKey, plus_query, web_only=False)
    return searcher.search(limit=result_num, format='json')
def BingSearch(keyword):
    """Search Bing for *keyword*; return deduplicated urls as {"Bing": url} dicts."""
    client = PyBingWebSearch(GetTheConfig('bing', 'Key'), keyword, web_only=False)
    hits = client.search(limit=int(GetTheConfig('bing', 'QUANTITY')), format='json')
    raw_urls = [hit.url for hit in hits]
    unique_urls = DeduplicateValue(raw_urls)
    return [{"Bing": url} for url in unique_urls]
def getfrombing(self, apikey, text, limit, operation):
    """Run the Bing search flavour selected by *operation* and return results."""
    # The three image flavours differ only in the Adult filter level.
    adult_levels = {
        'moderateimagesearch': 'Moderate',
        'strictimagesearch': 'Strict',
        'adultimagesearch': 'Off',
    }
    if operation in adult_levels:
        bing_obj = PyBingImageSearch(apikey, text, custom_params="&Adult='%s'" % adult_levels[operation])
    elif operation == 'websearch':
        bing_obj = PyBingWebSearch(apikey, text, web_only=False)
    elif operation == 'videosearch':
        bing_obj = PyBingVideoSearch(apikey, text)
    elif operation == 'newssearch':
        bing_obj = PyBingNewsSearch(apikey, text)
    # An unrecognized operation raises NameError here, exactly as before.
    result = bing_obj.search(limit=limit, format='json')
    return result
def bing_search(search_term):
    """Search Bing for *search_term* using market/geo options from settings."""
    bing_cfg = settings['bing']
    custom_params = {
        # 'Sources': "'" + bing_cfg['sources'] + "'",
        'Market': "'" + bing_cfg['market'] + "'",
    }
    latitude = bing_cfg.get('latitude')
    longitude = bing_cfg.get('longitude')
    # Only pass coordinates when both halves are configured.
    if latitude is not None and longitude is not None:
        custom_params['Latitude'] = latitude
        custom_params['Longitude'] = longitude
    custom_params_str = "".join("&" + key + "=" + value for key, value in custom_params.items())
    # web_only is optional, but should be true to use your web only quota
    # instead of your all purpose quota
    client = PyBingWebSearch(
        bing_cfg['api_key'],
        search_term,
        web_only=False,
        custom_params=custom_params_str,
    )
    return client.search(limit=int(bing_cfg['results_limit']), format='json')
def _price_value(price):
    """Numeric value of a scraped price string (commas stripped); inf if unparseable."""
    try:
        return float(str(price).replace(',', ''))
    except ValueError:
        return float('inf')


def compare(request):
    """Django view: find the cheapest Flipkart and Snapdeal price for a product.

    POST with a valid form: Bing-search "buy <product>", collect Flipkart
    (.../p/...) and Snapdeal (.../product/...) product urls from the results,
    scrape one price from each page, and render home.html with the cheapest
    price per store. GET renders the empty search form.
    """
    if request.method == 'POST':
        form = search(request.POST)
        if form.is_valid():
            product = form.cleaned_data['querry']
            API_KEY = "8eFYvQ0mCr06A3YoUZV9XK7867AgLLDeLuBdhILm+3c"
            querry = "buy " + product
            bing_web = PyBingWebSearch(API_KEY, querry, web_only=False)
            results = bing_web.search(limit=50, format='json')

            fkart_urls = []
            fkart_price_ar = []
            sdeal_urls = []
            sdeal_price_ar = []
            min_fkart = 0  # rendered as '0' when no Flipkart price was found
            min_sdeal = 0

            for result in results:
                comp = result.url.split('.')[1]
                if comp == 'flipkart':
                    try:
                        part = result.url.split('/')[4]
                    except IndexError:  # was a bare except; narrowed
                        continue
                    if part == 'p':
                        fkart_urls.append(result.url)
                if comp == 'snapdeal':
                    try:
                        part = result.url.split('/')[3]
                    except IndexError:  # was a bare except; narrowed
                        continue
                    if part == 'product':
                        sdeal_urls.append(result.url)

            if len(fkart_urls) == 0 and len(sdeal_urls) == 0:
                context = RequestContext(request, {'result': 'Search Failed!'})
                return render_to_response('home.html', context)

            # Flipkart: the price follows the word 'for'/'For' in the page's
            # meta Description tag, either as "Rs. <price>" or "Rs.<price>".
            for url in fkart_urls:
                fkart_soup = bs(requests.get(url).text, 'html.parser')
                meta_desc = fkart_soup.findAll(attrs={"name": "Description"})
                words = meta_desc[0]['content'].split(" ")
                for_index = None
                for marker in ('for', 'For'):
                    if marker in words:
                        for_index = words.index(marker)  # 'For' overrides 'for', as before
                if for_index is not None:
                    str_price = words[for_index + 1]
                    if str_price == 'Rs.':
                        fkart_price_ar.append(words[for_index + 2])
                    else:
                        fkart_price_ar.append(str_price[3:])  # strip leading "Rs."

            # Snapdeal: the price sits in the <input id="productPrice"> value.
            for url in sdeal_urls:
                sdeal_soup = bs(requests.get(url).text, 'html.parser')
                input_tag = sdeal_soup.find_all('input', id='productPrice')
                try:
                    sdeal_price_ar.append(input_tag[0]['value'])
                except (IndexError, KeyError):
                    pass

            # BUG FIX: the original updated "min_*" with `price > min_*`, which
            # (a) selected the MAXIMUM and (b) compared price *strings*
            # lexicographically. Pick the true numeric minimum instead, keeping
            # the original string for display.
            if fkart_price_ar:
                min_fkart = min(fkart_price_ar, key=_price_value)
            if sdeal_price_ar:
                min_sdeal = min(sdeal_price_ar, key=_price_value)

            context = RequestContext(
                request, {
                    'form': form,
                    'result': 'Search Succesful!',
                    'flipkart_price': str(min_fkart),
                    'snapdeal_price': str(min_sdeal)
                })
            return render_to_response('home.html', context)
        # NOTE(review): an invalid POST form falls through and returns None,
        # matching the original control flow -- confirm this is intended.
    else:
        form = search()
        context = RequestContext(request, {'form': form})
        return render_to_response('home.html', context)
# Read the captured query text, spell-correct it, then fetch matching Bing
# web results and images.
from py_bing_search import PyBingWebSearch
from py_bing_search import PyBingImageSearch
from correct import *

BING_KEY = '1jJ4jrq6jGMZl9fWXwVT9DiaIAZjDdNiByM/hdcstfI'

# BUG FIX: the file handle was opened and never closed; use a context manager.
with open(r"D:\Projects\hackathon\content2.txt", "r") as f:
    s = f.read()

s = corrections(s)  # spell-correct the raw text before searching
search_term = s

bing_web = PyBingWebSearch(BING_KEY, search_term, web_only=False)
x = bing_web.search(limit=6, format='json')
results = [(hit.title, hit.description, hit.url) for hit in x]

# image_filters is optional
bing_image = PyBingImageSearch(BING_KEY, s, image_filters='Size:medium+Color:Monochrome')
photos = bing_image.search(limit=6, format='json')
def get_top_bing_goodreads_search(search_term):
    """Return goodreads book-page urls among the top 50 Bing hits for *search_term*."""
    query = "site:goodreads.com {0}".format(search_term)
    client = PyBingWebSearch(BING_SEARCH_API_KEY, query, web_only=False)
    book_urls = []
    for hit in client.search(limit=50, format='json'):
        if 'goodreads.com/book/show/' in hit.url:
            book_urls.append(hit.url)
    return book_urls
def search(search_term):
    """Return the first ten Bing web results for *search_term*."""
    client = PyBingWebSearch(s5, search_term, web_only=False)
    return client.search(limit=10, format='json')
#coding=utf-8 from py_bing_search import PyBingWebSearch search_term = "site:cert.org.cn" bing_web = PyBingWebSearch('6I7UKjtX4bFiCDO0eQr4N4ErGG1+10BSWTmt0/aQ9QE', search_term, web_only=False) # web_only is optional, but should be true to use your web only quota instead of your all purpose quota first_fifty_result= bing_web.search(limit=50, format='json') #1-50 second_fifty_result= bing_web.search(limit=50, format='json') #51-100 # 显示标题 second_fifty_result[0].description) # 显示url second_fifty_result[0].url) '''for x in xrange(1,int(len(second_fifty_result))): print second_fifty_result[x].url ''' '''for x in xrange(1,int(len(first_fifty_result))): print first_fifty_result[x].url,first_fifty_result[x].title ''' for y in (first_fifty_result,second_fifty_result): for x in xrange(1,int(len(y))): print y[x].url pass
def test_search_all(self):
    """search_all honours the requested limit and returns relevant results."""
    web_bing = PyBingWebSearch(SECRET_KEY, "Python Software Foundation")
    result_one = web_bing.search_all(limit=60)
    # assertEqual / assertIn report the offending values on failure,
    # unlike the original assertTrue(x == y) anti-pattern.
    self.assertEqual(len(result_one), 60)
    self.assertIn("Python", result_one[0].title)
def _GetMovieResearch(self, term, limit=50, format='json'):
    """Return Bing web results for *term* (``format`` shadows the builtin; kept for callers)."""
    searcher = PyBingWebSearch(self.BING_API_KEY, term, web_only=False)
    return searcher.search(limit=limit, format=format)
class bingScan:
    """Run a paged Bing query and feed every result URL into singleScan.

    Config keys used: p_query, p_pages, p_googlesleep, p_results_per_query,
    p_skippages, p_maxtries.
    """

    def __init__(self, config):
        """Keep the scan configuration and set up the Bing client."""
        self.config = config
        self.bs = PyBingWebSearch("YOUR ID")
        self.cooldown = self.config["p_googlesleep"]
        self.results_per_page = int(self.config["p_results_per_query"])
        # NOTE(review): p_skippages is only announced here; the offset math in
        # startGoogleScan does not actually skip pages -- confirm.
        if self.config["p_skippages"] > 0:
            print("Bing Scanner will skip the first %d pages..." % (self.config["p_skippages"]))

    def startGoogleScan(self):
        """Page through Bing results for the configured query and scan each URL."""
        print("Querying Bing Search: '%s' with max pages %d..." % (self.config["p_query"], self.config["p_pages"]))
        pagecnt = 0
        curtry = 0
        last_request_time = datetime.datetime.now()
        while pagecnt < self.config["p_pages"]:
            pagecnt = pagecnt + 1
            redo = True
            while redo:
                try:
                    # Respect the configured cooldown between API requests.
                    # NOTE(review): when diff == 0 the sleep is skipped entirely,
                    # matching the original logic -- confirm intended.
                    diff = int((datetime.datetime.now() - last_request_time).seconds)
                    if diff <= self.cooldown:
                        if diff > 0:
                            print("Commencing %ds bing cooldown..." % (self.cooldown - diff))
                            time.sleep(self.cooldown - diff)
                    last_request_time = datetime.datetime.now()
                    resp = self.bs.search_web(self.config["p_query"],
                                              {'Web.Count': 50,
                                               'Web.Offset': (pagecnt - 1) * self.results_per_page})
                    results = resp['SearchResponse']['Web']['Results']
                    redo = False
                except KeyboardInterrupt:
                    raise
                except Exception:
                    # BUG FIX: an unconditional ``raise`` at the top of this
                    # handler made the whole retry/abort block below
                    # unreachable; retry up to p_maxtries instead.
                    redo = True
                    sys.stderr.write("[RETRYING PAGE %d]\n" % (pagecnt))
                    curtry = curtry + 1
                    if curtry > self.config["p_maxtries"]:
                        print("MAXIMUM COUNT OF (RE)TRIES REACHED!")
                        sys.exit(1)
            curtry = 0  # reset the retry budget for the next page
            if len(results) == 0:
                break  # no more results: stop paging
            sys.stderr.write("[PAGE %d]\n" % (pagecnt))
            try:
                for r in results:
                    single = singleScan(self.config)
                    single.setURL(r["Url"])
                    single.setQuite(True)
                    single.scan()
            except KeyboardInterrupt:
                raise
            time.sleep(1)
        print("Bing Scan completed.")
def _getData(data, name=1):
    """Search Bing for LinkedIn profiles matching *data* and score the hits.

    data: dict with keys siteid, site, name, title, company, location.
    name: when 0, blank out the first/last name columns in every output row.
    Returns a list of utf-8 encoded CSV lines, one per accepted result.
    NOTE(review): indentation reconstructed from a collapsed source line.
    """
    # Build the site-restricted search term from the non-empty fields.
    srch_title = ""
    if data['title'] != u"":
        srch_title = "\"%s\"" % data['title']
    if data['site'] == u"":
        data['site'] = "www.linkedin.com/in/"
    com_name = data["company"]
    if com_name != "":
        com_name = "Current: %s" % com_name
    srch_location = ""
    if data['location'] != u"":
        srch_location = "Location %s" % data['location']
    search_term = "site:%s %s %s %s" % (data['site'], com_name, srch_title, srch_location)
    print search_term
    # get data using bing api
    bing_web = PyBingWebSearch(settings.BING_API_KEY, search_term.strip())
    result = bing_web.search_all(limit=100, format='json') #1-50
    # Fallback first/last name taken from the queried name; may be replaced
    # per-result below when the hit title carries a "| LinkedIn" suffix.
    name_tp = data['name'].split(" ")
    first_name = name_tp[0]
    last_name = ""
    if(len(name_tp) > 1):
        last_name = name_tp[1]
    res = []
    index = 0  # 1-based rank of the current result, drives the score
    for item in result:
        index += 1
        '''title_tp = item.title if title != u"": title_tp = title'''
        # Strip commas/quotes so the CSV line built below stays well-formed.
        description = item.description.replace(",", " ")
        description = description.replace("\"", " ")
        if "| LinkedIn" in item.title:
            # Prefer the name embedded in the result title.
            # NOTE(review): first_name/last_name persist across iterations, so
            # a hit without the suffix inherits the previous hit's name.
            temp = item.title
            temp = temp.split("|")[0].strip().split(" ")
            first_name = temp[0]
            last_name = temp[-1]
            item.title = ""
        if name == 0:
            first_name = ""
            last_name = ""
        # get title and company from a search result.
        ps_des = parse_str(description)
        res_company = ""
        if ps_des is not None:
            res_company = ps_des[1].strip()
            item.title = ps_des[0].strip()
        # get location and industry from a result
        res_location = ""
        industry_str = ""
        ps_ind = parse_str(description, "Industry")
        if ps_ind is not None:
            res_location = ps_ind[0].replace("Location", "").strip()
            industry_str = ps_ind[1].strip()
        # get score of location (compare after stripping Area/Industry/Greater)
        full_location = res_location
        res_location = res_location.replace("Area", "")
        res_location = res_location.replace("Industry", "")
        res_location = res_location.replace("Greater", "").strip()
        score_location = "No"
        if res_location != "" and (res_location == data['location'] or res_location in data['location'] or data['location'] in res_location):
            score_location = "Yes"
        # get score of company
        score_company = "No"
        if res_company != "" and res_company == data['company'] and res_company:
            score_company = "Yes"
        # get score: earlier-ranked results score better (1 best, 3 worst)
        score = 3
        if index < 4:
            score = 1
        elif index < 6:
            score = 2
        # get current company
        curr_com = parse_curr_company(description)
        # get education
        education = parse_curr_company(description, "Education:")
        # get date and timestamp (time_stamp is currently unused)
        time_stamp = time.time()
        date = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
        # Emit the row only when the extracted name matches the queried one
        # (or when no name was supplied at all).
        if data['name'] == "" or (data['name'] != "" and data['name'] == "%s %s" % (first_name, last_name)):
            line = '"%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%s","%d","%s","%s","%s"\n' % \
                (data['siteid'], data['company'], data['title'], data['location'], first_name, last_name, item.title, \
                res_company, res_location, full_location, industry_str, item.url, curr_com, education, date, \
                score, score_location, score_company, description)
            line = line.encode("utf8")
            # NOTE(review): in a non-unicode string "\u00E2..." is a literal
            # backslash-u sequence, not the mojibake characters -- verify this
            # replace ever matches.
            line = line.replace("\u00E2\u20AC\u2122", "")
            res.append(line)
    return res