def get_number_of_results(term, ajax=False, verbose=True):
    """Return the estimated number of search results for ``term`` as an int.

    When ``ajax`` is false, the Google results page is fetched through
    ``GoogleSearch`` and the count is pulled out of its text with the
    module-level pattern ``reg`` (thousands separators ``,`` are stripped).
    If ``verbose`` is true, the term and the raw matched count are printed.

    When the pattern does not match, execution pauses on ``raw_input`` showing
    ``(term, page)`` and then falls through to the same path used for
    ``ajax=True``: ``search(term)`` is called and the value stored under
    ``responseData -> cursor -> estimatedResultCount`` is returned.
    """
    if not ajax:
        html = str(GoogleSearch(term)._get_results_page())
        found = reg.search(html)
        if not found:
            # Scrape failed: show what was fetched and wait for the operator
            # before falling back to the search() lookup below.
            raw_input((term, html))
        else:
            count_text = found.groups()[0]
            if verbose:
                print(term, count_text)
            return int(count_text.replace(',', ''))

    response = search(term)
    return int(response['responseData']['cursor']['estimatedResultCount'])
def run(self, string):
    """Look ``string`` up on md5-database.org through a site-restricted Google search.

    The text nodes of the first results page are concatenated and split into
    chunks matching ``MD5}...MD5``. For the first chunk that contains
    ``string``, the text between the chunk's first ``,`` and its first ``.``
    is returned with surrounding whitespace stripped.

    NOTE(review): presumably that slice is the counterpart of ``string``
    (plaintext for a hash or vice versa) -- confirm against the page format.

    Returns:
        The extracted text, or ``''`` when no chunk mentions ``string``.
    """
    query = "site:http://md5-database.org/md5 %s" % string
    gs = GoogleSearch(query)
    gs.results_per_page = 10
    page = gs._get_results_page()
    page_text = ''.join(page.findAll(text=True))

    # Raw string: "\}" is not a valid str escape and warns on modern Python;
    # the compiled pattern is unchanged.
    for chunk in re.findall(r'MD5\}.*?MD5', page_text):
        if string in chunk:
            return chunk[(chunk.find(',') + 1):chunk.find('.')].strip()
    return ''
def _first_query_failure(reason, page_text):
    """Build the result list returned when the plain (domain-less) query is unusable.

    Layout mirrors the success path: a ``0`` denominator, one ``0`` per entry
    in ``domains``, the ``reason`` string, and the collected page text decoded
    as UTF-8. The list is logged at INFO level before being returned.
    """
    scores = [0]
    scores.extend(0 for _ in domains)
    scores.append(reason)
    scores.append(unicode(page_text, 'utf-8', "strict"))
    logging.info(scores)
    return scores


def augmentedScoring(phrase):
    """Score ``phrase`` by Google hit counts, relative to each entry in ``domains``.

    First the quoted phrase alone is searched; its hit count is the
    denominator. Then, for every ``domain`` in the module-level ``domains``,
    the quoted phrase together with the quoted domain is searched and its hit
    count is divided by the denominator. ``sleep(searchSleepTime)`` is called
    before every request.

    Returns:
        A list ``[denominator, ratio_1, ..., ratio_n, match_text, pages]``:
        the integer hit count of the plain query, one float per domain, the
        concatenated regex matches (each followed by ``" - "``), and all
        fetched pages concatenated and decoded as UTF-8.

        If the plain query raises ``SearchError`` or yields no parsable hit
        count, the list is ``[0, 0, ..., 0, reason, pages]`` instead, with one
        ``0`` per domain and ``reason`` describing the failure.

        A domain query that fails or cannot be parsed contributes ``0.0``.
        A zero denominator also yields ``0.0`` for every domain rather than
        raising ``ZeroDivisionError``.
    """
    # TODO: the hit count is not quite correct -- the greedy
    # "about .* results" match can span the whole page.
    # => investigate the regex / the Google result page!
    stats_re = re.compile(r'resultStats">.*bout (.*) results</div')
    stats_marker = "resultStats\">"

    hitScores = []
    matches = []   # every regex match, each followed by " - "
    pages = []     # raw text of every fetched results page

    # Without domains -> denominator.
    searchStr = "\"" + phrase + "\""
    gs = GoogleSearch(searchStr)
    gs.results_per_page = 50
    sleep(searchSleepTime)
    score = 0
    try:
        page = gs._get_results_page()
        logging.info(gs.last_search_url)
        pageStr = str(page)
        pages.append(pageStr)

        if stats_marker not in pageStr and "No results found for" not in pageStr:
            logging.warning("No google hits! (" + searchStr + ")")
            return _first_query_failure("first: no match", "".join(pages))

        m = stats_re.search(pageStr)
        if m is None:
            logging.warning("No match! .. no google hits? (" + searchStr + ")")
            return _first_query_failure("first: no match", "".join(pages))

        score = m.group(1)
        matches.append(m.group(0) + " - ")
        logging.info("score (" + searchStr + "): " + score)
        score = score.replace(',', '')
        hitScores.append(int(score))
    except SearchError as se:
        logging.warning("Search Error on: " + searchStr + " no results? \n" + str(se))
        return _first_query_failure("first: search error: " + str(se),
                                    "".join(pages))

    denominationScore = float(score)
    logging.info("denominator (" + searchStr + "): " + str(denominationScore))

    # Augmented with domains.
    for domain in domains:
        searchStr = "\"" + phrase + "\" \"" + domain + "\""
        gs = GoogleSearch(searchStr)
        gs.results_per_page = 50
        sleep(searchSleepTime)
        score = 0  # stays 0 when this domain query fails or cannot be parsed
        try:
            page = gs._get_results_page()
            logging.info(gs.last_search_url)
            pageStr = str(page)
            pages.append(pageStr)
            if stats_marker in pageStr:
                m = stats_re.search(pageStr)
                if m is not None:
                    score = m.group(1)
                    matches.append(m.group(0) + " - ")
                    logging.info("score (" + searchStr + "): " + score)
                    score = score.replace(',', '')
                else:
                    logging.warning("No match! .. no google hits? (" + searchStr + ")")
            else:
                logging.warning("No google hits! (" + searchStr + ")")
        except SearchError as se:
            logging.warning("Search Error on: " + searchStr + " no results? " + str(se))

        # Relativize by the general hit count; a phrase with zero overall
        # hits cannot have a meaningful ratio, so report 0.0 instead of
        # dividing by zero.
        if denominationScore:
            hitScores.append(float(score) / denominationScore)
        else:
            hitScores.append(0.0)

    hitScores.append("".join(matches))
    hitScores.append(unicode("".join(pages), 'utf-8', "strict"))
    logging.info(hitScores)
    return hitScores