def search(self, search_term: Text) -> WebInfo:
    """Scrape Metacrawler results for *search_term*.

    Returns zip3-combined (title, summary, url) tuples, or None when the
    request or the scraping fails (this engine is best-effort, so any
    failure is reported and swallowed).
    """
    try:
        html = tree.HTML(Browser().getHTML(
            "https://www.metacrawler.com/serp?q="
            + search_term.replace(' ', '+')))
        titles = reduceXpath(
            html,
            "//div[@class='web-bing__result']//a[@class='web-bing__title']")
        titles = [normalize(t.xpath("string()")) for t in titles]
        contents = reduceXpath(
            html,
            "//div[@class='web-bing__result']//span[@class='web-bing__description']")
        contents = [normalize(c.xpath("string()")) for c in contents]
        anchors = html.xpath(
            "//div[@class='web-bing__result']//a[@class='web-bing__title']")
        links = [findurl(a.get('href')) for a in anchors]
        links = [l for l in links if l is not None]  # drop unparsable hrefs
        return zip3(titles, contents, links, '')
    except Exception:  # narrowed from bare except so SystemExit/KeyboardInterrupt escape
        print("Metacrawle Fallo")
    return None
def search(self, search_term: Text) -> WebInfo:
    """Scrape Ask.com results for *search_term*.

    Returns zip3-combined (title, summary, url) tuples, or None when the
    request or the scraping fails (this engine is best-effort, so any
    failure is reported and swallowed).
    """
    try:
        html = tree.HTML(Browser().getHTML(
            "https://www.ask.com/web?q=" + search_term.replace(' ', '+')))
        titles = reduceXpath(
            html,
            "//div[@class='PartialSearchResults-body']//div[@class='PartialSearchResults-item']//a")
        # BUG FIX: "string(//a)" is an *absolute* XPath evaluated from the
        # document root, so every title collapsed to the first <a> of the
        # page; "string()" reads each matched node's own text (same idiom
        # as the other engines in this file).
        titles = [normalize(t.xpath("string()")) for t in titles]
        contents = reduceXpath(
            html,
            "//div[@class='PartialSearchResults-body']//div[@class='PartialSearchResults-item']//p[@class='PartialSearchResults-item-abstract']")
        # BUG FIX: same absolute-path problem with "string(//p)".
        contents = [normalize(c.xpath("string()")) for c in contents]
        anchors = html.xpath(
            "//div[@class='PartialSearchResults-body']//div[@class='PartialSearchResults-item']//a")
        links = [findurl(a.get('href')) for a in anchors]
        links = [l for l in links if l is not None]  # drop unparsable hrefs
        return zip3(titles, contents, links, '')
    except Exception:  # narrowed from bare except; log and fall through
        print("Ask Fallo")
    return None
def cleanTrivia(
        trivia: Trivia
) -> Optional[Tuple[Text, List[Text], List[List[Text]], bool]]:
    """Normalize and tokenize an OCR'd trivia (question, options) pair.

    Returns (query, question_tokens, option_token_lists, negated), where
    *query* is the cleaned question text and *negated* flags whether the
    original question contains ' no ' / ' not '. Returns None for None
    input.
    """
    if trivia is None:
        return None
    pregunta, opciones = trivia

    # --- question ---
    token_question = tokenize(normalize(pregunta).replace('?', '')).split(' ')
    # Strip a leading "pregunta N" (N = 1..12) header left by the OCR.
    # BUG FIX: the original compared the string token against range(1, 13)
    # (ints), which can never match; compare against digit strings instead.
    # Also guard the length so a bare "pregunta" cannot raise IndexError.
    if (len(token_question) >= 2 and token_question[0] == 'pregunta'
            and token_question[1] in {str(n) for n in range(1, 13)}):
        token_question = token_question[2:]

    # --- options ---
    token_option = [tokenize(normalize(o)).split(' ') for o in opciones]
    digits = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
    # Drop a leading single-digit enumeration token ("1", "2", ...).
    token_option = [
        toks[1:] if toks and toks[0] in digits else toks
        for toks in token_option
    ]
    # Deduplicate tokens; change here when doing more complex analyses.
    token_option = [list(set(toks)) for toks in token_option]

    query = ' '.join(token_question)
    return (query, token_question, token_option,
            ' no ' in pregunta or ' not ' in pregunta)
def search(self, search_term: Text) -> WebInfo:
    """Scrape Bing results for *search_term*.

    Returns zip3-combined (title, summary, url) tuples, or None when the
    request or the scraping fails (this engine is best-effort, so any
    failure is reported and swallowed).
    """
    try:
        html = tree.HTML(Browser().getHTML(
            "https://www.bing.com/search?q=" + search_term.replace(' ', '+')))
        titles = reduceXpath(
            html, "//ol[@id='b_results']//li[@class='b_algo']//h2//a")
        # BUG FIX: "string(//a)" is an *absolute* XPath evaluated from the
        # document root, so every title collapsed to the first <a> of the
        # page; "string()" reads each matched node's own text (same idiom
        # as the other engines in this file).
        titles = [normalize(t.xpath("string()")) for t in titles]
        contents = reduceXpath(
            html, "//ol[@id='b_results']//li[@class='b_algo']//p")
        # BUG FIX: same absolute-path problem with "string(//p)".
        contents = [normalize(c.xpath("string()")) for c in contents]
        anchors = reduceXpath(
            html, "//ol[@id='b_results']//li[@class='b_algo']//h2//a")
        links = [findurl(a.get('href')) for a in anchors]
        links = [l for l in links if l is not None]  # drop unparsable hrefs
        return zip3(titles, contents, links, '')
    except Exception:  # narrowed from bare except; log and fall through
        print("Bing Fallo")
    return None
def getTrivia(shaper: ImageShape, ocr: OCR) -> Optional[Trivia]:
    """Capture and OCR a trivia screenshot into (question, option_list).

    *shaper* crops the screen into a question image and an options image;
    *ocr* extracts their text. Returns (question_text, option_list) or
    None when OCR produced nothing usable.
    """
    # Random suffix so concurrent runs do not collide on the temp names.
    file_pregunta = os.path.join(
        os.getcwd(),
        str(random.randint(1, 10001))
        + 'runtimecreationtoremove_question_file.png')
    file_opciones = os.path.join(
        os.getcwd(),
        str(random.randint(1, 10001))
        + 'runtimecreationtoremove_options_file.png')
    # Crop the screenshot into the two temp files.
    shaper.shapeImage(file_pregunta, file_opciones)
    try:
        pre_text = ocr.getText(file_pregunta)
        opt_text = ocr.getText(file_opciones)
    finally:
        # Always remove the files shapeImage created, even if OCR raises
        # (the original leaked them on an OCR exception).
        for tmp in (file_pregunta, file_opciones):
            try:
                os.remove(tmp)
            except OSError:
                pass
    if (pre_text is None) or (opt_text is None):
        return None

    # --- question: keep text up to the first '?', collapse newlines ---
    pre_text = normalize(pre_text)
    segments = [s for s in str(pre_text).split('?') if s != '']
    if not segments:  # OCR read only '?' characters (or nothing usable)
        return None
    pre_text = segments[0].replace('\n', ' ') + '?'

    # --- options: one option per line -> tab separated ---
    opt_text = normalize(opt_text.replace('\n', '\t'))

    # Drop a "pregunta N" header in case the OCR kept it in the question.
    for n in range(1, 13):
        header = 'pregunta ' + str(n)
        if header in pre_text:
            pre_text = pre_text.replace(header, '')
            break

    opciones = [o for o in str(opt_text).split('\t') if o != '']
    return (pre_text, opciones)
def search(self, search_term: Text) -> WebInfo:
    """Scrape Google results for *search_term*.

    Returns zip3-combined (title, summary, url) tuples, or None when the
    request or the scraping fails (this engine is best-effort, so any
    failure is reported and swallowed).
    """
    try:
        html = tree.HTML(Browser().getHTML(
            "https://www.google.com.ar/search?q="
            + search_term.replace(' ', '+')))
        titles = html.xpath("//div[@class='g']//h3[@class='r']//a")
        titles = [normalize(t.xpath('string()')) for t in titles]
        contents = reduceXpath(
            html, "//div[@class='g']//div[@class='s']//span[@class='st']")
        # Summaries span several lines; flatten them before normalizing.
        contents = [
            normalize(c.xpath("string()").replace('\n', ''))
            for c in contents
        ]
        anchors = html.xpath("//div[@class='g']//h3[@class='r']//a")
        links = [findurl(a.get('href')) for a in anchors]
        links = [l for l in links if l is not None]  # drop unparsable hrefs
        return zip3(titles, contents, links, '')
    except Exception:  # narrowed from bare except; log and fall through
        print("Google Fallo")
    return None