def storeAndSearch(search):
    """Fetch results for ``search`` and persist those that map to a site.

    Each result returned by ``boss.getSearchResults`` is looked up with
    ``findSite`` using the result URL and the search's type. Results with no
    site are skipped; the rest are stored as ``models.SearchResult`` entities
    via ``put()``.

    :param search: object exposing ``type`` and ``term`` attributes
    :return: list of the ``models.SearchResult`` objects that were stored
    """
    results = boss.getSearchResults(search)
    # Pass the value as a logging argument instead of formatting eagerly:
    # "'%s' % results" raises TypeError when results is a tuple.
    logging.info('get some results: %s', results)
    # Named search_type so the builtin ``type`` is not shadowed.
    search_type = search.type
    hits = []
    for result in results:
        site = findSite(result['url'], search_type)
        if site is None:
            continue
        logging.info('site is not none!!!!!')
        sr = models.SearchResult(url=result['url'],
                                 title=result['title'],
                                 searchTerm=search.term,
                                 site=site,
                                 type=search_type)
        sr.put()
        hits.append(sr)
    return hits
def search(self, q, maxResult=40, start=0, **params):
    """Issue a Yummly recipe search request and wrap the response.

    :param q: search string
    :param maxResult: max results
    :param start: pagination offset in # of records
        (e.g. start=5 means skip first 5 results)
    :param **params: optional kwargs corresponding to Yummly supported
        search parameters
    :return: ``models.SearchResult`` built from the extracted response
    """
    endpoint = self.URL_SEARCH

    # Work on a private copy so the caller's keyword dict is left untouched.
    query_args = params.copy()
    query_args['q'] = q
    query_args['maxResult'] = maxResult
    query_args['start'] = start

    payload = self._extract_response(
        self._request(endpoint, params=query_args))
    return models.SearchResult(**payload)
def setUp(self):
    """Build the SearchResult fixture stored on ``self.new_search_result``."""
    fixture = models.SearchResult(query_text="test text")
    self.new_search_result = fixture
def search(self, query, lang='en'):
    """Scrape the paginated search results for ``query``.

    This is a generator. It opens successive result pages through
    ``self.br`` and yields one ``models.SearchResult`` per product:

    * When a page has no ``SearchTable``, it is parsed as a single product
      page and that product is yielded unconditionally.
    * Otherwise every ``row_`` table row is parsed, and a row is yielded only
      if its name equals ``query`` exactly and its expansion is not one of
      the excluded editions.

    :param query: card name; spaces are replaced with ``+`` in the URL
    :param lang: accepted but not used by this method
    :return: generator of ``models.SearchResult``
    """
    pagenow = 0
    npages = None
    utf8_parser = etree.HTMLParser(encoding='utf-8')
    # Test for None first: "pagenow < None" raises TypeError on Python 3.
    while npages is None or pagenow < npages:
        self.br.open(
            "{0}?mainPage=showSearchResult&searchFor={1}&resultsPage={2}".
            format(self.base, query.replace(" ", "+"), pagenow))
        tree = etree.fromstring(self.br.response().read().decode('utf-8'),
                                parser=utf8_parser)

        # number of pages (only determined once, from the first page)
        if npages is None:
            href = tree.xpath(
                '//*[@id="siteContents"]/div/div[1]/span[3]/a[2]/@href')
            npages = 1
            if href:
                m = re.search(r'resultsPage=(\d+)', href[0])
                npages = int(m.group(1)) + 1

        # search table
        tree2 = tree.xpath(
            "//table[contains(@class, 'SearchTable')]/tbody")
        if not tree2:
            # No result table: parse the page as a single product page.
            result = {
                'img': '',
                'expansion': '',
                'rarity': '',
                'name': '',
                'id': '',
                'category': '',
                'available': '',
                'price_from': 0
            }
            data = tree.xpath(
                "//span[contains(@class, 'prodImage')]/img/@src")[0]
            if data:
                result['img'] = data
            data = tree.xpath(
                '//h1[contains(@class, "nameHeader")]')[0].text
            if data:
                # Header text has the form "<name> (<expansion>)".
                m = re.search(r"(.*)\((.*)\)", data)
                result['name'] = m.group(1).strip()
                result['expansion'] = m.group(2).strip()
            tree2 = tree.xpath(
                "//table[contains(@class, 'infoTable')]/tbody")[0]
            data = tree2.xpath("tr[1]/td[2]/img/@onmouseover")
            if data:
                m = re.search("'(.+?)'", data[0])
                result['rarity'] = m.group(1)
            data = tree.xpath(
                "//input[contains(@name, 'idProduct')]/@value")[0]
            if data:
                result['id'] = result['name'].replace(
                    " ", "_") + "_" + result['expansion'].replace(
                        " ", "_") + ".c1p" + data + ".prod"
            tree2 = tree.xpath(
                '//table[contains(@class, "availTable")]/tbody')[0]
            avstr = tree2.xpath('tr/td[2]')[0].text
            if avstr is None:
                result['available'] = 0
            else:
                result['available'] = int(avstr)
            if result['available'] > 0:
                pfstr = tree2.xpath('tr/td[2]')[1].text.replace(
                    ",", ".").replace(u'\u20ac', "")
                if pfstr != "N/A":
                    result['price_from'] = float(pfstr)
                else:
                    result['price_from'] = 0.0
            else:
                result['price_from'] = 0.0
            c = models.Card(result['id'],
                            name=result['name'],
                            img=result['img'])
            yield models.SearchResult(c, result['expansion'],
                                      result['rarity'],
                                      result['category'],
                                      result['available'],
                                      result['price_from'])

        # On a result page tree2 is the list of SearchTable bodies. After the
        # single-product branch it is the availTable <tbody> element, so
        # tree2[0] is that body's first child.
        # NOTE(review): the row scan below presumably finds nothing there.
        tree = tree2[0]

        # rows
        rows = tree.xpath("tr[contains(@class, 'row_')]")
        for row in rows:
            result = {
                'img': '',
                'expansion': '',
                'rarity': '',
                'name': '',
                'id': '',
                'category': '',
                'available': '',
                'price_from': 0
            }
            data = row.xpath("td[1]//img/@onmouseover")
            if data:
                m = re.search("'(.+?)'", data[0])
                result['img'] = m.group(1)
            data = row.xpath("td[2]/span/@onmouseover")
            if data:
                m = re.search("'(.+)'", data[0])
                result['expansion'] = m.group(1).strip()
            data = row.xpath("td[3]/img/@onmouseover")
            if data:
                m = re.search("'(.+?)'", data[0])
                result['rarity'] = m.group(1).strip()
            data = row.xpath("td[5]/a")
            if data:
                result['id'] = data[0].attrib['href']
                result['name'] = data[0].text.strip()
            data = row.xpath("td[6]")
            if data:
                result['category'] = data[0].text.strip()
            data = row.xpath("td[7]")
            if data:
                result['available'] = int(data[0].text)
            data = row.xpath("td[8]")
            if data:
                if data[0].text == u"N/A":
                    result['price_from'] = 0
                else:
                    # Price uses a decimal comma, e.g. "1,50 ".
                    m = re.search(r"(\d+,\d+) ", data[0].text)
                    result['price_from'] = float(
                        m.group(1).replace(',', '.'))

            # Keep exact-name matches only, skipping the excluded editions.
            if result['name'] == query:
                expansion = result['expansion']
                if (u'WCD' not in expansion
                        and u'Collectors\\\' Edition' not in expansion
                        and u'International Edition' not in expansion):
                    c = models.Card(result['id'],
                                    name=result['name'],
                                    img=result['img'])
                    yield models.SearchResult(c, result['expansion'],
                                              result['rarity'],
                                              result['category'],
                                              result['available'],
                                              result['price_from'])

        # next page
        pagenow += 1
def get(self, query_text):
    """Search the file at FILEPATH for occurrences of ``query_text``.

    Matching is literal (the query is regex-escaped), case-insensitive and
    done line by line. For every match the enclosing sentence is rebuilt by
    scanning forward and backward from the match start until a sentence
    terminator (``.``, ``!`` or ``?``), a blank line, or the start/end of
    the file is reached. Double quotes are replaced by single quotes and
    newlines by spaces in the reported sentence.

    :param query_text: String of arbitrary text
    :return: ``jsonify`` of the SearchResult marshalled with
        ``search_fields``
    """
    with open(FILEPATH, encoding='utf-8') as f:
        lines = f.readlines()

    terminators = (".", "!", "?")

    def _sentence_tail(line_index, text_start):
        """Return the text from the match start to the end of its sentence."""
        tail = ''
        chunk = lines[line_index][text_start:].replace('"', "'")
        next_index = line_index + 1
        while True:
            ends = [chunk.index(p) for p in terminators if p in chunk]
            if ends:
                # Stop at the nearest terminator, not the last one tested.
                end = min(ends)
                # Keep the terminator, plus a closing quote right after it.
                # Slicing (not indexing) stays safe when the terminator is
                # the last character of the chunk.
                keep = 2 if chunk[end + 1:end + 2] == "'" else 1
                return tail + chunk[:end + keep]
            tail += chunk
            if next_index >= len(lines):
                return tail  # ran off the end of the file
            chunk = lines[next_index].replace('"', "'")
            if chunk == "\n":
                return tail  # a blank line ends the sentence
            next_index += 1

    def _sentence_head(line_index, text_start):
        """Return the text from the start of the sentence to the match."""
        head = ''
        chunk = lines[line_index][:text_start].replace('"', "'")
        prev_index = line_index - 1
        while True:
            starts = [chunk.rindex(p) for p in terminators if p in chunk]
            if starts:
                # The sentence begins right after the closest terminator.
                return chunk[max(starts) + 1:] + head
            head = chunk + head
            # Stop on a blank line or at the top of the file. chunk may be
            # empty here (match at column 0), so it is never indexed.
            if chunk == "\n" or prev_index < 0:
                return head
            chunk = lines[prev_index].replace('"', "'")
            prev_index -= 1

    new_search_result = models.SearchResult(query_text=query_text)
    occurrence_object_list = []
    pattern = re.compile(re.escape(query_text), re.M | re.I)

    # enumerate gives the true position of each line; lines.index(line)
    # would return the first equal line when the file has duplicates.
    for line_index, line in enumerate(lines):
        for m in pattern.finditer(line):
            text_start = m.start()
            text_end = m.end()
            sentence = (_sentence_head(line_index, text_start) +
                        _sentence_tail(line_index, text_start))
            sentence = sentence.replace('\n', ' ').strip()
            # Line and column values are reported 1-based.
            occurrence_object_list.append(
                models.Occurrence(search_result=new_search_result,
                                  line=line_index + 1,
                                  start=text_start + 1,
                                  end=text_end + 1,
                                  in_sentence=sentence))

    # Add occurrences to SearchResult
    setattr(new_search_result, 'occurrences', occurrence_object_list)
    new_search_result.set_num_of_occurrences()
    response = marshal(new_search_result, search_fields)
    return jsonify(response)