示例#1
0
def storeAndSearch(search):
    """Fetch results for ``search`` and store those that map to a known site.

    Every result returned by ``boss.getSearchResults`` is resolved with
    ``findSite`` using the result URL and the search type.  Results for which
    ``findSite`` returns ``None`` are skipped; the others are saved with
    ``put()`` as ``models.SearchResult`` objects.

    :param search: object exposing ``term`` and ``type`` attributes.
    :return: list of the stored ``models.SearchResult`` objects, in the order
        the results were returned.
    """
    results = boss.getSearchResults(search)
    # Hand the value to logging as an argument: ``'%s' % results`` raises
    # TypeError when ``results`` is a tuple whose length is not exactly 1.
    logging.info('get some results: %s', results)
    # Named ``search_type`` so the builtin ``type`` is not shadowed.
    search_type = search.type
    hits = []
    for result in results:
        site = findSite(result['url'], search_type)
        if site is None:
            continue
        logging.info('site is not none!!!!!')
        sr = models.SearchResult(url=result['url'],
                                 title=result['title'],
                                 searchTerm=search.term,
                                 site=site,
                                 type=search_type)
        sr.put()
        hits.append(sr)
    return hits
示例#2
0
    def search(self, q, maxResult=40, start=0, **params):
        """Send a recipe search request and wrap the answer in a SearchResult.

        :param q: search string
        :param maxResult: maximum number of results to request
        :param start: pagination offset in number of records (e.g. start=5
            skips the first 5 results)
        :param **params: optional keyword arguments forwarded as additional
            search parameters
        :return: ``models.SearchResult`` built from the extracted response
        """
        endpoint = self.URL_SEARCH

        # Work on a separate dict; the explicit arguments override any
        # same-named entries.
        query = dict(params)
        query.update(q=q, maxResult=maxResult, start=start)

        payload = self._extract_response(self._request(endpoint, params=query))
        return models.SearchResult(**payload)
示例#3
0
 def setUp(self):
     """Create the SearchResult fixture stored on ``self.new_search_result``."""
     fixture = models.SearchResult(query_text="test text")
     self.new_search_result = fixture
示例#4
0
    def search(self, query, lang='en'):
        """Yield a ``models.SearchResult`` for every card found for ``query``.

        Result pages are fetched through ``self.br`` starting at
        ``resultsPage=0``.  The number of pages is taken from a
        ``resultsPage=`` link on the first page; one page is assumed when that
        link is missing.

        A page without a ``SearchTable`` is parsed as a single product page
        and its product is yielded without any name filtering.  Rows of a
        result table are yielded only when the row name equals ``query``
        exactly and the expansion is not one of the excluded editions.

        :param query: card name to look for; spaces become ``+`` in the URL.
        :param lang: not used by this method.
        :return: generator of ``models.SearchResult`` objects.
        """
        blank_result = {
            'img': '',
            'expansion': '',
            'rarity': '',
            'name': '',
            'id': '',
            'category': '',
            'available': '',
            'price_from': 0
        }
        excluded_editions = (u'WCD', u'Collectors\\\' Edition',
                             u'International Edition')

        def first_group(pattern, values):
            """Return group 1 of ``pattern`` in ``values[0]``, or ''."""
            found = re.search(pattern, values[0]) if values else None
            return found.group(1) if found else ''

        pagenow = 0
        npages = None
        utf8_parser = etree.HTMLParser(encoding='utf-8')

        # ``npages is None`` must be tested first: comparing an int with None
        # raises TypeError on Python 3.
        while npages is None or pagenow < npages:
            self.br.open(
                "{0}?mainPage=showSearchResult&searchFor={1}&resultsPage={2}".
                format(self.base, query.replace(" ", "+"), pagenow))
            tree = etree.fromstring(self.br.response().read().decode('utf-8'),
                                    parser=utf8_parser)

            # number of pages (only determined once, on the first page)
            if npages is None:
                href = tree.xpath(
                    '//*[@id="siteContents"]/div/div[1]/span[3]/a[2]/@href')
                npages = 1
                if href:
                    m = re.search(r'resultsPage=(\d+)', href[0])
                    if m:
                        npages = int(m.group(1)) + 1

            # search table
            tables = tree.xpath(
                "//table[contains(@class, 'SearchTable')]/tbody")
            if not tables:
                headers = tree.xpath('//h1[contains(@class, "nameHeader")]')
                if not headers:
                    # Neither a result table nor a product page: there is
                    # nothing to report for this page.
                    pagenow += 1
                    continue

                result = dict(blank_result)

                data = tree.xpath(
                    "//span[contains(@class, 'prodImage')]/img/@src")
                if data and data[0]:
                    result['img'] = data[0]

                header_text = headers[0].text
                if header_text:
                    # Header is expected to look like "Name (Expansion)".
                    m = re.search(r"(.*)\((.*)\)", header_text)
                    if m:
                        result['name'] = m.group(1).strip()
                        result['expansion'] = m.group(2).strip()
                    else:
                        result['name'] = header_text.strip()

                info = tree.xpath(
                    "//table[contains(@class, 'infoTable')]/tbody")
                if info:
                    result['rarity'] = first_group(
                        r"'(.+?)'",
                        info[0].xpath("tr[1]/td[2]/img/@onmouseover"))

                data = tree.xpath(
                    "//input[contains(@name, 'idProduct')]/@value")
                if data and data[0]:
                    result['id'] = result['name'].replace(
                        " ", "_") + "_" + result['expansion'].replace(
                            " ", "_") + ".c1p" + data[0] + ".prod"

                avail = tree.xpath(
                    '//table[contains(@class, "availTable")]/tbody')
                cells = avail[0].xpath('tr/td[2]') if avail else []
                avstr = cells[0].text if cells else None
                result['available'] = 0 if avstr is None else int(avstr)

                result['price_from'] = 0.0
                if result['available'] > 0 and len(cells) > 1:
                    pfstr = (cells[1].text or "N/A").replace(
                        ",", ".").replace(u'\u20ac', "")
                    if pfstr != "N/A":
                        result['price_from'] = float(pfstr)

                c = models.Card(result['id'],
                                name=result['name'],
                                img=result['img'])
                yield models.SearchResult(c, result['expansion'],
                                          result['rarity'], result['category'],
                                          result['available'],
                                          result['price_from'])

                # A product page has no result rows to scan; move on instead
                # of falling through to the row loop with an unrelated node.
                pagenow += 1
                continue

            # rows
            for row in tables[0].xpath("tr[contains(@class, 'row_')]"):
                result = dict(blank_result)

                result['img'] = first_group(
                    r"'(.+?)'", row.xpath("td[1]//img/@onmouseover"))
                # Greedy on purpose: the expansion text may itself contain
                # (escaped) quotes.
                result['expansion'] = first_group(
                    r"'(.+)'", row.xpath("td[2]/span/@onmouseover")).strip()
                result['rarity'] = first_group(
                    r"'(.+?)'", row.xpath("td[3]/img/@onmouseover")).strip()

                data = row.xpath("td[5]/a")
                if data:
                    result['id'] = data[0].attrib['href']
                    result['name'] = (data[0].text or '').strip()

                data = row.xpath("td[6]")
                if data:
                    result['category'] = (data[0].text or '').strip()

                data = row.xpath("td[7]")
                if data and data[0].text is not None:
                    result['available'] = int(data[0].text)

                data = row.xpath("td[8]")
                if data:
                    price_text = data[0].text or u"N/A"
                    if price_text != u"N/A":
                        m = re.search(r"(\d+,\d+) ", price_text)
                        if m:
                            result['price_from'] = float(
                                m.group(1).replace(',', '.'))

                if result['name'] != query:
                    continue
                if any(tag in result['expansion']
                       for tag in excluded_editions):
                    continue

                c = models.Card(result['id'],
                                name=result['name'],
                                img=result['img'])
                yield models.SearchResult(c, result['expansion'],
                                          result['rarity'],
                                          result['category'],
                                          result['available'],
                                          result['price_from'])

            # next page
            pagenow += 1
    def get(self, query_text):
        """Search the file at ``FILEPATH`` for occurrences of ``query_text``.

        The match is case-insensitive and literal (``query_text`` is
        regex-escaped).  For every match an ``Occurrence`` is recorded with
        its 1-based line number, ``start``/``end`` set to the match offsets
        plus one, and the surrounding sentence with newlines replaced by
        spaces.

        :param query_text: String of arbitrary text
        :return: SearchResult JSON object
        """
        with open(FILEPATH, encoding='utf-8') as f:
            lines = f.readlines()

        new_search_result = models.SearchResult(query_text=query_text)
        pattern = re.compile(re.escape(query_text), re.M | re.I)

        occurrence_object_list = []

        # enumerate() gives the real position of each line; ``lines.index``
        # would return the first *equal* line and mis-number duplicates.
        for line_index, line in enumerate(lines):
            for m in pattern.finditer(line):
                text_start = m.start()

                first_part = self._sentence_head(lines, line_index,
                                                 text_start)
                second_part = self._sentence_tail(lines, line_index,
                                                  text_start)
                sentence = (first_part + second_part).replace('\n',
                                                              ' ').strip()

                occurrence_object_list.append(
                    models.Occurrence(search_result=new_search_result,
                                      line=line_index + 1,
                                      start=text_start + 1,
                                      end=m.end() + 1,
                                      in_sentence=sentence))

        # Add occurrences to SearchResult
        new_search_result.occurrences = occurrence_object_list
        new_search_result.set_num_of_occurrences()
        response = marshal(new_search_result, search_fields)
        return jsonify(response)

    @staticmethod
    def _sentence_tail(lines, line_index, start):
        """Return the text from ``start`` in ``lines[line_index]`` up to the
        end of the sentence (first '.', '!' or '?', a blank line, or EOF)."""
        tail = ''
        chunk = lines[line_index][start:].replace('"', "'")
        next_index = line_index + 1
        while True:
            stops = [i for i in map(chunk.find, (".", "!", "?")) if i >= 0]
            if stops:
                # The nearest punctuation mark ends the sentence.
                end = min(stops)
                # If the last word is in quotes, keep the quote that follows
                # the punctuation; the punctuation itself is always kept.
                end += 2 if chunk[end + 1:end + 2] == "'" else 1
                return tail + chunk[:end]
            tail += chunk
            if next_index >= len(lines):
                return tail  # reached the end of the file
            chunk = lines[next_index].replace('"', "'")
            if chunk == "\n":
                return tail  # a blank line ends the sentence
            next_index += 1

    @staticmethod
    def _sentence_head(lines, line_index, start):
        """Return the text before ``start`` in ``lines[line_index]`` back to
        the start of the sentence (last '.', '!' or '?', a blank line, or the
        beginning of the file)."""
        head = ''
        chunk = lines[line_index][:start].replace('"', "'")
        prev_index = line_index - 1
        while True:
            stops = [i for i in map(chunk.rfind, (".", "!", "?")) if i >= 0]
            if stops:
                # The closest preceding punctuation mark starts the sentence.
                return chunk[max(stops) + 1:] + head
            head = chunk + head
            # Stop on a blank line or when there is no earlier line; an empty
            # ``chunk`` at the top of the file is fine here.
            if chunk == "\n" or prev_index < 0:
                return head
            chunk = lines[prev_index].replace('"', "'")
            prev_index -= 1