def startThreads(self, url, bundle_url): startTime = time.time() ADFBundle.grabBundleKeysByURL(bundle_url) print "processing... ", url new_agent = HttpAgent(url) response = new_agent.RequestResponse() #Read only the first 1MB of data snippet = response.read(50000) soup = BeautifulSoup(snippet) links = soup.findAll('a', {"target" : "_source"}) #Create an instance for each search results parse_url = urlparse(response.geturl()) hostname = parse_url.scheme + '://' + parse_url.netloc counter = 0 #populate queue with hosts for link in links: counter += 1 try: new_searchResult = SearchResult(hostname, link['href'], None) newTags, newAttrs = new_searchResult.exploreSource() #Get the CSet variables Explore.allTags = Explore.allTags.union(newTags) Explore.allAttrs = Explore.allAttrs.union(newAttrs) except: print "link unexplored" pass elapsedTime = (time.time() - startTime) print "Elapsed Time: %s" % elapsedTime
def run(self): while True: #grab url from queue link = self.queue.get() print "link href:", link['href'] try: new_searchResult = SearchResult(self.hostname, link['href'], None) new_searchResult.searchSource() #Get the CSet variables new_searchResult.grabBundleSource(self.mf) #Grab the Bundle Resources #Container if self.param['container'] == 'dialog': ###Search for Dialogs self.runDialogSearch(new_searchResult) elif self.param['container'] == 'page': #Page tagAfKey = "af:panelgrouplayout" self.runPageSearch(new_searchResult, tagAfKey) elif self.param['container'] == 'explore': #Generic Search tagName = None if self.param['tag'] != 'All': print "tag:", self.param['tag'] tagName = self.param['tag'] attr_dict = dict() if self.param['attribute'] != 'All': print "attributeA:", self.param['attribute'] if self.param['attribute_value'] is not None: print "value:", self.param['attribute_value'] attr_dict[(self.param['attribute'])] = self.param['attribute_value'] else: print "value: compile" attr_dict[(self.param['attribute'])] = re.compile(".+") self.runPageSearch(new_searchResult, tagName, attr_dict) else: self.runPageSearch(new_searchResult, tagName) elif self.param['container'] == 'icon': #Icons self.runIconSearch(new_searchResult) print "successful" #print Output to Files except: print "unsuccessful" pass #signals to queue job is done self.queue.task_done()
def performSearch(self, criteria):
    """Search the full-text index for `criteria` and return up to 20 hits.

    The index location is read from the environment-specific `index_path`
    property; each hit becomes a SearchResult(title, path, order, criteria).
    """
    logger = Logger.getInstance()
    logger.logDebug('performing search with criteria: ' + criteria)

    props = Properties.getInstance()
    index_path = props.get(self.__environment, 'index_path')

    # define schema and get a writer for it
    ix = open_dir(index_path)

    # perform search
    hits = []
    with ix.searcher() as searcher:
        parsed = QueryParser("content", ix.schema).parse(criteria)
        for match in searcher.search(parsed, limit=20):
            hits.append(SearchResult(match['title'], match['path'],
                                     match['orderno'], criteria))
    logger.logDebug("results found: " + str(len(hits)))
    return hits
def on_searchBtn_clicked(self, dialog):
    """Open a modal search-result window for the current query text.

    Builds a LIKE-based query over the "in" (入帳), "out" (除帳) or combined
    record sets depending on self.mode, then shows the results via a
    SearchResult dialog and closes `dialog` with the window's result code.

    Raises:
        ValueError: if self.mode is not 'in', 'out' or 'both'. (The old
        non-exclusive `if` chain left `sqlstr` undefined in that case and
        crashed later with a NameError.)
    """
    # update search cache
    self.update_field_cache()
    # open a search result window
    self.resultWindow = QtWidgets.QDialog()
    Q_IN = (
        'select '
        'ID, '
        '"入帳" as type, '
        'name, '
        'acquire_date as date, '
        'keeper, '
        'remark '
        'from '
        'hvhnonc_in '
        'where '
        'name like :q or category like :q '
        'or subcategory like :q or brand like :q '
        'or spec like :q or place like :q '
        'or keep_department like :q or use_department like :q '
        'or keeper like :q or remark like :q ')
    Q_OUT = (
        'select '
        'hvhnonc_out.ID as ID, '
        '"除帳" as type, '
        'hvhnonc_in.name as name, '
        'hvhnonc_out.unregister_date as date, '
        'hvhnonc_in.keeper as keeper, '
        'hvhnonc_out.unregister_remark as remark '
        'from '
        'hvhnonc_out '
        'inner join '
        'hvhnonc_in '
        'on '
        'hvhnonc_out.in_ID = hvhnonc_in.ID '
        'and (hvhnonc_in.name like :q '
        'or hvhnonc_in.category like :q '
        'or hvhnonc_in.subcategory like :q '
        'or hvhnonc_in.brand like :q '
        'or hvhnonc_in.spec like :q '
        'or hvhnonc_in.place like :q '
        'or hvhnonc_in.keep_department like :q '
        'or hvhnonc_in.use_department like :q '
        'or hvhnonc_in.keeper like :q '
        'or hvhnonc_in.remark like :q) ')
    Q_BOTH = (Q_IN + 'union all ' + Q_OUT)
    Q_ORDER = 'order by date desc'
    # Map each mode to its query; fail loudly on an unknown mode instead of
    # letting an undefined sqlstr raise a NameError further down.
    mode_queries = {'in': Q_IN, 'out': Q_OUT, 'both': Q_BOTH}
    try:
        sqlstr = mode_queries[self.mode] + Q_ORDER
    except KeyError:
        raise ValueError('unknown search mode: {!r}'.format(self.mode))
    params = ('%{}%'.format(self.query.currentText()),)
    SearchResult(self.resultWindow, sqlstr, params)
    dialog.done(self.resultWindow.exec_())
def on_serial_lookup_clicked(self):
    """Show a read-only result window: one row per item name with its count."""
    # query: one row per distinct name, with how many records share it
    sqlstr = ("select object_ID, name, count(*) as '數量' "
              "from hvhnonc_in group by name")
    params = []
    # open a result window
    window = QtWidgets.QDialog()
    self.resultWindow = window
    self.sr = SearchResult(window, sqlstr, params)
    window.resize(320, 600)
    # results are informational only: remove the double-click handler
    self.sr.tableWidget.doubleClicked.disconnect()
    window.exec_()
def factory(type):
    """Create a search handler by class name.

    Args:
        type: one of "SearchNews", "SearchImage" or "SearchResult".
            (Parameter name kept for backward compatibility even though it
            shadows the builtin.)

    Returns:
        A new instance of the requested class.

    Raises:
        ValueError: for an unrecognised type name. (Previously `assert 0`,
        which silently disappears under `python -O`.)
    """
    # Imports are deferred so only the requested module is actually loaded.
    if type == "SearchNews":
        from SearchNews import SearchNews
        return SearchNews()
    if type == "SearchImage":
        from SearchImage import SearchImage
        return SearchImage()
    if type == "SearchResult":
        from SearchResult import SearchResult
        return SearchResult()
    raise ValueError("Bad shape creation: " + type)
def search(self, query, k):
    """Rank self.documents against `query` by TF-IDF cosine similarity.

    Args:
        query: iterable of (already tokenised) query terms.
        k: maximum number of results to return.

    Returns:
        Up to k SearchResult objects sorted by descending score; if no query
        term is known to the index (zero query norm), the first k documents
        are returned unscored instead.
    """
    # Term frequencies of the query itself (was a hand-rolled dict loop).
    query_term_freq = collections.Counter(query)

    # TF-IDF weight for each query term that appears in the index.
    query_weight = {}
    for token in query_term_freq:
        if token in self.term_document_weight:
            query_weight[token] = (
                TFIDFSearcher.tf(query_term_freq[token])
                * TFIDFSearcher.idf(len(self.documents),
                                    len(self.term_document_weight[token])))

    query_norm = math.sqrt(sum(w ** 2 for w in query_weight.values()))
    if query_norm == 0.0:
        # Query shares no vocabulary with the index: fall back to first k docs.
        return self.documents[:k]

    results = []
    for doc in self.documents:
        # Only tokens shared by both the document and the query contribute.
        shared_tokens = set(doc.tokens) & set(query_weight)
        score = sum(query_weight[token] * self.term_document_weight[token][doc.id]
                    for token in shared_tokens)
        score = score / (query_norm * self.docs_norm[doc.id])
        results.append(SearchResult(doc, score))
        print(doc.name + ' ' + str(score))

    results.sort(reverse=True)
    # Return SearchResults (not bare documents) for further calculation;
    # slicing handles k > len(results) without the old manual clamp.
    return results[:k]
def startThreads(self, url, bundle_url):
    """Fetch `url`, pull the anchors targeting "_source" from the start of the
    response, and explore each one to grow the global Explore.allTags /
    Explore.allAttrs sets.

    NOTE(review): this appears to duplicate the other startThreads in this
    file; consider consolidating.
    """
    startTime = time.time()
    ADFBundle.grabBundleKeysByURL(bundle_url)
    print "processing... ", url
    new_agent = HttpAgent(url)
    response = new_agent.RequestResponse()
    #Read only the first 1MB of data
    # NOTE(review): actually reads 50,000 bytes (~50 KB), not 1 MB.
    snippet = response.read(50000)
    soup = BeautifulSoup(snippet)
    links = soup.findAll('a', {"target": "_source"})
    #Create an instance for each search results
    parse_url = urlparse(response.geturl())
    hostname = parse_url.scheme + '://' + parse_url.netloc
    counter = 0
    #populate queue with hosts
    for link in links:
        counter += 1
        try:
            new_searchResult = SearchResult(hostname, link['href'], None)
            # Get the CSet variables
            newTags, newAttrs = new_searchResult.exploreSource()
            Explore.allTags = Explore.allTags.union(newTags)
            Explore.allAttrs = Explore.allAttrs.union(newAttrs)
        except:
            # NOTE(review): bare except — any failure on a link is skipped.
            print "link unexplored"
            pass
    elapsedTime = (time.time() - startTime)
    print "Elapsed Time: %s" % elapsedTime
def retrieve_active_listings(self):
    """Scrape the seller's active-listings page and return one SearchResult
    per listing found.

    TODO: Handle multiple pages (probably visiting all pages, slow ...)
    """
    # Make a request to the active listings page
    url = self.active_listings_url(self._account_name)
    html = urllib.request.urlopen(url)
    soup = BeautifulSoup(html, 'html.parser')
    # Every search hit on the page is an <li class="sresult"> element;
    # wrap each li context in a SearchResult object representation.
    listing_nodes = soup.findAll('li', {'class': 'sresult'})
    return [SearchResult(node) for node in listing_nodes]
def search(self, query=None):
    """Run a Google Custom Search for `query` (or the previously-set
    self.query) and append each result row to self.data.

    Returns the accumulated self.data list on success, or None when the
    request fails with an HTTP error or when key/cx/query are not all set.
    """
    if query is not None:
        self.query = query
    self.query = self.query.replace(' ', '+')
    url = ("https://www.googleapis.com/customsearch/v1?key=" + self.api_key
           + "&cx=" + self.cx_id + "&q=" + self.query + "&alt=json")
    # Compare by value, not identity: the old `is not ""` checks only worked
    # by accident of CPython string interning. Also validate the effective
    # query (self.query) rather than the raw parameter, which is None on
    # re-use of a previously-set query.
    if self.api_key != "" and self.cx_id != "" and self.query != "":
        try:
            connection = urllib2.urlopen(url)
            res = json.loads(connection.read())
            for r in res["items"]:
                row = SearchResult(r)
                self.data.append(row)
            return self.data
        except urllib2.HTTPError:
            return None
def display(soup):
    """Render recipe search results from a parsed results page into the Tk grid.

    Downloads each card's thumbnail, appends a SearchResult to the
    module-level `results` list, then lays every result out five per row
    starting at grid row 2.

    NOTE(review): relies on module globals `results`, `root`,
    `result_containers` and `image_size`; re-calling this re-appends the
    page's results — confirm callers clear `results` first.
    """
    result_list_row = 2
    display_row = 0
    result_list_column = 0
    result_count = 0
    # One card per <article class="fixed-recipe-card"> on the page.
    for result in soup.find_all("article", class_='fixed-recipe-card'):
        # Fetch the card's thumbnail image and scale it to the shared size.
        with urllib.request.urlopen(result.find("img", class_='fixed-recipe-card__img')
                                    .attrs['data-original-src']) as u:
            raw_data = u.read()
        im = Image.open(BytesIO(raw_data))
        image = ImageTk.PhotoImage(im.resize(image_size))
        results.append(SearchResult(result.find("span", class_='fixed-recipe-card__title-link').string,
                                    result.find("span", class_='stars').attrs['data-ratingstars'],
                                    result.find("span", class_='fixed-recipe-card__reviews').contents[0].attrs[
                                        'number'],
                                    image,
                                    result.find("a", {'data-internal-referrer-link': 'hub recipe'}).attrs['href'],
                                    False))
    for result in results:
        # Truncate long titles so the cards stay a uniform width.
        if len(result.title) > 21:
            result.title = result.title[:21] + "..."
        container = tk.Frame(root, width=150)
        if result.selected:
            container.background = "green"
        tk.Message(master=container, text=result.title, bg="blue", fg="white",
                   width=150).pack(fill=tk.BOTH)
        tk.Label(master=container, image=result.image, bg="orange").pack()
        tk.Label(master=container, text=result.stars).pack()
        tk.Label(master=container, text=result.reviews).pack()
        result_containers[display_row].append(
            container.grid(row=result_list_row, column=result_list_column, padx=3, pady=3))
        result_list_column = result_list_column + 1
        result_count = result_count + 1
        # Wrap to the next grid row after every five cards.
        if result_count % 5 == 0:
            result_list_row = result_list_row + 1
            display_row = display_row + 1
            result_list_column = 0
def search(params):
    """Performs a search query on AZLyrics and scrapes the search results.

    Parameters
    ----------
    params : str
        Parameters for the search query

    Returns
    -------
    list(SearchResult,)
        A list of search results returned from AZLyrics

    Raises
    ------
    NoResultsError
        If there were no search results returned
    """
    # Get the search result's BeautifulSoup and see if it has any search results
    doc = get_url_soup(AZLYRICS_SEARCH_URL, params)
    if no_results(doc):
        raise NoResultsError(
            'No search results were found for the query: `{}`'.format(
                params['q']))

    def to_search_result(cell):
        # Strip the leading "N. " index, collapse runs of spaces, keep the
        # first line only; the first anchor in the cell is the result's link.
        label = re.sub(r'^\d+.\s', '', cell.text.strip())
        label = re.sub(r' +', ' ', label).split('\n')[0]
        return SearchResult(label, cell.findAll('a')[0]['href'])

    # Compile all of the search results into a list of SearchResult instances
    return [to_search_result(cell)
            for cell in doc.findAll('td', {'class': 'text-left'})]
def run(self):
    """Thread worker: consume links from self.queue forever, dispatching each
    to the search routine selected by self.param['container'] ('dialog',
    'page', 'explore' or 'icon'); task_done() is signalled for every link.

    NOTE(review): this appears to duplicate the other run() in this file;
    consider consolidating.
    """
    while True:
        #grab url from queue
        link = self.queue.get()
        print "link href:", link['href']
        try:
            new_searchResult = SearchResult(self.hostname, link['href'], None)
            new_searchResult.searchSource()  #Get the CSet variables
            new_searchResult.grabBundleSource(self.mf)  #Grab the Bundle Resources
            #Container: dispatch on the requested container type
            if self.param['container'] == 'dialog':
                ###Search for Dialogs
                self.runDialogSearch(new_searchResult)
            elif self.param['container'] == 'page':
                #Page
                tagAfKey = "af:panelgrouplayout"
                self.runPageSearch(new_searchResult, tagAfKey)
            elif self.param['container'] == 'explore':
                #Generic Search
                tagName = None
                if self.param['tag'] != 'All':
                    print "tag:", self.param['tag']
                    tagName = self.param['tag']
                attr_dict = dict()
                if self.param['attribute'] != 'All':
                    print "attributeA:", self.param['attribute']
                    if self.param['attribute_value'] is not None:
                        print "value:", self.param['attribute_value']
                        attr_dict[(self.param['attribute'])] = self.param['attribute_value']
                    else:
                        # No explicit value: match any non-empty attribute value
                        print "value: compile"
                        attr_dict[(self.param['attribute'])] = re.compile(".+")
                    self.runPageSearch(new_searchResult, tagName, attr_dict)
                else:
                    self.runPageSearch(new_searchResult, tagName)
            elif self.param['container'] == 'icon':
                #Icons
                self.runIconSearch(new_searchResult)
            print "successful"
            #print Output to Files
        except:
            # NOTE(review): bare except — any failure on a link is skipped.
            print "unsuccessful"
            pass
        #signals to queue job is done
        self.queue.task_done()
# allow_q = Term("url", "https://smittenkitchen.com/2021/12/short-rib-onion-soup/") # # Don't show any documents where the "tag" field contains "todo" # restrict_q = query.Term("tag", "todo") with ix.searcher() as s: final_results = set() if len(ing_list) > 0: results = [] for ingred in ing_list: print(" ".join(ingred)) # allow_q = Term("ingredients", " ".join(ingred)) # res = s.search(myquery, filter=allow_q, terms = True, limit = 20) res = s.search(parser.parse(" ".join(ingred)), terms = True, limit = 20) for rr in res: r = SearchResult(rr) final_results.add(r) print("Found " + str(len(results)) + "results") results = s.search(myquery, terms = True, limit = 20) #filter=allow_q, mask=restrict_q print("Found " + str(len(results)) + "results") print(results.scored_length()) for rr in results: r = SearchResult(rr) final_results.add(r) for i, r in enumerate(final_results): print("result found: " + str(i)) print(r.title) print(r.url) print("\n")
def on_submitBtn_clicked(self, dialog):
    """Run the query built from the current form in a modal search-result
    window, then close `dialog` with the window's result code."""
    # open a search result window
    window = QtWidgets.QDialog()
    self.resultWindow = window
    sqlstr, params = self.load_form_query()
    SearchResult(window, sqlstr, params)
    dialog.done(window.exec_())
def convert_results(articles, stemmed):
    """Wrap each article in a SearchResult carrying the `stemmed` flag,
    preserving the input order."""
    converted = []
    for article in articles:
        converted.append(SearchResult(article, stemmed))
    return converted
#!/usr/bin/env python
"""Count occurrences of colour terms (and their synonyms) in a book.

For each term in `terms`, a SearchResult tallies how many lines of
sense-and-sensibility.txt contain the term or one of its synonyms.
"""
from SearchResult import SearchResult
from PyDictionary import PyDictionary

thesaurus = PyDictionary()

terms = ['red', 'blue', 'yellow']

searchResults = [SearchResult(term) for term in terms]

# Scan the book line by line. Matching is plain substring containment, so
# e.g. "blues" also counts as "blue". `with` ensures the file is closed
# (the original opened it and never closed it).
with open('sense-and-sensibility.txt') as book:
    for line in book:
        for term in searchResults:
            if term.term in line:
                term.increment()
            for synonym in term.synonyms:
                if synonym in line:
                    term.increment(synonym)

for term in searchResults:
    print('There are {} occurrences of synonyms of {} (using {} as synonyms)'
          .format(term.count, term.term, term.synonyms))