def get_post():
    r = request.get_json()
    if r['type'] == 'message_new':
        parse_command(r['object'])
    elif r['type'] == 'send_digest':
        searcher.search()
    return 'ok'
def ui_searcher():
    print('Searcher')
    es_connection = input(
        'Elasticsearch connection? (default to localhost:9200) ')
    params = dict()
    if es_connection:
        params['es_connection'] = {
            'host': es_connection.split(':')[0],
            'port': int(es_connection.split(':')[1]),
        }
    searcher.search(**params)
def searchResult():
    query = request.args.get('query')
    urls = [''] * 5
    searchEni = search(glob(os.path.join("indexFile", "*TokenDocId.txt"))[0])
    if query is not None and query != "":
        start = time.time()
        try:
            urls = searchEni.start(query)
        except:
            urls = [''] * 5
        searchTime = time.time() - start
        flash(f"Searching completed in {round(searchTime, 5)} seconds.")
    # add pagination
    page, per_page, offset = get_page_args(page_parameter='page',
                                           per_page_parameter='per_page')
    pagination_urls = getUsers(urls, offset=offset, per_page=per_page)
    pagination = Pagination(page=page, per_page=per_page, total=len(urls),
                            alignment="center", css_framework='bootstrap4')
    query = "Enter here" if query is None else query
    return render_template("searchResult.html", query=query,
                           urls=pagination_urls, page=page,
                           per_page=per_page, pagination=pagination)
def find_relevant(query_path, low, high, main):
    if query_path == '':
        messagebox.showinfo("Error",
                            "Please upload the query image before searching!")
        return []
    query_image = cv2.imread(query_path)
    query_seghist_features = img2modihist(query_image)
    img = w2d(query_path, 'db1', 5)
    grey = gc(img, [1],
              [0, np.pi / 8, np.pi / 4, 3 * np.pi / 8, np.pi / 2,
               5 * np.pi / 8, 3 * np.pi / 4, 7 * np.pi / 8],
              levels=256, normed=True)
    contrast = gp(grey, 'contrast')
    energy = gp(grey, 'energy')
    correlation = gp(grey, 'correlation')
    (h, w) = img.shape[:2]
    for x in range(8):
        query_seghist_features.append(contrast[0][x] / 10000)
    for x in range(8):
        query_seghist_features.append(energy[0][x] * 10)
    for x in range(8):
        query_seghist_features.append(correlation[0][x])
    retrieve_count = 10
    best_seghist = search(query_seghist_features, retrieve_count, low, high, main)
    return best_seghist
def get(self):
    user_id = self.request.cookies.get('user_id')
    logging.info('[SearchHandler:GET] user_id: [%s]' % user_id)
    if user_id is None or len(user_id) == 0:
        self.redirect("/")
        return
    q = self.request.get('q')
    p = self.request.get('p')
    logging.info('[SearchHandler:GET] q: [%s] p:[%s]' % (q, p))
    user = User.get_by_key_name(user_id, parent=None)
    posts = search(user, q, p)
    results = []
    for post in posts:
        if post:
            ids = post.id.split('_')
            result = {}
            result['from_name'] = post.from_name
            result['created_time'] = post.created_time
            result['date_time'] = datetime.datetime.strptime(
                post.created_time, '%Y-%m-%dT%H:%M:%S+0000')
            result['message'] = post.message
            result['url'] = 'http://www.facebook.com/' + ids[0] + '/posts/' + ids[1]
            results.append(result)
    template_values = {
        'user': user,
        'logout_url': settings.LOGOUT_URL,
        'results': results,
        'query': q,
        'phrase': p
    }
    template = jinja_environment.get_template('templates/search.html')
    self.response.out.write(template.render(template_values))
def run(self):
    while True:
        # refresh once every 2 minutes
        sleep(120)
        internet_usage = search()
        # send the signal
        self.trigger.emit(internet_usage)
def do_GET(self):
    parsed_path = urlparse.urlparse(self.path)
    try:
        # The query string is passed into the searcher; prefix queries with '?' in the browser.
        message = searcher.search(parsed_path.query)
        self.send_response(200)
        # a blank line marks the end of the headers
        self.end_headers()
        self.wfile.write(message)  # write the result to the response stream
    except Exception:
        # `raise 404` is not valid Python; report the failure as an HTTP 404 instead
        self.send_error(404)
    return
def get_results_from_searcher(nlp_switch, search_bar, scoring_measure):
    index_id = None
    results = dict()
    if nlp_switch == "true":
        results = searcher.search(index_id, search_bar, scoring_measure)
    else:
        results = searcher.multiwordquery_driver(index_id, search_bar, scoring_measure)
    return results
def search():
    query = request.args.get('q')
    results = searcher.search(query, rindex)
    for x in results:
        x.title = (x.title[:config.max_title_len] + '...'
                   if len(x.title) > config.max_title_len else x.title)
    return render_template('results.html', results=results,
                           count=len(results), query=query)
def Clicked3():
    cd = Descriptor((8, 12, 3))
    in_path = filedialog.askopenfilename()
    num = 1
    print(in_path)
    query = cv2.imread(in_path)
    features = cd.describe(query)
    # histogram of the input microstructure is compared to histograms of all other microstructures
    searcher = Searcher("compare_hist.csv")
    results = searcher.search(features)
    print(results)
    root = Toplevel()
    root.grid_rowconfigure(0, weight=1)
    root.grid_columnconfigure(0, weight=1)
    cnv = Canvas(root)
    cnv.grid(row=0, column=0, sticky='nswe')
    hScroll = Scrollbar(root, orient=HORIZONTAL, command=cnv.xview)
    hScroll.grid(row=1, column=0, sticky='we')
    vScroll = Scrollbar(root, orient=VERTICAL, command=cnv.yview)
    vScroll.grid(row=0, column=1, sticky='ns')
    cnv.configure(xscrollcommand=hScroll.set, yscrollcommand=vScroll.set)
    frm = Frame(cnv)
    cnv.create_window(0, 0, window=frm, anchor='nw')
    for r, s in results:
        print('inside')
        img = image.load_img(s, target_size=(224, 224))
        if img is not None:
            t = "test"
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            features = model.predict(x)[0]
            features = features.reshape(1, 2048)
            p = m.predict_classes(features)
            p_f = m_f.predict_classes(features)
            k = m.predict(features)
            print(p[0])
            t, s_f = getText(p[0], p_f[0])
            nn = str(num)
            im = PIL.Image.open(s).resize((200, 200))
            tkimage = PIL.ImageTk.PhotoImage(im)
            myvar = Label(frm, image=tkimage,
                          text="(" + nn + ") " + t + " / " + s_f,
                          compound=tkinter.BOTTOM)
            myvar.image = tkimage
            num = num + 1
            myvar.pack()
    frm.update_idletasks()
    cnv.configure(scrollregion=(0, 0, frm.winfo_width(), frm.winfo_height()))
def search():
    results_array = []
    # Get url
    image_url = request.form.get('img')
    # Perform the search
    results = searcher.search(image_url)
    # Return success
    return jsonify(results=results, preview="images/" + image_url)
def fsearch(words):
    print("search")
    form2 = search_form()
    result = search(words)
    for line in result:
        print(line[0], line[1])
    location = search_location(words)
    print(location)
    return render_template('exform.html', words=words, result=get_text(result),
                           form=form2, collocation=get_location_text(location))
def result():
    from searcher import search
    # keep the keyword as text; .encode('utf-8') would turn it into bytes under
    # Python 3 and break the str comparison and split below
    keyword = request.args.get('keyword').strip()
    keywords = keyword.split(' ')
    if keyword == '' or len(keywords) == 0:
        return render_template('search.html')
    # word, docId, phase, score
    results = search(keywords)
    results = map(lambda a: (a[0], paths[a[0]], a[1], a[2]), results)
    return render_template('result.html', keyword=keyword, results=results)
def parser_agent(parsed, texts, address="body"):
    parsed_first = parsed[0].select(address)[0]
    if parsed_first.get('id') is not None:
        address = "#{}".format(parsed_first.get('id'))
    parsed_first_classes = parsed_first.get('class')
    if parsed_first_classes is not None:
        for i in range(len(parsed_first_classes)):
            if len(parsed[0].select(".{}".format(parsed_first_classes[i]))) == 1:
                address = ".{}".format(parsed_first_classes[i])
                break
    parsed_first_children = list(parsed_first.children)
    n = len(parsed_first_children)
    parsed_per_name = {}
    for i in range(n):
        child_name = parsed_first_children[i].name
        if child_name is None:
            continue
        if child_name in parsed_per_name:
            parsed_per_name[child_name] += [parsed_first_children[i]]
        else:
            parsed_per_name[child_name] = [parsed_first_children[i]]
    max_point = 0
    max_address = ""
    for key in parsed_per_name:
        for i in range(len(parsed_per_name[key])):
            if len(address) == 0:
                new_address = "{}:nth-of-type({})".format(key, i + 1)
            else:
                new_address = "{} > {}:nth-of-type({})".format(address, key, i + 1)
            point = 0
            for j in range(len(texts)):
                is_in_address = search(parsed[j], new_address, texts[j])
                if is_in_address:
                    point += 1
            if point > max_point:
                max_point = point
                max_address = new_address
    if max_point != len(texts):
        print("answer is {}".format(address))
    else:
        parser_agent(parsed, texts, max_address)
def execute(query, database, results_number):
    """
    Executes the query and outputs a number of the top search results
    :param query: search term to run against the database
    :param database: name of the database to search
    :param results_number: number of top results to fetch and print
    :return:
    """
    (query_key, webenv) = search(query, database, results_number)
    records = fetch(query_key, webenv, database, results_number)
    printout(records)
    return
def __init__(self):
    internet_usage = search()
    super(MainWindow, self).__init__()
    self.ui = Ui_Dialog()
    self.ui.setupUi(self)
    self.ui.lcdNumber.display(internet_usage)
    # daily limit 20GB
    usage_percent = float(internet_usage) / 20 * 100
    print(usage_percent)
    # set the progress bar
    self.ui.progressBar.setValue(usage_percent)
    self.ui.progressBar.setFormat("%.02f %%" % usage_percent)
    # this is a thread which fetches the data and refreshes the window
    self.thread = Thread()
    self.thread.trigger.connect(self.reflasher)
    self.thread.start()
    # window title reads "Internet usage meter" followed by the usage percentage
    self.setWindowTitle("網路用量表 %.01f %%" % usage_percent)
def fill_filelist(self, search, current_search):
    self.filelist.clear()
    already_matched = {}
    counter = [-1]

    def tick():
        counter[0] += 1
        if counter[0] % 50 == 0:
            refresh_gui()
            if self.current_search is not current_search:
                raise StopIteration()

    bad_matchers = {}
    for r in self.roots:
        try:
            bad_re = self.pwindow().manager.get_context_manager(r).get()['quick_open']['ignore']
        except KeyError:
            bad_matchers[r] = None
        else:
            bad_matchers[r] = lambda path, bre=bad_re: bre.search(path)

    for m in (searcher.name_start_match, searcher.name_match,
              searcher.path_match, searcher.fuzzy_match):
        for root in self.roots:
            for p in searcher.search(os.path.dirname(root), os.path.basename(root),
                                     m(search), already_matched,
                                     bad_matchers[root], tick):
                if self.current_search is not current_search:
                    return
                already_matched[p] = True
                self.filelist.append(p)
                if len(self.filelist) > 150:
                    self.filelist_tree.columns_autosize()
                    return

    self.filelist_tree.columns_autosize()
def statistics(request):
    organisations, city_facet = searcher.search({
        "meta": {},
        "searches": [
            {
                "TermQuery": [["_type", "organisation"]]
            }
        ],
        "sort": "contact.street_address.municipality_fi,name_fi",
        "facets": ["contact.street_address.municipality_fi"]
    })
    organisations = build_organisation_city_tree(organisations)
    #print(organisations)
    cities = extract_cities_from_facet(
        city_facet["contact.street_address.municipality_fi"])
    view_data = {
        'organisations': organisations,
        'cities': cities,
        'years': [y for y in range(2013, datetime.date.today().year)]
    }
    return template_render('statistics.html', view_data)
def food_price(city):
    default_price = 15.
    if city is None:
        return default_price
    query = 'average meal cost in {}'.format(city)
    ans = searcher.search(query)
    print('Expected food price at {}: '.format(city))
    match = re.findall(r'\$\d+(?:\.\d+)?', ans)
    prices = [
        float(''.join(list(filter(utils.isdigit_or_dot, m)))) for m in match
    ]
    if not prices:
        avg_price = default_price
    else:
        avg_price = sum(prices) / len(prices)
    print('${:.2f}'.format(avg_price))
    return avg_price
def flight_price(from_city, to_city):
    if from_city is None or to_city is None:
        return None
    query = 'price from {} to {}'.format(from_city, to_city)
    ans = searcher.search(query)
    # match = re.search('\$\d+(?:\.\d+)?', ans)
    match = re.findall(r'\$\d+(?:\.\d+)?', ans)
    if not match:
        print('City pairs not supported yet, try another pair')
        return None
    else:
        print('Price from {} to {}: '.format(from_city, to_city))
        # price = float(''.join(list(filter(utils.isdigit_or_dot, match.group(0)))))
        prices = [
            float(''.join(list(filter(utils.isdigit_or_dot, m)))) for m in match
        ]
        price = sum(prices) / len(prices)
        print('${:.2f}'.format(price))
        return price
def fill_filelist(self, search, current_search):
    self.filelist.clear()
    already_matched = {}
    counter = [-1]

    def tick():
        counter[0] += 1
        if counter[0] % 50 == 0:
            refresh_gui()
            if self.current_search is not current_search:
                raise StopIteration()

    root = self.get_current_root()
    try:
        bad_re = settings.ignore_contexts[root]['ignore']

        def bad_matcher(path):
            return bad_re.search(path)
    except KeyError:
        bad_matcher = None

    for m in (searcher.name_start_match, searcher.name_match,
              searcher.path_match, searcher.fuzzy_match):
        for p in searcher.search(root, '', m(search), already_matched,
                                 bad_matcher, tick):
            if self.current_search is not current_search:
                return
            already_matched[p] = True
            self.filelist.append(p)
            if len(self.filelist) > 150:
                self.filelist_tree.columns_autosize()
                return

    self.filelist_tree.columns_autosize()
# Brandon Marshall
# Python Scripting
# October 1, 2015
# Homework 4 - File Traverser

import data_load
import indexer
import searcher

data_load.get_traversal_data()
indexer.process_data("raw_data.pickle", "fortunes_shelve", "indexed_files")
searcher.search("fortunes_shelve", "indexed_files")
def do_GET(self): global webserverdone self.state = ppwstate rim = self.informers["rim"] parms = parsepath(self.path) # Older revisions of adcc may not supply 'verbose' key try: self.informers["verbose"] except KeyError: self.informers["verbose"] = True try: if self.path == "/": page = """ <html> <head> </head> <body> <h4>prsproxy engineering interface</h4> <ul> <li><a href="/engineering">Engeering Interface</a></li> <li><a href="qap/engineering.html">Engeering AJAX App</a></li> <li><a href="datadir">Data Directory View</a></li> <li><a href="killprs">Kill this server</a> (%(numinsts)d """ +\ """copies ofreduce registered)</li> </ul> <body> </html>""" page % {"numinsts":rim.numinsts} self.send_response(200) self.send_header("content-type", "text-html") self.end_headers() self.wfile.write(page) return if parms["path"].startswith("/rqlog.json"): self.send_response(200) self.send_header('Content-type', "application/json") self.end_headers() if "file" in parms: logfile = parms["file"][0] print logfile if not os.path.exists(logfile): msg = "Log file not available" else: f = open(logfile, "r") msg = f.read() f.close() else: msg = "No log file available" tdic = {"log":msg} self.wfile.write(json.dumps(tdic, sort_keys=True, indent=4)) return # ------------------------------------------------------------------ # Server time if parms["path"].startswith("/rqsite.json"): self.send_response(200) self.send_header('Content-type', "application/json") self.end_headers() tdic = server_time() self.wfile.write(json.dumps(tdic, sort_keys=True, indent=4)) return # ------------------------------------------------------------------ # Metrics query employing fitsstore if parms["path"].startswith("/cmdqueue.json"): self.send_header('Content-type', "application/json") self._handle_cmdqueue_json(rim, parms) return # ------------------------------------------------------------------ if parms["path"].startswith("/cmdqueue.xml"): self.send_response(200) self.send_header('Content-type','text/xml') self.end_headers() if "lastcmd" in parms: start = int(parms["lastcmd"][0])+1 else: start = 0 elist = self.state.rim.displayCmdHistory.peekSince(cmdNum=start) print "prsw 200:", repr(elist) xml = '<commandQueue lastCmd="%d">' % (start-1) for cmd in elist: # this is because there should be only one top key # in the cmd dict cmdname = cmd.keys()[0] cmdbody = cmd[cmdname] xml += '<command name="%s">' % cmdname if "files" in cmdbody: basenames = cmdbody["files"].keys() for basename in basenames: fileitem = cmdbody["files"][basename] if "url" not in fileitem or fileitem["url"] == None: url = "None" else: url = fileitem["url"] xml += """<file basename="%(basename)s" url = "%(url)s" cmdnum = "%(cn)d"/>""" % { "basename": basename, "url": "" if "file" not in fileitem else fileitem["url"], "cn":int(cmdbody["cmdNum"])} # now any extension in the extdict if "extdict" in fileitem: extdict = fileitem["extdict"] for name in extdict.keys(): xml += """\n<file basename="%(basename)s" url="%(url)s" ext="%(ext)s" cmdnum="%(cn)d"/>""" % { "basename": basename, "ext": name, "url": extdict[name], "cn":int(cmdbody["cmdNum"])} xml += '</command>' xml += "</commandQueue>" self.wfile.write(xml) return if parms["path"] == "/recipeindex.xml": self.send_response(200) self.send_header('Content-type', 'text/xml') self.end_headers() self.wfile.write(rl.getRecipeIndex(as_xml=True)) return if parms["path"].startswith("/summary"): from fitsstore.FitsStorageWebSummary.Summary import summary from fitsstore.FitsStorageWebSummary.Selection import getselection selection 
= getselection({}) rec = PRec() summary(rec, "summary", selection, [], links=False) buff = rec._buff self.send_response(200) self.send_header("Content-type", "text/html") self.end_headers() self.wfile.write(buff) return if parms["path"].startswith("/calmgr"): from FitsStorageWebSummary.Selection import getselection from FitsStorageWebSummary.CalMGR import calmgr things = parms["path"].split("/")[2:] # print "ppw457:"+ repr(things) self.send_response(200) self.send_header('Content-type', 'text/xml') self.end_headers() # Parse the rest of the URL. selection=getselection(things) # If we want other arguments like order by # we should parse them here req = PRec() retval = calmgr(req, selection) print "-------\n"*3,"ppw469:", req._buff self.wfile.write(req._buff) return if parms["path"] == "/calsearch.xml": import searcher cparms = {} cparms.update(parms) print "pproxy466:"+repr(cparms) if "datalab" in parms: cparms.update({"datalab":parms["datalab"][0]}) if "filename" in parms: print "ppw481:", repr(parms["filename"]) cparms.update({"filename":parms["filename"][0]}) if "caltype" in parms: cparms.update({"caltype":parms["caltype"][0]}) else: cparms.update({"caltype":"processed_bias"}) buff = searcher.search(cparms) self.send_response(200) self.send_header('Content-type', 'text/xml') self.end_headers() self.wfile.write(buff) return if parms["path"].startswith("/globalcalsearch.xml"): from prsproxyutil import calibration_search flattenParms(parms) resultb = None resultf = None if "caltype" in parms: caltype = parms["caltype"] if caltype == "processed_bias" or caltype == "all": parms.update({"caltype":"processed_bias"}) resultb = calibration_search(parms, fullResult=True) if caltype == "processed_flat" or caltype == "all": parms.update({"caltype":"processed_flat"}) resultf = calibration_search(parms, fullResult = True) if caltype == "all": try: domb = minidom.parseString(resultb) domf = minidom.parseString(resultf) except: return None # can't parse input... 
no calibration calnodefs = domf.getElementsByTagName("calibration") if len(calnodefs) > 0: calnodef = calnodefs[0] else: calnodef = None calnodebs = domb.getElementsByTagName("dataset") if len(calnodebs) > 0: calnodeb = calnodebs[0] #print calnodef.toxml() #print calnodeb.toxml() # domb.importNode(calnodef, True) if calnodef and calnodeb: calnodeb.appendChild(calnodef) elif calnodef: result=domb.toxml() else: result=domb.toxml() result = domb.toxml() print "prsw207:", result self.send_response(200) self.send_response(200) self.send_header('Content-type', 'text/xml') self.end_headers() self.wfile.write(result) return if parms["path"] == "/recipecontent": if "recipe" in parms: recipe = parms["recipe"][0] content = rl.retrieve_recipe(recipe) self.send_response(200) self.send_header('Content-type', 'text/plain') self.end_headers() self.wfile.write(content) return if parms["path"] == "/adinfo": self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() if "filename" not in parms: return "Error: Need Filename Parameter" if "filename" in parms: try: ad = AstroData(parms["filename"][0]) except: self.wfile.write("Can't use AstroData to open %s" % parms["filename"]) return if "fullpage" in parms: self.wfile.write("<html><body>") if "fullpage" not in parms: # defaults to false self.wfile.write("<b>Name</b>: %s \n" % os.path.basename(ad.filename)) self.wfile.write("<br/><b>Path</b>: %s \n" % os.path.abspath(ad.filename)) self.wfile.write("<br/><b>Types</b>: %s\n" % ", ".join(ad.types)) recdict = rl.get_applicable_recipes(ad, collate=True) keys = recdict.keys() keys.sort() for key in keys: recname = recdict[key] self.wfile.write("<br/><b>Default Recipe(s)</b>:%s "+\ "(<i>due to type</i>: %s)" % (recname, key)) alldesc = ad.all_descriptors() self.wfile.write("<br/><b>Descriptors</b>:\n") self.wfile.write('<table style="margin-left:4em">\n') adkeys = alldesc.keys() adkeys.sort() self.wfile.flush() for desc in adkeys: value = str(alldesc[desc]) if "ERROR" in value: value = '<span style="color:red">' + value + '</span>' self.wfile.write("<tr><td>%s</td><td>%s</td></tr>\n" % (desc, value)) self.wfile.flush() self.wfile.write("</table>") if "fullpage" in parms: self.wfile.write("</body></html>") return if parms["path"] == "/recipes.xml": self.send_response(200) self.send_header('Content-type', 'text/xml') self.send_header("Access-Control-Allow-Origin", "http://localhost") self.end_headers() self.wfile.write(rl.list_recipes(as_xml = True) ) return if parms["path"] == "/reduceconfigs.xml": import glob rcfgs = glob.glob("./*.rcfg") self.send_response(200) self.send_header('Content-type', 'text/xml') self.end_headers() retxml = '<?xml version="1.0" encoding="UTF-8" ?>\n' retxml += "<reduceconfigs>\n" for rcfg in rcfgs: retxml += """\t<reduceconfig name="%s"/>\n""" % rcfg retxml += "</reduceconfigs>\n" self.wfile.write(retxml) return if parms["path"].startswith("/datadir.xml"): dirdict = self.getDirdict() ds = dirdict.dataSpider xml = dirdict.as_xml() self.send_response(200) self.send_header('Content-type', 'text/xml') self.end_headers() self.wfile.write('<?xml version="1.0" encoding="UTF-8" ?>\n') self.wfile.write("<datasetDict>\n") self.wfile.write(xml) self.wfile.write("</datasetDict>") self.wfile.flush() return if parms["path"] == "/runreduce": self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() self.wfile.write("<html><head></head><body>\n") from StringIO import StringIO rout = StringIO() cmdlist = ["reduce", "--invoked", "--verbose=6"] 
cmdlist.extend(parms["p"]) logdir = ".autologs" if not os.path.exists(logdir): os.mkdir(logdir) reducelog = os.path.join(logdir, "reduce-addcinvokedlog-%d%s" % ( os.getpid(), str(time.time()) )) f = open(reducelog, "w") loglink = "reducelog-latest" if os.path.exists(loglink): os.remove(loglink) os.symlink(reducelog, loglink) # WARNING, this call had used Popen and selected on the # subprocess.PIPE... now uses call there is kruft remaining # (may move it back to old style soon but there was a bug) print "adcc running: \n\t" + " ".join(cmdlist) pid = subprocess.call( cmdlist, stdout = f, stderr = f) self.wfile.write('<b style="font-size=150%">REDUCTION STARTED</b>') self.wfile.write("<pre>") # self.wfile.flush() f.close() f = open(reducelog, "r") txt = f.read() # pretty the text ptxt = txt if (True): # make pretty ptxt = re.sub("STARTING RECIPE:(.*)\n", '<b>STARTING RECIPE:</b><span style="color:blue">\g<1></span>\n', ptxt) ptxt = re.sub("STARTING PRIMITIVE:(.*)\n", '<i>STARTING PRIMITIVE:</i><span style="color:green">\g<1></span>\n', ptxt) ptxt = re.sub("ENDING PRIMITIVE:(.*)\n", '<i>ENDING PRIMITIVE:</i> <span style="color:green">\g<1></span>\n', ptxt) ptxt = re.sub("ENDING RECIPE:(.*)\n", '<b>ENDING RECIPE:</b> <span style="color:blue">\g<1></span>\n', ptxt) ptxt = re.sub("(STATUS|INFO|FULLINFO|WARNING|CRITICAL|ERROR)(.*?)-(.*?)-", '<span style="font-size:70%">\g<1>\g<2>-\g<3>- </span>', ptxt) self.wfile.write(ptxt) # f.read()) f.close() try: while False: error = False while(True): stdout = None stderr = None r,v,w = select.select([pid.stdout],[],[],.1) print "prsw112:", repr(r) if len(r): stdout = r[0].read() print "prsw487:", stdout break; else: r,v,w = select.select([pid.stderr],[],[],.1) if len(r): stderr = pid.stderr.read() print "prsw494:", stderr break; if stdout: self.wfile.write(str(stdout)) if stderr: self.wfile.write("{"+stderr+"}") self.wfile.flush() if pid.poll()!= None: self.wfile.flush() break except: print "PRSW516 EMERGENCY:" self.wfile.write("</pre>") if False: r,v,x = select.select([pid.stderr], [], [], .1) if len(r): stderr = pid.stderr.read() else: stderr = None # stderr = pid.stderr.read(100) if stderr != None: self.wfile.write("<b><pre>\n") self.wfile.write(str(stderr)) self.wfile.write("</pre></b>") self.wfile.write('<b style="font-size=150%">REDUCTION ENDED</b>') self.wfile.write("\n</body></html>") self.wfile.flush() return if self.path == "/reducelist": #our dynamic content self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() # this is the tag in head that autopolls if wanted front = """ <html> <head> <meta http-equiv="refresh" content="2" /> </head> <body>""" page = front + """ %(body)s <body> </html>""" self.wfile.write(page) if True: body = "" body += "<b>date</b>: %s<br/>\n" \ % datetime.datetime.now().strftime("%A, %Y-%m-%d %H:%M:%S") body += "<u>Reduce Instances</u><br/>\n" body += "n.o. 
instances: %d\n" % rim.numinsts body += "<ul>" rdict = copy(rim.reducedict) rpids = rim.reducedict.keys() for rpid in rpids: body += "<li>client pid = %d at port %d</li>\n" \ % (rpid, rdict[rpid]["port"]) body += "</ul>" self.wfile.write(page % {"body":body}) self.wfile.flush() return if self.path == "/killprs": import datetime self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() self.wfile.write("Killed this prsproxy instance, pid = %d at %s" \ %(os.getpid(), str(datetime.datetime.now()))) webserverdone = True return if self.path.startswith("/displaycache"): from CacheManager import get_cache_dir, get_cache_file path = os.path.split(self.path) print "prsw 569:", self.path if len (path)>1: slot = path[-1] tfile = get_cache_file(slot) try: f = open(tfile) except: return self.send_response(200) self.send_header('Content-type', 'image/png') self.end_headers() while True: t = f.read(102400) if t == "": self.wfile.flush() break self.wfile.write(t) return if self.path.startswith("/fullheader"): realpath = self.path.split('/') realpath = realpath[1:] dirdict = self.getDirdict() print "prsw514:", repr(realpath) name = realpath[-1] fname = dirdict.get_full_path(name) ad = AstroData(fname) self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() self.wfile.write("<html><body>\n") self.wfile.write('<h2>%s</h2>\n' % name) self.wfile.write(ad.infostr(as_html=True)) alld = ad.all_descriptors() self.wfile.write( """ <table cellspacing="2px"> <COLGROUP align="right" /> <COLGROUP align="left" /> <thead> <tr> <td style="background-color:grey">Descriptor</td> <td style="background-color:grey">Value</td> </tr> </thead> """) alldkeys = alld.keys() alldkeys.sort() for dname in alldkeys: if type(alld[dname]) == str and "ERROR" in alld[dname]: redval = '<span style="color:red">'+str(alld[dname])+"</span>" dval = redval else: # print "ppw864:",type(alld[dname]) if not alld[dname].collapse_value(): import pprint dval = """<pre>%s</pre> """ \ % pprint.pformat(alld[dname].dict_val, indent=4, width=80) else: dval = str(alld[dname]) self.wfile.write(""" <tr> <td style="text-align:right;border-bottom:solid grey 1px"> %(dname)s = </td> <td style="border-bottom:solid grey 1px"> %(value)s </td> </tr> """ % { "dname":dname, "value":dval}) self.wfile.write("</table>") self.wfile.write("</body></html>\n") return if self.path.startswith("/htmldocs"): import FitsStorage realpath = self.path.split('/') realpath = realpath[1:] dirname = os.path.dirname(FitsStorage.__file__) fname = os.path.join(dirname, "htmldocroot", *realpath) #print "psrw456: %s\n" % repr(fname)*10 fnamelocal = os.path.join( os.path.dirname(fname), "FS_LOCALMODE_"+os.path.basename(fname) ) if os.path.exists(fnamelocal): fname = fnamelocal try: f = open(fname, "r") data = f.read() print repr(data) f.close() except IOError: data = "<b>NO SUCH RESOURCE FOUND</b>" self.send_response(200) if fname.endswith(".css"): self.send_header('Content-type', "text/css") elif fname.endswith(".png"): self.send_header('Content-type', "image/png") else: self.send_header('Content-type', 'text/html') self.end_headers() self.wfile.write(data) return #what's the problem with this. 
if self.path.startswith("/cmd_queue"): self.counter += 1 data = str(self.counter) self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() self.wfile.write(data) return if self.path.startswith("/engineering"): self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() if "rim" in ADCCHandler.informers: rim = ADCCHandler.informers["rim"] evman = rim.events_manager import pprint data = "<u>Events</u><br/><pre>" data += "num events: %d\n" % len(evman.event_list) for mevent in evman.event_list: data += pprint.pformat(mevent) data += "------------------------------\n" data += "</pre>" self.wfile.write(data) return if self.path.startswith("/event_report.json"): self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() if "timestamp" in parms: timestamp = parms["timestamp"][0] else: timestamp = 0 print "prsprox:",timestamp self.wfile.write({"youbo":"mompun"}) return if self.path.startswith("/qap"): if ".." in self.path: self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() data = "<b>bad path error</b>" self.wfile.write(data) dirname = os.path.dirname(__file__) joinlist = [dirname, "../../scripts/adcc_faceplate/"] # Split out any parameters in the URL self.path = self.path.split("?")[0] #append any further directory info. joinlist.append(self.path[5:]) fname = os.path.join(*joinlist) self.log_message('"%s" %s %s', "Loading " + \ joinlist[1] + os.path.basename(fname), 203, '-') try: f = open(fname, "r") data = f.read() f.close() except IOError: data = "<b>NO SUCH RESOURCE AVAILABLE</b>" self.send_response(200) if self.path.endswith(".js"): self.send_header('Content-type', 'text/javascript') elif self.path.endswith(".css"): self.send_header("Content-type", "text/css") elif fname.endswith(".png"): self.send_header('Content-type', "image/png") else: self.send_header('Content-type', 'text/html') self.end_headers() self.wfile.write(data) return else: print "not qap" if self.path == "/": self.path = "/KitchenSink.html" dirname = os.path.dirname(__file__) fname = os.path.join(dirname, "pyjamaface/prsproxygui/output", *(self.path[1:])) try: f = open(fname, "r") data = f.read() f.close() except IOError: data = "<b>NO SUCH RESOURCE FOUND</b>" self.send_response(200) self.send_header('Content-type', 'text/html') self.end_headers() self.wfile.write(data) return except IOError: raise print "handling IOError" self.send_error(404,'File Not Found: %s' % self.path)
import searcher
import data_load
import indexer

# the functions will be called here in order to perform search_engine operations
s = data_load.file_traverse()
indexer.preprocess(s)
print("Please enter the word")
s1 = searcher.search()
print(s1)

# Output1
# Please enter the word
# whole and loaves
# ['whole', 'loaves']
# [['fortune1\\fortune1\\fortune2\\fortune2.txt']]

# Output2
# Please enter the word
# war or trapped
# ['war', 'trapped']
# [['fortune1\\fortune1\\fortune2\\fortune2.txt'], ['fortune1\\fortune1\\fortune2\\fortune3\\fortune4\\fortune5\\fortune6\\fortune7\\fortune8\\fortune9\\fortune10\\fortune11\\fortune11.txt']]
def searchForContent():
    data_load.get_traversal_data()
    search_data = indexer.read_data()
    searcher.search(search_data)
def index_post():
    text = request.forms.text
    return searcher.search(str(text))
file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)
webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

# catch exceptions dealing with pickling the objects, as well as catching
# any exceptions dealing with opening the file to begin with
try:
    with open(webPickle, "bw") as out:
        try:
            pickle.dump(web_data, out)
        except pickle.PicklingError:
            print("Unpicklable object passed into dump().")
except IOError as ioe:
    print("Unable to write to file: " + ioe.filename)

# catch exceptions dealing with pickling the objects, as well as catching
# any exceptions dealing with opening the file to begin with
try:
    with open(dataPickle, "bw") as out:
        try:
            pickle.dump(file_data, out)
        except pickle.PicklingError:
            print("Unpicklable object passed into dump().")
except IOError as ioe:
    print("Unable to write to file: " + ioe.filename)

indexer.process_data("unh_shelve", "indexed_files", dataPickle, webPickle)
searcher.search("unh_shelve", "indexed_files")
def index(request, entity_id=None): current_step = 1 docs = [] only_one = False if not request.user.is_authenticated(): login_required = True else: login_required = False current_step = 2 doc_id = entity_id selected_id = None selected_library = None if doc_id: selected_doc = get_doc_from_index( settings.INDEX_NAME, 'organisation', doc_id) if selected_doc: selected_library = get_source(selected_doc) selected_id=doc_id current_step = 3 if selected_library['organisation_type'] != 'library': family_tree = searcher.search_family_tree(user=request.user) member = family_tree.get(doc_id) if member: selected_library['children'] = [ (cid, cname) for cid, cname in zip(member['children'], member['children_names'])] parents = member['parent_names'] parents.reverse() selected_library['parents'] = ' → '.join(parents) else: if not login_required: request.session.set_expiry(12*60*60) current_step = 2 required_fields = ['organisation_type', 'contact.street_address.municipality_fi', 'name_fi', 'parent_organisation', 'meta.modified'] if request.user.is_authenticated(): if request.user.is_superuser: results = searcher.search( # todo: remove size searcher.ALL_LIBS, size = 10000, fields = required_fields) else: results = searcher.front_page_search( request.user, size = 500, fields = required_fields) family_tree = searcher.search_family_tree(user=request.user) for doc in results[0]: if 'organisation_type' not in doc: print(doc) if (doc['organisation_type'] in ['branchlibrary', 'unit', 'library']): family_member = family_tree.get(doc['_id']) if family_member: parents = family_member['parent_names'] children = family_member['children_names'] children_ids = family_member['children'] if len(parents): parents.reverse() doc['parents'] = ' → '.join(parents) if len(children): doc['children'] = [(cid, cname) for cid, cname in zip(children_ids, children)] if (doc['organisation_type'] not in ['department', 'mobile_stop']): docs.append(doc) if not login_required and len(docs) == 1: current_step = 3 selected_library = docs[0] selected_id = selected_library['_id'] only_one = True t = env.get_template('index.html') note_filter_date = dt.datetime.now() - dt.timedelta(days=7) notifications = models.Notification.objects.unread(request.user, note_filter_date) context = { 'login_required' : login_required, 'docs' : docs, 'library_count' : len(docs), 'username' : request.user.username, 'access_to_templates' : ( not login_required and request.user.has_perm('sites.access_to_templates')), 'summary' : teaser('organisation'), 'selected_library' : selected_library, 'selected_id' : selected_id , 'frontpage_url' : frontpage_url, 'editpage_url' : editpage_url, 'current_step' : current_step, 'only_one' : only_one, 'library_limit' : 5, 'username_string' : _("Username"), 'password_string' : _("Password"), 'login_string' : _("Log in"), 'notifications' : notifications, 'can_export' : user_has_group(request.user, 'export') } context.update(csrf(request)) # todo: better way? s = template_render(t, context) return HttpResponse(s)
if not args.datadir:
    args.datadir = "../20news-bydate-test"
if not args.indexfile:
    args.indexfile = "../index.data"

# If the user wants to create the indexes
if args.index:
    # Open all the files
    documents = connecter.fetch(args.datadir)
    # Tokenize, lowercase, remove accents
    tokenizedDocs = tokenizer.analyze(documents, [textprocessor.Normalizer()])
    # Create index
    index = indexer.buildIndex(tokenizedDocs)
    # Serialize index in file
    index.save(args.indexfile)

# If the user passed search keywords as parameter
if args.words:
    # Read index from file
    index = indexer.loadIndex(args.indexfile)
    # Search in index with the specified keywords
    results = searcher.search(index, args.words)
    # Display the results
    print("Your request is matched in the following files:")
    if len(results) > 0:
        for r in results:
            print(r)
    else:
        print("No file matches your request.")
for t in temp:
    if t == "and":
        op = "and"
    elif t != "or":
        temp1.add(t)

print("Performing '" + op.upper() + "' search for: " + str(temp1))
out = list(temp1)

try:
    page = urllib.request.urlopen(
        "http://api.openweathermap.org/data/2.5/weather?q=" + "06516")
    code = page.getcode()
    if code == 200:
        content = page.read()
        content_string = content.decode("utf-8")
        json_data = json.loads(content_string)
        name = json_data["name"]
        weather = json_data["weather"][0]["main"]
        sun_rise = json_data["sys"]["sunrise"]
        sun_set = json_data["sys"]["sunset"]
except URLError as e:
    print("error")

dictionary_data = indexer.indexer()
print()
print("location : " + str(name) + " Weather : " + str(weather) +
      " Sun Rise : " + str(sun_rise) + " Sun Set : " + str(sun_set))
print()
searcher.search(dictionary_data, out, op)
ap.add_argument("-i", "--index", required=True, help="Path to storage directory") ap.add_argument("-q", "--query", required=True, help="Path to query directory") ap.add_argument("-r", "--result-path", required=True, help="Path to results directory") args = vars(ap.parse_args()) # initialize colour descriptor cd = colourdescriptor.ColourDescriptor((8, 12, 3)) # load query and get it's features query = cv2.imread(args["query"]) features = cd.describe(query) # perform search searcher = searcher.Searcher(args["index"]) results = searcher.search(features) # display query cv2.imshow("Query", query) # loop over results for (score, result_id) in results: result = cv2.imread(result_id) cv2.imshow("Result", result) cv2.waitKey(0)
import searcher
import data_load
import indexer
import WEBcrawler
import weather

weather.weather()
visit_url.visit_url()
indexer.indexer()
searcher.searcher()
data_load.traverser()

d = indexer.indexer("raw_data.pickle", "shelve")
searcher.search(d)
import data_load
import searcher
import indexer
import webcrawler

data_load.traverser()
webcrawler.webcrawler()
d = indexer.process_data("raw_data.pickle", "webdata.pickle")
searcher.search("fortune_shelve")
# traverser()
# web_crawler()
# indexer.create_shelve("raw_data.pickle","url_data.pickle")

# I am asking my query in the combine_search.py because I would need it
# for other modules as well, e.g. weather
print("\n\n" + "===>>> Welcome to Avik's Search Engine <<<===")
query = input("Query: ")
query = query.lower().strip(" ")  # get rid of the front and rear spaces
query = query.split(" ")  # take every word into a list, with a space being the delimiter
query = list(set(query))  # remove repeated items and convert back to a list from a set

if ("or" in query) and ("and" not in query):  # if "and" is present, the "and" operation should take place
    query.remove("or")  # get rid of the "or" content from the list
    search_type = "or"
# the elif performs an "and" search; the user could just type "flower sheep" without an operator
elif ("and" in query) or (len(query) > 1 and ("and" not in query) and ("or" not in query)):
    if "and" in query:
        query.remove("and")
    if "or" in query:
        query.remove("or")
    search_type = "and"
else:
    search_type = ""  # when the user inputs only one word, no particular search type is used

get_weather(query)
search(query, search_type, "dictionary_data")
# so far working with only title and metadata.
import searcher
import indexer

d = indexer.process_data("raw_data.pickle", "fortune_shelves")
d = indexer.process_data("urls.pickle", "fortune_shelves")
searcher.search("fortune_shelves")
json_data = json.loads(content_string)
city = json_data.get('name', None)
weather = json_data.get("weather")
if city:
    print('The weather for {} is {}'.format(city, weather[0]['main']))

query = input("query: ")
if not query:
    print("Must enter something")
    exit(0)
qtype, keywords = searcher.detect_query_type(query)

# Initiate Task 4
call_weather_api(keywords)

# Process both pickle files and store in fortunes_shelve
indexer.preprocess(['web_data.pickle', 'data.pickle'], 'fortunes_shelve')

output = searcher.search('fortunes_shelve', qtype, keywords)
if output:
    for found in output:
        print("Found at ", found)
else:
    print("Not Found")
from data_load import traverser
from indexer import create_dictionary
from searcher import search

traverser()
create_dictionary()
search("dictionary_data")
import searcher
import indexer
from indexer import query
import data_load
import crawler_new

indexer.process_data("raw_data.txt", "shelve_file")
indexer.process_data("raw_data1.txt", "shelve_file")
searcher.search("shelve_file", query)
import dataload
import indexer
import searcher

# dataload.traverse()
indexer.dict()
searcher.search()
import searcher
import indexer
import data_load

dict_words = indexer.process_data("raw_data.txt", "shelve_file")
searcher.search("shelve_file")
    except URLError as e:
        print("error")
    return returnList


from data_load import get_traversal_data
import indexer
import searcher
import pickle

crawler_backlog = {}
seed = "http://www.newhaven.edu/"
crawler_backlog[seed] = 0
returnList = []

file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)

webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

out = open(webPickle, "bw")
pickle.dump(web_data, out)
out.close()

out = open(dataPickle, "bw")
pickle.dump(file_data, out)
out.close()

indexer.process_data("unh_shelve", "indexed_files", dataPickle, webPickle)
searcher.search("unh_shelve", "indexed_files")
import searcher
import data_load
import indexer
import weather

# the functions will be called here in order to perform search_engine operations
str_set = data_load.main()
indexer.preprocess(str_set)
print("Please enter the word")
query = input()
while query != "q":
    weather.weatherInfo()
    s1 = searcher.search(query)
    print("Search Results: ")
    for x in s1:
        print(x[0])
    print("Please enter any word other than 'q' to continue")
    query = input()

# Output1
# Please enter the word
# VIP
# weather Conditions : Rain
# Performing search operation for {'VIP'}
# Search Results:
# www.newhaven.edu/admissions/VIP/
# Please enter any word other than 'q' to continue
# admissions and ugrad
# weather Conditions : Rain
# Performing search operation for {'admissions', 'ugrad'}
# Search Results:
# Subscribe to LandmarkDetected event from ALLandMarkDetection proxy.
landmarkProxy.subscribe("landmarkTest")
markData = memoryProxy.getData("LandmarkDetected")


# Wait for a mark to be detected.
@timeout(6)
def find_mark(markData):
    while (len(markData) == 0):
        markData = memoryProxy.getData("LandmarkDetected")
    return markData


try:
    markData = find_mark(markData)
except Exception, e:
    print "time out, head's gonna move"
    result = search(ip)
    if len(result) == 0:
        print "timeout completely"
        print "ERROR: ", e
        sys.exit(1)
    else:
        markData = result
        print "just found it"
## finally:
##     pass  # sys.exit(1)

print "markdata"
print markData
motionProxy.setStiffnesses("Head", 1.0)

# Retrieve landmark center position in radians.
wzCamera = markData[1][0][0][1]
# import data_load
import indexer
import searcher

query = input("query: ")
query = query.strip(" ").split()  # get rid of the front and rear spaces, space being the delimiter
web_query = []
query = list(set(query))
web_query = query[:]

d = indexer.get_traversal_data("raw_data.pickle.txt")
searcher.search(d, query)

w = indexer.get_traversal_data("web_data.pickle.txt")
searcher.search(w, web_query)
def crawl(this, keyWord, nReq=MAX_DOC, sortReq='', fieldReq='', timeSpan=None):
    '''
    A generator that collects data.
    It searches the WoS Core Collection by keyword over the given time span,
    sorts the results by date (descending), times cited (descending),
    relevance, etc., and extracts fields such as the title, authors,
    corresponding author and e-mail address from each result.

    Parameters
    ----------
    keyWord : search keyword
    nReq : number of records to collect; defaults to collecting everything
    sortReq : sort code for the search results, see sortId.py
    timeSpan : time span of the search

    Returns
    -------
    crawl : a generator that yields the extracted fields of each paper
    '''
    if nReq is None or nReq < 0:
        nReq = MAX_DOC
    driver = this.driver
    driver.get(this.home)
    print('INFO : home page opened')
    selectDatabase(driver)
    print('INFO : database selected')
    selectSpan(driver, timeSpan)
    print('INFO : time span selected')
    selectSearchField(driver, fieldReq)
    print('INFO : search field selected')
    search(driver, keyWord)
    msg = ifSearchFailed(driver)
    assert not msg, 'search failed : "%s"' % msg
    print('INFO : search succeed')
    sid, qid = getIds(driver)
    sortResults(driver, sid, qid, sortReq)
    print('INFO : sort succeed')
    nRst = getNumOfRst(driver)
    if MAX_DOC < nRst:
        print('WARNING : too much results, please consider shortening time span')
    switchLabel(driver, -1)
    print('INFO : start to extract data')
    i = 0
    maxI = min(nReq, nRst, MAX_DOC)
    tStart = time()
    ts = tc = 0
    for lnk in getLnks(driver, nReq, nRst):
        i += 1
        sTimeCost = ', %.2fs last page'
        print('INFO : extracting %d/%d%s' %
              (i, maxI, '' if i == 1 else sTimeCost % tc), end='\r')
        ts = time()
        newLabel(driver, lnk)
        switchLabel(driver, -1)
        this.__waitTillOpen()
        rst = extractValues(driver)
        driver.close()
        switchLabel(driver, -1)
        tc = time() - ts
        yield rst
    tCost = time() - tStart
    print('\nINFO : extracting done, %.2fs/paper' % (tCost / maxI))
def search(query):
    return json.encode(searcher.search(query))
#*****************************************************************************************
# CSCI 6651
# Homework 4 File Traverse
# Building a search engine
# search_combine.py
# Author: Yong Deng
# Since: 5-7-2015
# This program is to call other modules to perform searching.
#*****************************************************************************************
import data_load
import indexer
import searcher

data = indexer.processData("raw_data.pickle", "fortune_shelve")
searcher.search("fortune_shelve")

#=================================================================================================================
# Sample outputs
#=================================================================================================================
# Yongs-MBP-5:YongDeng_HW4_fileTrasverser yongdeng$ python3 search_combine.py
# query:the and was or if
# Performing AND search for: {'was', 'the', 'if'}
# >>Found at /Users/yongdeng/Documents/CS/UNH assignments/CSCI 6651 python/myPython/YongDeng_HW4_fileTrasverser/fortune1/fortune2/fortune3/fortune4/fortune5/fortune6/fortune7/fortune8/fortune9/fortune9.log
# Execution time: 231
#-----------------------------------------------------------------------------------------------------------------
def load_tracks(): ''' Load tracks from disk ''' albums, tracks = {}, [] mp3_files = get_all_files(PRE_PROCESS_DIR) # TODO: should be inside a for loop as we traverse through the directory given by PRE_PROCESS_DIR for filepath in mp3_files: if not load_track(filepath, albums, tracks): log('skipped file due to insufficient track info: ' + filepath) else: log('loaded file: ' + filepath) # TODO: for loop ends here # process tracks in 'albums' first covers = {} for album_index in albums: album = albums[album_index] artist_name = get_artist_str(album.artists, '') # TODO: change the next line track_count = len(album.tracklist) if len(album.tracklist) > 8 else None res_albums = search(album.title, artist_name, track_count) best_album, best_mapping, score = Album.find_best_album_match(album, res_albums) if not best_album: log('no matched album found for ' + album.title) continue else: pass # add cover for the album img_file, scrap_err = get_album_covers(best_album.artists[0], best_album.title) if scrap_err: break # TODO: ask if the user wants to proceed even no album cover can be obtained instead elif img_file: best_album.add_cover(img_file) for i, j in best_mapping: log('{} -> {} ({})'.format(str(album.tracklist[i]), str(best_album.tracklist[j]), str(best_album)), 'change') # TODO: let user confirm using input() album.tracklist[i].apply_track_diff(best_album.tracklist[j]) for track in album.tracklist: if track.changed: track.save() log('file saved with new changes: ' + track.filepath) # move the file to new location and rename it new_filepath = construct_filepath(track) log('new file path: ' + new_filepath) move_file(track.filepath, new_filepath) log('file moved') track.filepath = new_filepath else: tracks.append(track) # else treat it as an individual track # TODO: then process tracks in 'tracks' album = Album() for track in tracks: album.tracklist.append(track) artist_name = get_artist_str(track.artists, '') res_albums = search(track.title, artist_name, track=True) # noe bascally copy and paste, may be I should factor the code at some point best_album, best_mapping, score = Album.find_best_album_match(album, res_albums) if not best_album: log('no matched album found for ' + track.title) continue else: pass # add cover for the album img_file, scrap_err = get_album_covers(best_album.artists[0], best_album.title) if scrap_err: break # TODO: ask if the user wants to proceed even no album cover can be obtained instead elif img_file: best_album.add_cover(img_file) for i, j in best_mapping: log('{} -> {} ({})'.format(str(album.tracklist[i]), str(best_album.tracklist[j]), str(best_album)), 'change') # TODO: let user confirm using input() album.tracklist[i].apply_track_diff(best_album.tracklist[j]) if track.changed: track.save() log('file saved with new changes: ' + track.filepath) # move the file to new location and rename it new_filepath = construct_filepath(track) log('new file path: ' + new_filepath) move_file(track.filepath, new_filepath) log('file moved') track.filepath = new_filepath album.tracklist.clear() delete_dir()