Example #1
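# A guess at the context: a Flask-style webhook endpoint (the 'message_new'
# event suggests a VK callback) that dispatches incoming events either to a
# command parser or to the digest searcher.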
def get_post():
    r = request.get_json()
    if r['type'] == 'message_new':
        parse_command(r['object'])
    elif r['type'] == 'send_digest':
        searcher.search()
    return 'ok'
Example #2
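# Interactive CLI helper (assumed context): prompts for an optional
# Elasticsearch "host:port" string and forwards it to searcher.search().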
def ui_searcher():
    print('Searcher')
    es_connection = input(
        'Elasticsearch connection? (default to localhost:9200) ')
    params = dict()
    if es_connection:
        params['es_connection'] = {
            'host': es_connection.split(':')[0],
            'port': int(es_connection.split(':')[1]),
        }
    searcher.search(**params)
Example #3
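# Flask view (assumed context): runs the query against a prebuilt token/docId
# index and paginates the resulting URLs with flask-paginate.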
def searchResult():
    query = request.args.get('query')
    urls = [''] * 5
    searchEni = search(glob(os.path.join("indexFile", "*TokenDocId.txt"))[0])
    if query:
        start = time.time()
        try:
            urls = searchEni.start(query)
        except Exception:
            urls = [''] * 5
        searchTime = time.time() - start
        flash(f"Searching completed in {round(searchTime, 5)} seconds.")

    # add pagination
    page, per_page, offset = get_page_args(page_parameter='page',
                                           per_page_parameter='per_page')
    pagination_urls = getUsers(urls, offset=offset, per_page=per_page)
    pagination = Pagination(page=page,
                            per_page=per_page,
                            total=len(urls),
                            alignment="center",
                            css_framework='bootstrap4')

    query = "Enter here" if query == None else query
    return render_template("searchResult.html",
                           query=query,
                           urls=pagination_urls,
                           page=page,
                           per_page=per_page,
                           pagination=pagination)
Example #4
def find_relevant(query_path, low, high, main):
    if query_path == '':
        messagebox.showinfo("Error",
                            "Please upload the query image before searching!")
        return []
    query_image = cv2.imread(query_path)
    query_seghist_features = img2modihist(query_image)
    img = w2d(query_path, 'db1', 5)
    grey = gc(img, [1], [k * np.pi / 8 for k in range(8)],
              levels=256, normed=True)
    contrast = gp(grey, 'contrast')
    energy = gp(grey, 'energy')
    correlation = gp(grey, 'correlation')
    (h, w) = img.shape[:2]
    for x in range(8):
        query_seghist_features.append(contrast[0][x] / 10000)
    for x in range(8):
        query_seghist_features.append(energy[0][x] * 10)
    for x in range(8):
        query_seghist_features.append(correlation[0][x])

    retrieve_count = 10

    best_seghist = search(query_seghist_features, retrieve_count, low, high,
                          main)
    return best_seghist
Example #5
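 # Google App Engine webapp-style handler (assumed context): requires a user_id
 # cookie, runs the search, and renders Facebook post results with Jinja.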
 def get(self):
     user_id = self.request.cookies.get('user_id')
     logging.info('[SearchHandler:GET] user_id: [%s]' % user_id)
     if user_id is None or len(user_id) == 0:
         self.redirect("/")
         return
     q = self.request.get('q')
     p = self.request.get('p')
     logging.info('[SearchHandler:GET] q: [%s] p:[%s]' % (q, p))
     user = User.get_by_key_name(user_id, parent=None)
     posts = search(user, q, p)
     results = []
     for post in posts:
         if post:
             ids = post.id.split('_')
             result = {}
             result['from_name'] = post.from_name
             result['created_time'] = post.created_time
             result['date_time'] = datetime.datetime.strptime(post.created_time, '%Y-%m-%dT%H:%M:%S+0000')
             result['message'] = post.message
             result['url'] = 'http://www.facebook.com/' + ids[0] + '/posts/' + ids[1]
             results.append(result)
     template_values = {
         'user': user,
         'logout_url': settings.LOGOUT_URL,
         'results': results,
         'query': q,
         'phrase': p
     }
     template = jinja_environment.get_template('templates/search.html')
     self.response.out.write(template.render(template_values))
Example #6
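    # Presumably a QThread subclass: polls the usage searcher every two minutes
    # and emits the result through a Qt signal.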
    def run(self):
        while True:
            # refresh once every 2 minutes
            sleep(120)
            internet_usage = search()

            # send the signal
            self.trigger.emit(internet_usage)
Example #7
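 # Minimal BaseHTTPRequestHandler method (assumed context): any failure while
 # searching is reported back to the client as a 404.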
 def do_GET(self):
     parsed_path = urlparse.urlparse(self.path)
     try:
         # the query string is passed to the searcher; prefix queries with '?' in the browser
         message = searcher.search(parsed_path.query)
         self.send_response(200)
         self.end_headers()                              # a blank line marks the end of the headers
         self.wfile.write(message)                       # write the response body
     except Exception:
         self.send_error(404)
     return
Example #8
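# Dispatch helper (assumed context): routes the query either to the NLP search
# path or to the multi-word query driver, depending on a UI switch.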
def get_results_from_searcher(nlp_switch, search_bar, scoring_measure):
    index_id = None
    results = dict()
    if nlp_switch == "true":
        results = searcher.search(index_id, search_bar, scoring_measure)
    else:
        results = searcher.multiwordquery_driver(index_id, search_bar,
                                                 scoring_measure)

    return results
Example #9
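# Flask view (assumed context): searches the reverse index and truncates long
# titles before rendering the results page.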
def search():
    query = request.args.get('q')
    results = searcher.search(query, rindex)
    for x in results:
        if len(x.title) > config.max_title_len:
            x.title = x.title[:config.max_title_len] + '...'
    return render_template('results.html',
                           results=results,
                           count=len(results),
                           query=query)
Example #10
    def Clicked3():
        
        cd = Descriptor((8, 12, 3))
        in_path = filedialog.askopenfilename()
        num = 1
        print(in_path)

        query = cv2.imread(in_path)
        features = cd.describe(query)
        #histogram of the input microstructure is compared to histograms of all other microstructures/
        searcher = Searcher("compare_hist.csv")
        results = searcher.search(features)
        print(results)
        
        root = Toplevel()
        root.grid_rowconfigure(0, weight=1)
        root.grid_columnconfigure(0, weight=1)
        cnv = Canvas(root)
        cnv.grid(row=0, column=0, sticky='nswe')
        hScroll = Scrollbar(root, orient=HORIZONTAL, command=cnv.xview)
        hScroll.grid(row=1, column=0, sticky='we')
        vScroll = Scrollbar(root, orient=VERTICAL, command=cnv.yview)
        vScroll.grid(row=0, column=1, sticky='ns')
        cnv.configure(xscrollcommand=hScroll.set, yscrollcommand=vScroll.set)
        frm = Frame(cnv)
        cnv.create_window(0, 0, window=frm, anchor='nw')
            
        for r, s in results:
            img = image.load_img(s, target_size=(224, 224))
            # defaults so the label below never hits a NameError when img is None
            t, s_f = "", ""
            if img is not None:
                x = image.img_to_array(img)
                x = np.expand_dims(x, axis=0)
                x = preprocess_input(x)
                features = model.predict(x)[0]
                features = features.reshape(1, 2048)
                p = m.predict_classes(features)
                p_f = m_f.predict_classes(features)
                k = m.predict(features)
                print(p[0])
                t, s_f = getText(p[0], p_f[0])
                
            nn = str(num)
            im = PIL.Image.open(s).resize((200, 200))
            tkimage = PIL.ImageTk.PhotoImage(im)
            myvar = Label(frm, image=tkimage, text="(" + nn + ") " + t + " / " + s_f,
                          compound=tkinter.BOTTOM)
            myvar.image = tkimage
            num = num + 1
            myvar.pack()

        frm.update_idletasks()
        cnv.configure(scrollregion=(0, 0, frm.winfo_width(), frm.winfo_height()))
Example #11
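# Flask endpoint (assumed context): runs an image search for the posted image
# URL and returns the matches as JSON.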
def search():
    results_array = []

    # Get url
    image_url = request.form.get('img')

    # Perform the search
    results = searcher.search(image_url)

    # Return the results as JSON, with a preview path for the query image
    return jsonify(results=results, preview="images/" + image_url)
Example #12
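# Flask view (assumed context): runs a text search plus a location search and
# renders both into a template.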
def fsearch(words):
    print("search")
    form2 = search_form()
    result = search(words)
    for line in result:
        print(line[0], line[1])

    location = search_location(words)
    print(location)

    return render_template('exform.html', words=words, result=get_text(result),
                           form=form2, collocation=get_location_text(location))
Example #13
 def do_GET(self):
     parsed_path = urlparse.urlparse(self.path)
     try:
         # the query string is passed to the searcher; prefix queries with '?' in the browser
         message = searcher.search(parsed_path.query)
         self.send_response(200)
         self.end_headers()  # a blank line marks the end of the headers
         self.wfile.write(message)  # write the response body
     except Exception:
         self.send_error(404)
     return
Example #14
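# Flask view (assumed context): splits the keyword string into terms, searches,
# and joins each hit with its document path before rendering.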
def result():
    from searcher import search

    keyword = request.args.get('keyword', '').strip()
    keywords = keyword.split()

    if not keywords:
        return render_template('search.html')

    # word, docId, phase, score
    results = search(keywords)
    results = [(a[0], paths[a[0]], a[1], a[2]) for a in results]
    return render_template('result.html', keyword=keyword, results=results)
Example #15
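# Recursive scraper helper (inferred from the code): scores candidate CSS
# selectors by how many of the sample texts they match, then recurses into the
# best-scoring child selector.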
def parser_agent(parsed, texts, address="body"):
    parsed_first = parsed[0].select(address)[0]

    if parsed_first.get('id') is not None:
        address = "#{}".format(parsed_first.get('id'))

    parsed_first_classes = parsed_first.get('class')

    if parsed_first_classes is not None:
        for i in range(len(parsed_first_classes)):
            if len(parsed[0].select(".{}".format(
                    parsed_first_classes[i]))) == 1:
                address = ".{}".format(parsed_first_classes[i])
                break

    parsed_first_children = list(parsed_first.children)
    n = len(parsed_first_children)
    parsed_per_name = {}

    for i in range(n):
        child_name = parsed_first_children[i].name
        if child_name is None:
            continue
        if child_name in parsed_per_name:
            parsed_per_name[child_name] += [parsed_first_children[i]]
        else:
            parsed_per_name[child_name] = [parsed_first_children[i]]

    max_point = 0
    max_address = ""
    for key in parsed_per_name:
        for i in range(len(parsed_per_name[key])):
            if len(address) == 0:
                new_address = "{}:nth-of-type({})".format(key, i + 1)
            else:
                new_address = "{} > {}:nth-of-type({})".format(
                    address, key, i + 1)
            point = 0
            for j in range(len(texts)):
                is_in_address = search(parsed[j], new_address, texts[j])
                if is_in_address:
                    point += 1
            if point > max_point:
                max_point = point
                max_address = new_address
    if max_point != len(texts):
        print("answer is {}".format(address))
    else:
        parser_agent(parsed, texts, max_address)
Example #16
def execute(query, database, results_number):
    """
    Executes query and outputs a number of the top search results
    :param query:
    :param database:
    :param results_number:
    :return:
    """
    (query_key, webenv) = search(query, database, results_number)

    records = fetch(query_key, webenv, database, results_number)

    printout(records)

    return
Example #17
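    # PyQt main window (assumed context): shows the current usage on an LCD
    # widget and a progress bar, then starts a background thread to keep them fresh.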
    def __init__(self):
        internet_usage = search()
        super(MainWindow, self).__init__()
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)
        self.ui.lcdNumber.display(internet_usage)

        # daily limit 20GB
        usage_percent = float(internet_usage) / 20 * 100
        print(usage_percent)

        # set the progress bar
        self.ui.progressBar.setValue(usage_percent)
        self.ui.progressBar.setFormat("%.02f %%" % usage_percent)

        # a worker thread that fetches the data and refreshes the window
        self.thread = Thread()
        self.thread.trigger.connect(self.reflasher)

        self.thread.start()

        self.setWindowTitle("網路用量表 %.01f %%" % usage_percent)
Example #18
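    # Quick-open file list filler (assumed context): tries progressively fuzzier
    # matchers over all roots, skipping paths matched by per-root ignore patterns.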
    def fill_filelist(self, search, current_search):
        self.filelist.clear()

        already_matched = {}
        counter = [-1]

        def tick():
            counter[0] += 1
            if counter[0] % 50 == 0:
                refresh_gui()
                if self.current_search is not current_search:
                    raise StopIteration()

        bad_matchers = {}
        for r in self.roots:
            try:
                bad_re = self.pwindow().manager.get_context_manager(r).get()['quick_open']['ignore']
            except KeyError:
                bad_matchers[r] = None
            else:
                bad_matchers[r] = lambda path, bre=bad_re: bre.search(path)

        for m in (searcher.name_start_match, searcher.name_match,
                searcher.path_match, searcher.fuzzy_match):
            for root in self.roots:
                for p in searcher.search(os.path.dirname(root), os.path.basename(root),
                        m(search), already_matched, bad_matchers[root], tick):
                    if self.current_search is not current_search:
                        return

                    already_matched[p] = True
                    self.filelist.append(p)

                    if len(self.filelist) > 150:
                        self.filelist_tree.columns_autosize()
                        return

        self.filelist_tree.columns_autosize()
Example #19
def statistics(request):
    organisations, city_facet = searcher.search({
        "meta": {},
        "searches": [
            { "TermQuery": [["_type", "organisation"]] }
        ],
        "sort": "contact.street_address.municipality_fi,name_fi",
        "facets": ["contact.street_address.municipality_fi"]
    })

    organisations = build_organisation_city_tree(organisations)

    #print(organisations)

    cities = extract_cities_from_facet(city_facet["contact.street_address.municipality_fi"])

    view_data = {
        'organisations': organisations,
        'cities': cities,
        'years': [y for y in range(2013, datetime.date.today().year)]
    }

    return template_render('statistics.html', view_data)
Example #20
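# Heuristic price lookup (assumed context): scrapes dollar amounts out of the
# searcher's answer text and averages them, falling back to a default.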
def food_price(city):
    default_price = 15.

    if city is None:
        return default_price

    query = 'average meal cost in {}'.format(city)

    ans = searcher.search(query)

    print('Expected food price at {}: '.format(city))
    match = re.findall(r'\$\d+(?:\.\d+)?', ans)
    prices = [
        float(''.join(list(filter(utils.isdigit_or_dot, m)))) for m in match
    ]

    if not prices:
        avg_price = default_price
    else:
        avg_price = sum(prices) / len(prices)

    print('${:.2f}'.format(avg_price))
    return avg_price
Example #21
def flight_price(from_city, to_city):
    if from_city is None or to_city is None:
        return None

    query = 'price from {} to {}'.format(from_city, to_city)
    ans = searcher.search(query)

    # match = re.search(r'\$\d+(?:\.\d+)?', ans)
    match = re.findall(r'\$\d+(?:\.\d+)?', ans)

    if not match:
        print('City pairs not supported yet, try another pair')
        return None
    else:
        print('Price from {} to {}: '.format(from_city, to_city))
        # price = float(''.join(list(filter(utils.isdigit_or_dot, match.group(0)))))
        prices = [
            float(''.join(list(filter(utils.isdigit_or_dot, m))))
            for m in match
        ]
        price = sum(prices) / len(prices)
        print('${:.2f}'.format(price))
        return price
Example #22
    def fill_filelist(self, search, current_search):
        self.filelist.clear()

        already_matched = {}
        counter = [-1]

        def tick():
            counter[0] += 1
            if counter[0] % 50 == 0:
                refresh_gui()
                if self.current_search is not current_search:
                    raise StopIteration()

        root = self.get_current_root()

        try:
            bad_re = settings.ignore_contexts[root]['ignore']
            def bad_matcher(path):
                return bad_re.search(path)

        except KeyError:
            bad_matcher = None

        for m in (searcher.name_start_match, searcher.name_match,
                searcher.path_match, searcher.fuzzy_match):
            for p in searcher.search(root, '', m(search), already_matched, bad_matcher, tick):
                if self.current_search is not current_search:
                    return

                already_matched[p] = True
                self.filelist.append(p)

                if len(self.filelist) > 150:
                    self.filelist_tree.columns_autosize()
                    return

        self.filelist_tree.columns_autosize()
Example #23
#Brandon Marshall       
#Python Scripting
#October 1, 2015
#Homework 4 - File Traverser

import data_load
import indexer
import searcher

data_load.get_traversal_data()
indexer.process_data("raw_data.pickle", "fortunes_shelve", "indexed_files")
searcher.search("fortunes_shelve", "indexed_files")
Example #24
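    # Large Python 2 BaseHTTPServer dispatcher (assumed context): routes each URL
    # path of the prsproxy/adcc engineering interface to hand-rolled HTML/XML/JSON
    # responses; searcher.search() serves the /calsearch.xml branch.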
    def do_GET(self):
        global webserverdone
        self.state = ppwstate
        rim = self.informers["rim"]
        parms = parsepath(self.path)

        # Older revisions of adcc may not supply 'verbose' key
        try: 
            self.informers["verbose"]
        except KeyError: 
            self.informers["verbose"] = True

        try:
            if self.path == "/":
                page = """
                <html>
                <head>
                </head>
                <body>
                <h4>prsproxy engineering interface</h4>
                <ul>
                <li><a href="/engineering">Engeering Interface</a></li>
                <li><a href="qap/engineering.html">Engeering AJAX App</a></li>
                <li><a href="datadir">Data Directory View</a></li>
                <li><a href="killprs">Kill this server</a> (%(numinsts)d """ +\
                    """copies ofreduce registered)</li>
                </ul>
                <body>
                </html>"""
                page % {"numinsts":rim.numinsts}
                self.send_response(200)
                self.send_header("content-type", "text-html")
                self.end_headers()
                self.wfile.write(page)
                return
                
            if parms["path"].startswith("/rqlog.json"):
                self.send_response(200)
                self.send_header('Content-type', "application/json")
                self.end_headers()
                
                if "file" in parms:
                    logfile = parms["file"][0]
                    print logfile
                    if not os.path.exists(logfile):
                        msg = "Log file not available"
                    else:
                        f = open(logfile, "r")      
                        msg = f.read()
                        f.close()
                else:
                    msg = "No log file available"

                tdic = {"log":msg}

                self.wfile.write(json.dumps(tdic, sort_keys=True, indent=4))
                return
 
            # ------------------------------------------------------------------
            # Server time

            if parms["path"].startswith("/rqsite.json"):
                self.send_response(200)
                self.send_header('Content-type', "application/json")
                self.end_headers()
                tdic = server_time()
                self.wfile.write(json.dumps(tdic, sort_keys=True, indent=4))
                return

            # ------------------------------------------------------------------
            # Metrics query employing fitsstore

            if parms["path"].startswith("/cmdqueue.json"):
                self.send_header('Content-type', "application/json")
                self._handle_cmdqueue_json(rim, parms)
                return

            # ------------------------------------------------------------------
            
            if parms["path"].startswith("/cmdqueue.xml"):
                self.send_response(200)
                self.send_header('Content-type','text/xml')
                self.end_headers()
                
                if "lastcmd" in parms:
                    start = int(parms["lastcmd"][0])+1
                else:
                    start = 0   
                elist = self.state.rim.displayCmdHistory.peekSince(cmdNum=start)
                print "prsw 200:", repr(elist)
                xml = '<commandQueue lastCmd="%d">' % (start-1)
                for cmd in elist:
                    # this is because there should be only one top key
                    #   in the cmd dict
                    cmdname = cmd.keys()[0] 
                                            
                    cmdbody = cmd[cmdname]
                    xml += '<command name="%s">' % cmdname
                    
                    if "files" in cmdbody:
                    
                        basenames = cmdbody["files"].keys()
                        for basename in basenames:
                            fileitem = cmdbody["files"][basename]
                            if "url" not in fileitem or fileitem["url"] == None:
                                url = "None"
                            else:
                                url = fileitem["url"]
                            xml += """<file basename="%(basename)s"
                            url = "%(url)s"
                            cmdnum = "%(cn)d"/>""" % {
                                "basename": basename,
                                "url": "" if "file" not in fileitem else fileitem["url"],
                                "cn":int(cmdbody["cmdNum"])}
                                            
                            # now any extension in the extdict
                            if "extdict" in fileitem:
                                extdict = fileitem["extdict"]
                                for name in extdict.keys():
                                    xml += """\n<file basename="%(basename)s"
                                             url="%(url)s"
                                             ext="%(ext)s"
                                             cmdnum="%(cn)d"/>""" % {
                                            "basename": basename,
                                            "ext": name,
                                            "url": extdict[name],
                                            "cn":int(cmdbody["cmdNum"])}
 
                    xml += '</command>'
                xml += "</commandQueue>"
                self.wfile.write(xml)
                return 
                
            if parms["path"] == "/recipeindex.xml":
                self.send_response(200)
                self.send_header('Content-type', 'text/xml')
                self.end_headers()
                
                self.wfile.write(rl.getRecipeIndex(as_xml=True))
                return
             
            if parms["path"].startswith("/summary"):
                from fitsstore.FitsStorageWebSummary.Summary import summary
                from fitsstore.FitsStorageWebSummary.Selection import getselection
                
                selection = getselection({})
                
                rec =  PRec()
                summary(rec, "summary", selection, [], links=False)
                buff = rec._buff
                self.send_response(200)
                self.send_header("Content-type", "text/html")
                self.end_headers()
                self.wfile.write(buff)
                return
                
            if parms["path"].startswith("/calmgr"):
                from FitsStorageWebSummary.Selection import getselection
                from FitsStorageWebSummary.CalMGR import calmgr
                things = parms["path"].split("/")[2:]
                # print "ppw457:"+ repr(things)
                self.send_response(200)
                self.send_header('Content-type', 'text/xml')
                self.end_headers()
                
                # Parse the rest of the URL.
                selection=getselection(things)
            
                # If we want other arguments like order by
                # we should parse them here
                req = PRec()
                retval = calmgr(req, selection)
                print "-------\n"*3,"ppw469:", req._buff
                self.wfile.write(req._buff)
                return 
                
            if parms["path"] == "/calsearch.xml":
                import searcher
                cparms = {}
                cparms.update(parms)
                print "pproxy466:"+repr(cparms)
                if "datalab" in parms:
                    cparms.update({"datalab":parms["datalab"][0]})
                if "filename" in parms:
                    print "ppw481:", repr(parms["filename"])
                    cparms.update({"filename":parms["filename"][0]})
                if "caltype" in parms:
                    cparms.update({"caltype":parms["caltype"][0]})
                else:
                    cparms.update({"caltype":"processed_bias"})
                    
                buff = searcher.search(cparms)
                self.send_response(200)
                self.send_header('Content-type', 'text/xml')
                self.end_headers()
                
                self.wfile.write(buff)
                return 
                
            if parms["path"].startswith("/globalcalsearch.xml"):
                from prsproxyutil import calibration_search
                flattenParms(parms)
                resultb = None
                resultf = None
                caltype = None
                result = None
                
                if "caltype" in parms:
                    caltype = parms["caltype"]
                    if caltype == "processed_bias" or caltype == "all":
                        parms.update({"caltype":"processed_bias"})
                        resultb = calibration_search(parms, fullResult=True)
                    if caltype == "processed_flat" or caltype == "all":
                        parms.update({"caltype":"processed_flat"})
                        resultf = calibration_search(parms, fullResult = True)
                
                if caltype == "all":
                    try:
                        domb = minidom.parseString(resultb)
                        domf = minidom.parseString(resultf)
                    except Exception:
                        return  # can't parse input... no calibration
                    calnodefs = domf.getElementsByTagName("calibration")
                    if len(calnodefs) > 0:
                        calnodef = calnodefs[0]
                    else:
                        calnodef = None
                    calnodebs = domb.getElementsByTagName("dataset")
                    if len(calnodebs) > 0:
                        calnodeb = calnodebs[0]
                    else:
                        calnodeb = None

                    if calnodef is not None and calnodeb is not None:
                        calnodeb.appendChild(calnodef)
                    result = domb.toxml()
                
                print "prsw207:", result
                self.send_response(200)
                self.send_header('Content-type', 'text/xml')
                self.end_headers()
                
                self.wfile.write(result)
                return
                
            if parms["path"] == "/recipecontent":
                if "recipe" in parms:
                    recipe = parms["recipe"][0]
                    content = rl.retrieve_recipe(recipe)
                    self.send_response(200)
                    self.send_header('Content-type', 'text/plain')
                    self.end_headers()

                    self.wfile.write(content)
                    return

            if parms["path"] == "/adinfo":
                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()

                if "filename" not in parms:
                    return "Error: Need Filename Parameter"
                if "filename" in parms:
                    try:
                        ad = AstroData(parms["filename"][0])
                    except:
                        self.wfile.write("Can't use AstroData to open %s" % parms["filename"])
                        return
                    if "fullpage" in parms:
                        self.wfile.write("<html><body>")
                    if "fullpage" not in parms:
                    # defaults to false
                        self.wfile.write("<b>Name</b>: %s \n" % os.path.basename(ad.filename))
                        self.wfile.write("<br/><b>Path</b>: %s \n" % os.path.abspath(ad.filename))
                        self.wfile.write("<br/><b>Types</b>: %s\n" % ", ".join(ad.types))
                        recdict = rl.get_applicable_recipes(ad, collate=True)
                        keys = recdict.keys()
                        keys.sort()
                        for key in keys:
                            recname = recdict[key]                        
                            self.wfile.write("<br/><b>Default Recipe(s)</b>:%s "+\
                                             "(<i>due to type</i>: %s)" % (recname, key))
                        alldesc = ad.all_descriptors()
                        self.wfile.write("<br/><b>Descriptors</b>:\n")
                        self.wfile.write('<table style="margin-left:4em">\n')
                        adkeys = alldesc.keys()
                        adkeys.sort()
                        self.wfile.flush()
                        for desc in adkeys:
                            value = str(alldesc[desc])
                            if "ERROR" in value:
                                value = '<span style="color:red">' + value + '</span>'
                            self.wfile.write("<tr><td>%s</td><td>%s</td></tr>\n" % (desc, value))
                            self.wfile.flush()
                        self.wfile.write("</table>")
                    if "fullpage" in parms:
                        self.wfile.write("</body></html>")
                return
                
            if parms["path"] == "/recipes.xml":
                self.send_response(200)
                self.send_header('Content-type', 'text/xml')
                self.send_header("Access-Control-Allow-Origin", "http://localhost")
                self.end_headers()
                self.wfile.write(rl.list_recipes(as_xml = True) )
                return

            if parms["path"] == "/reduceconfigs.xml":
                import glob
                rcfgs = glob.glob("./*.rcfg")
                self.send_response(200)
                self.send_header('Content-type', 'text/xml')
                self.end_headers()
                retxml = '<?xml version="1.0" encoding="UTF-8" ?>\n'
                retxml += "<reduceconfigs>\n"
                for rcfg in rcfgs:
                    retxml += """\t<reduceconfig name="%s"/>\n""" % rcfg
                retxml += "</reduceconfigs>\n"
                self.wfile.write(retxml)
                return

            if parms["path"].startswith("/datadir.xml"):
                dirdict = self.getDirdict()
                ds = dirdict.dataSpider
                xml = dirdict.as_xml()
                
                self.send_response(200)
                self.send_header('Content-type', 'text/xml')
                self.end_headers()
                
                self.wfile.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
                self.wfile.write("<datasetDict>\n")
                self.wfile.write(xml)
                self.wfile.write("</datasetDict>")
                self.wfile.flush()
                return

            if parms["path"] == "/runreduce":
                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()
                self.wfile.write("<html><head></head><body>\n")
                from StringIO import StringIO
                rout = StringIO()
                cmdlist = ["reduce", "--invoked", "--verbose=6"]
                cmdlist.extend(parms["p"])
                
                logdir = ".autologs"
                if not os.path.exists(logdir):
                    os.mkdir(logdir)

                reducelog = os.path.join(logdir, 
                                         "reduce-addcinvokedlog-%d%s" % (
                                             os.getpid(), str(time.time())
                                         ))
                f = open(reducelog, "w")
                
                loglink = "reducelog-latest"
                if os.path.exists(loglink):
                    os.remove(loglink)
                os.symlink(reducelog, loglink)
                            
                # WARNING, this call had used Popen and selected on the 
                # subprocess.PIPE... now uses call there is kruft remaining 
                # (may move it back to old style soon but there was a bug)

                print "adcc running: \n\t" + " ".join(cmdlist)
                pid = subprocess.call( cmdlist,
                                        stdout = f,
                                        stderr = f)
                
                self.wfile.write('<b style="font-size=150%">REDUCTION STARTED</b>')
                self.wfile.write("<pre>")
                # self.wfile.flush()
                f.close()
                f = open(reducelog, "r")      
                txt = f.read()
                # pretty the text
                ptxt = txt
                if (True): # make pretty
                    ptxt = re.sub("STARTING RECIPE:(.*)\n", 
                                  '<b>STARTING RECIPE:</b><span style="color:blue">\g<1></span>\n', ptxt)
                    ptxt = re.sub("STARTING PRIMITIVE:(.*)\n", 
                                  '<i>STARTING PRIMITIVE:</i><span style="color:green">\g<1></span>\n', ptxt)
                    ptxt = re.sub("ENDING PRIMITIVE:(.*)\n", 
                                  '<i>ENDING PRIMITIVE:</i>  <span style="color:green">\g<1></span>\n', ptxt)
                    ptxt = re.sub("ENDING RECIPE:(.*)\n", 
                                  '<b>ENDING RECIPE:</b>  <span style="color:blue">\g<1></span>\n', ptxt)
                    ptxt = re.sub("(STATUS|INFO|FULLINFO|WARNING|CRITICAL|ERROR)(.*?)-(.*?)-", 
                                  '<span style="font-size:70%">\g<1>\g<2>-\g<3>- </span>', ptxt)

                self.wfile.write(ptxt) # f.read())
                f.close()
                try:
                    while False:
                        error = False
                        while(True):
                            stdout = None
                            stderr = None
                            r,v,w = select.select([pid.stdout],[],[],.1)
                            print "prsw112:", repr(r)
                            if len(r):
                                stdout = r[0].read()
                                print "prsw487:", stdout
                                break;
                            else:
                                r,v,w = select.select([pid.stderr],[],[],.1)
                                if len(r):
                                    stderr = pid.stderr.read()
                                    print "prsw494:", stderr
                                    break;

                        if stdout:
                            self.wfile.write(str(stdout))
                        if stderr:
                            self.wfile.write("{"+stderr+"}")

                        self.wfile.flush()
                        if pid.poll()!= None:
                            self.wfile.flush()
                            break
                except:
                    print "PRSW516 EMERGENCY:"
                    
                self.wfile.write("</pre>")
                
                if False:
                    r,v,x = select.select([pid.stderr], [], [], .1)
                    if len(r):
                        stderr = pid.stderr.read()
                    else:
                        stderr = None
                # stderr = pid.stderr.read(100)
                    if stderr != None:
                        self.wfile.write("<b><pre>\n")
                        self.wfile.write(str(stderr))
                        self.wfile.write("</pre></b>")
                self.wfile.write('<b style="font-size=150%">REDUCTION ENDED</b>')
                self.wfile.write("\n</body></html>")
                self.wfile.flush()

                return
            
            if self.path == "/reducelist": #our dynamic content
                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()

                # this is the tag in head that autopolls if wanted
                front = """
                <html>
                <head>
                    <meta http-equiv="refresh" content="2" />
                </head>
                <body>"""
                page = front + """
                %(body)s
                </body>
                </html>"""

                if True:
                    body = ""
                    body += "<b>date</b>: %s<br/>\n" \
                            % datetime.datetime.now().strftime("%A, %Y-%m-%d %H:%M:%S")
                    body += "<u>Reduce Instances</u><br/>\n"
                    body += "n.o. instances: %d\n" % rim.numinsts 
                    body += "<ul>"
                    rdict = copy(rim.reducedict)
                    rpids = rim.reducedict.keys()
                    for rpid in rpids:
                        body += "<li>client pid = %d at port %d</li>\n" \
                                % (rpid, rdict[rpid]["port"])
                    body += "</ul>"
                    self.wfile.write(page % {"body":body})
                    self.wfile.flush()
                return 

            if self.path == "/killprs":
                import datetime
                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()
                self.wfile.write("Killed this prsproxy instance, pid = %d at %s" \
                                 %(os.getpid(), str(datetime.datetime.now())))
                webserverdone = True
                return
            
            if self.path.startswith("/displaycache"):
                from CacheManager import get_cache_dir, get_cache_file
                
                path = os.path.split(self.path)
                print "prsw 569:", self.path
                if len (path)>1:
                    slot = path[-1]
                    tfile = get_cache_file(slot)
                    
                    try:
                        f = open(tfile)
                    except:
                        return
                    self.send_response(200)
                    self.send_header('Content-type', 'image/png')
                    self.end_headers()

                    while True:
                        t = f.read(102400)
                        if t == "":
                            self.wfile.flush()
                            break
                        self.wfile.write(t)
                return

            if self.path.startswith("/fullheader"):
                realpath = self.path.split('/')
                realpath = realpath[1:]
                
                dirdict = self.getDirdict()
                print "prsw514:", repr(realpath)
                
                name = realpath[-1]
                fname = dirdict.get_full_path(name)
                ad = AstroData(fname)

                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()
        
                self.wfile.write("<html><body>\n")
                self.wfile.write('<h2>%s</h2>\n' % name)
                self.wfile.write(ad.infostr(as_html=True))
                alld = ad.all_descriptors()
                self.wfile.write(
                        """
                        <table cellspacing="2px">
                        <COLGROUP align="right" />
                        <COLGROUP align="left" />
                        <thead>
                        <tr>
                        <td style="background-color:grey">Descriptor</td>
                        <td style="background-color:grey">Value</td>
                        </tr>
                        </thead>
                        """)
                alldkeys = alld.keys()
                alldkeys.sort()
                for dname in alldkeys:
                    
                    if type(alld[dname]) == str and "ERROR" in alld[dname]:
                        redval = '<span  style="color:red">'+str(alld[dname])+"</span>"
                        dval = redval
                    else:
                        # print "ppw864:",type(alld[dname])
                        if not alld[dname].collapse_value():
                            import pprint
                            dval = """<pre>%s</pre> """ \
                                   % pprint.pformat(alld[dname].dict_val, indent=4, width=80)
                        else:
                            dval = str(alld[dname])
                    self.wfile.write("""
                        <tr>
                        <td style="text-align:right;border-bottom:solid grey 1px">
                        %(dname)s =
                        </td>
                        <td style="border-bottom:solid grey 1px">
                        %(value)s
                        </td>
                        </tr>
                        """ % { "dname":dname,
                                "value":dval})
                self.wfile.write("</table>")
                self.wfile.write("</body></html>\n")
                                
                return
                
            if self.path.startswith("/htmldocs"):
                import FitsStorage
                realpath = self.path.split('/')
                realpath = realpath[1:]
                dirname = os.path.dirname(FitsStorage.__file__)
                fname = os.path.join(dirname, "htmldocroot", *realpath)
                #print "psrw456: %s\n" % repr(fname)*10
                fnamelocal = os.path.join(
                                os.path.dirname(fname),
                                "FS_LOCALMODE_"+os.path.basename(fname)
                                )
                if os.path.exists(fnamelocal):
                    fname = fnamelocal
                try:
                    f = open(fname, "r")
                    data = f.read()
                    print repr(data)
                    f.close()
                except IOError:
                    data = "<b>NO SUCH RESOURCE FOUND</b>"
                self.send_response(200)
                if fname.endswith(".css"):
                    self.send_header('Content-type', "text/css")
                elif fname.endswith(".png"):
                    self.send_header('Content-type', "image/png")
                else:
                    self.send_header('Content-type', 'text/html')
                self.end_headers()
                self.wfile.write(data)
                return
                
            #what's the problem with this.
            if self.path.startswith("/cmd_queue"):
                self.counter += 1
                data = str(self.counter)
                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()
                self.wfile.write(data)
                return 
                
            if self.path.startswith("/engineering"):
                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()
                
                if "rim" in ADCCHandler.informers:
                    rim = ADCCHandler.informers["rim"]
                    evman = rim.events_manager
                    import pprint
                    data = "<u>Events</u><br/><pre>"
                    data += "num events: %d\n" % len(evman.event_list)
                    for mevent in evman.event_list:
                        data += pprint.pformat(mevent)
                        data += "------------------------------\n"
                    data += "</pre>"
                    self.wfile.write(data)
                return
            
            if self.path.startswith("/event_report.json"):
                self.send_response(200)
                self.send_header('Content-type', 'text/html')
                self.end_headers()
                
                if "timestamp" in parms:
                    timestamp = parms["timestamp"][0]
                else:
                    timestamp  = 0
                print "prsprox:",timestamp
                self.wfile.write({"youbo":"mompun"})
                return
                
            if self.path.startswith("/qap"):
                if ".." in self.path:
                    self.send_response(200)
                    self.send_header('Content-type', 'text/html')
                    self.end_headers()
                    data = "<b>bad path error</b>"
                    self.wfile.write(data)
                dirname = os.path.dirname(__file__)
                joinlist = [dirname, "../../scripts/adcc_faceplate/"]
                
                # Split out any parameters in the URL
                self.path = self.path.split("?")[0]

                #append any further directory info.
                joinlist.append(self.path[5:])
                fname = os.path.join(*joinlist)
                self.log_message('"%s" %s %s', "Loading " + \
                                 joinlist[1] + os.path.basename(fname), 
                                 203, '-')
                try:
                    f = open(fname, "r")
                    data = f.read()
                    f.close()
                except IOError:
                    data = "<b>NO SUCH RESOURCE AVAILABLE</b>"
                self.send_response(200)
                if  self.path.endswith(".js"):
                    self.send_header('Content-type', 'text/javascript')
                elif self.path.endswith(".css"):
                    self.send_header("Content-type", "text/css")
                elif fname.endswith(".png"):
                    self.send_header('Content-type', "image/png")
                else:
                    self.send_header('Content-type', 'text/html')
                self.end_headers()
                self.wfile.write(data)
                return 
            else:
                print "not qap"    
            if self.path == "/":
                self.path = "/KitchenSink.html"
                
            dirname = os.path.dirname(__file__)
            fname = os.path.join(dirname, "pyjamaface/prsproxygui/output", self.path[1:])
            
            try:
                f = open(fname, "r")
                data = f.read()
                f.close()
            except IOError:
                data = "<b>NO SUCH RESOURCE FOUND</b>"
                
            self.send_response(200)
            self.send_header('Content-type', 'text/html')
            self.end_headers()
            self.wfile.write(data)
            return 
        except IOError:
            print "handling IOError"
            self.send_error(404, 'File Not Found: %s' % self.path)
Example #25
import searcher
import data_load
import indexer
#the functions will be called here in order to perform search_engine operations

s = data_load.file_traverse()
indexer.preprocess(s)
print("Please enter the word")
s1 = searcher.search()

print(s1)

#Output1

#Please enter the word
#whole and loaves
#['whole', 'loaves']
#[['fortune1\\fortune1\\fortune2\\fortune2.txt']]

#Output2

#Please enter the word
#war or trapped
#['war', 'trapped']
#[['fortune1\\fortune1\\fortune2\\fortune2.txt'], ['fortune1\\fortune1\\fortune2\\fortune3\\fortune4\\fortune5\\fortune6\\fortune7\\fortune8\\fortune9\\fortune10\\fortune11\\fortune11.txt']]
Example #26
def searchForContent():
    data_load.get_traversal_data()
    search_data = indexer.read_data()
    searcher.search(search_data)
Example #27
def index_post():
    text = request.forms.text
    return searcher.search(str(text))
Example #28
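# Crawl-and-index driver (assumed context): pickles the file-traversal and web
# crawl results, then hands both to the indexer and starts an interactive search.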
file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)

webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

# catch exceptions dealing with pickling the objects, as well as catching 
# any exceptions dealing with opening the file to begin with
try:
	with open(webPickle, "bw") as out:
		try:
			pickle.dump(web_data, out)
		except pickle.PicklingError:
			print("Unpicklable object passed into dump().")
except IOError as ioe:
	print("Unable to write to file: " + ioe.filename)

# catch exceptions dealing with pickling the objects, as well as catching 
# any exceptions dealing with opening the file to begin with
try:
	with open(dataPickle, "bw") as out:
		try:
			pickle.dump(file_data, out)
		except pickle.PicklingError:
			print("Unpicklable object passed into dump().")
except IOError as ioe:
	print("Unable to write to file: " + ioe.filename)

indexer.process_data("unh_shelve", "indexed_files", dataPickle, webPickle)
searcher.search("unh_shelve", "indexed_files")
Example #29
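# Django front-page view (assumed context): builds the library list visible to
# the current user, decorating each organisation with its family-tree parents
# and children before rendering the index template.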
def index(request, entity_id=None):
    current_step = 1
    docs = []
    only_one = False

    if not request.user.is_authenticated():
        login_required = True
    else:
        login_required = False
        current_step = 2

    doc_id = entity_id
    selected_id = None
    selected_library = None

    if doc_id:
        selected_doc = get_doc_from_index(
                settings.INDEX_NAME, 'organisation', doc_id)
        if selected_doc:
            selected_library = get_source(selected_doc)
            selected_id = doc_id
            current_step = 3

            if selected_library['organisation_type'] != 'library':
                family_tree = searcher.search_family_tree(user=request.user)
                member = family_tree.get(doc_id)
                if member:
                    selected_library['children'] = [
                        (cid, cname) for cid, cname in
                        zip(member['children'], member['children_names'])]

                    parents = member['parent_names']
                    parents.reverse()
                    selected_library['parents'] = ' &rarr; '.join(parents)
    else:
        if not login_required:
            request.session.set_expiry(12*60*60)
            current_step = 2

        required_fields = ['organisation_type',
                           'contact.street_address.municipality_fi',
                           'name_fi',
                           'parent_organisation',
                           'meta.modified']

        if request.user.is_authenticated():
            if request.user.is_superuser:
                results = searcher.search(
                    # todo: remove size
                    searcher.ALL_LIBS, size=10000, fields=required_fields)
            else:
                results = searcher.front_page_search(
                    request.user, size=500, fields=required_fields)

            family_tree = searcher.search_family_tree(user=request.user)

            for doc in results[0]:
                if 'organisation_type' not in doc:
                    print(doc)

                if (doc['organisation_type'] in
                    ['branchlibrary', 'unit', 'library']):
                    family_member = family_tree.get(doc['_id'])

                    if family_member:
                        parents = family_member['parent_names']
                        children = family_member['children_names']
                        children_ids = family_member['children']

                        if len(parents):
                            parents.reverse()
                            doc['parents'] = ' &rarr; '.join(parents)

                        if len(children):
                            doc['children'] = [(cid, cname) for cid, cname in
                                               zip(children_ids, children)]

                if (doc['organisation_type'] not in ['department',
                                                     'mobile_stop']):
                    docs.append(doc)


            if not login_required and len(docs) == 1:
                current_step = 3
                selected_library = docs[0]
                selected_id = selected_library['_id']
                only_one = True

    t = env.get_template('index.html')

    note_filter_date = dt.datetime.now() - dt.timedelta(days=7)
    notifications = models.Notification.objects.unread(request.user, note_filter_date)

    context = {
        'login_required'      : login_required,
        'docs'                : docs,
        'library_count'       : len(docs),
        'username'            : request.user.username,
        'access_to_templates' : (
            not login_required and
            request.user.has_perm('sites.access_to_templates')),

        'summary'             : teaser('organisation'),
        'selected_library'    : selected_library,
        'selected_id'         : selected_id,
        'frontpage_url'       : frontpage_url,
        'editpage_url'        : editpage_url,
        'current_step'        : current_step,
        'only_one'            : only_one,
        'library_limit'       : 5,
        'username_string'     : _("Username"),
        'password_string'     : _("Password"),
        'login_string'        : _("Log in"),
        'notifications'       : notifications,
        'can_export'          : user_has_group(request.user, 'export')
        }

    context.update(csrf(request)) # todo: better way?

    s = template_render(t, context)

    return HttpResponse(s)
Example #30
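# argparse-driven CLI (assumed context): --index (re)builds and saves the index,
# --words loads it back and prints the matching files.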
if not args.datadir:
    args.datadir = "../20news-bydate-test"
if not args.indexfile:
    args.indexfile = "../index.data"

# If the user wants to create the indexes
if args.index:
    # Open all the files
    documents = connecter.fetch(args.datadir)
    # Tokenize, lowercase, remove accents
    tokenizedDocs = tokenizer.analyze(documents, [textprocessor.Normalizer()])
    # Create index
    index = indexer.buildIndex(tokenizedDocs)
    # Serialize index in file
    index.save(args.indexfile)

# If the user passed search keywords as parameter
if args.words:
    # Read index from file
    index = indexer.loadIndex(args.indexfile)
    # Search in index with the specified keywords
    results = searcher.search(index, args.words)
    # Display the results
    print("Your request is matched in the following files:")
    if len(results) > 0:
        for r in results:
            print(r)
    else:
        print("No file matches your request.")
Example #31
import searcher
import data_load
import indexer
import weather
#the functions will be called here in order to perform search_engine operations


str_set = data_load.main()
indexer.preprocess(str_set)
print("Please enter the word")
query = input()
while query != "q":
    weather.weatherInfo()
    s1=searcher.search(query)
    print("Search Results: ")
    for x in s1:
        print(x[0])
    print("Please enter any word other than 'q' to continue")
    query = input()
#Output1
    
#Please enter the word
#VIP
#weather Conditions :  Rain
#Performing search operation for  {'VIP'}
#Search Results: 
#www.newhaven.edu/admissions/VIP/
#Please enter any word other than 'q' to continue
#admissions and ugrad
#weather Conditions :  Rain
#Performing search operation for  {'admissions', 'ugrad'}
Example #32
op = "or"  # default; without this, op is unbound when the query has no "and"
for t in temp:
    if t == "and":
        op = "and"
    elif t != "or":
        temp1.add(t)
print("Performing '" + op.upper() + "' search for: " + str(temp1))
out = list(temp1)

try:
    page = urllib.request.urlopen("http://api.openweathermap.org/data/2.5/weather?q=" + "06516")
    code = page.getcode()
    if code == 200:
        content = page.read()
        content_string = content.decode("utf-8")
        json_data = json.loads(content_string)
        name = json_data["name"]
        weather = json_data["weather"][0]["main"]
        sun_rise = json_data["sys"]["sunrise"]
        sun_set = json_data["sys"]["sunset"]
except URLError as e:
    print("error")

dictionary_data = indexer.indexer()
print()
print("location : " + str(name) + " Weather : " + str(weather) + " Sun Rise : " + str(sun_rise) + " Sun Set : " + str(sun_set))
print()
searcher.search(dictionary_data,out,op)

Example #33
ap.add_argument("-i",
                "--index",
                required=True,
                help="Path to storage directory")
ap.add_argument("-q", "--query", required=True, help="Path to query directory")
ap.add_argument("-r",
                "--result-path",
                required=True,
                help="Path to results directory")
args = vars(ap.parse_args())

# initialize colour descriptor
cd = colourdescriptor.ColourDescriptor((8, 12, 3))

# load the query image and get its features
query = cv2.imread(args["query"])
features = cd.describe(query)

# perform search
searcher = searcher.Searcher(args["index"])
results = searcher.search(features)

# display query
cv2.imshow("Query", query)

# loop over results
for (score, result_id) in results:
    result = cv2.imread(result_id)
    cv2.imshow("Result", result)
    cv2.waitKey(0)
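The `Searcher` class above comes from a companion module that is not shown; a minimal sketch of a plausible implementation, scoring feature vectors against a CSV index with the chi-squared distance (hypothetical index format: one row per image, the image path followed by its feature values):

import csv

import numpy as np


class Searcher:
    def __init__(self, index_path):
        self.index_path = index_path

    @staticmethod
    def chi2_distance(a, b, eps=1e-10):
        # chi-squared distance between two histograms
        return 0.5 * np.sum(((a - b) ** 2) / (a + b + eps))

    def search(self, query_features, limit=10):
        query = np.array(query_features)
        results = []
        with open(self.index_path) as f:
            for row in csv.reader(f):
                features = np.array([float(x) for x in row[1:]])
                results.append((self.chi2_distance(features, query), row[0]))
        return sorted(results)[:limit]  # smallest distance first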
Example #34
import searcher
import data_load
import indexer
import WEBcrawler
import weather

weather.weather()                 # report current weather conditions
data_load.traverser()             # walk the data directory and pickle the raw text
WEBcrawler.visit_url()            # crawl for web data (visit_url is assumed to live in WEBcrawler)
d = indexer.indexer("raw_data.pickle", "shelve")  # build the index shelve
searcher.search(d)                # search the freshly built index
Example #35
import data_load
import searcher
import indexer
import webcrawler

data_load.traverser()
webcrawler.webcrawler()
d = indexer.process_data("raw_data.pickle", "webdata.pickle")

searcher.search("webdata.pickle")  # search the shelve produced by process_data above
Example #36
#traverser()
#web_crawler()
#indexer.create_shelve("raw_data.pickle","url_data.pickle")

# The query is asked for here in combine_search.py because other modules (e.g. weather) need it as well.

print("\n\n" + "===>>> Welcome to Avik's Search Engine <<<===")
query = input("Query: ")
query = query.lower().strip()  # drop leading and trailing spaces
query = query.split(" ")       # split into words, space being the delimiter
query = list(set(query))       # remove repeated words by converting to a set and back
if ("or" in query) and ("and" not in query):  # if "and" is present, the "and" operation takes precedence
    query.remove("or")  # drop the "or" operator itself from the word list
    search_type = "or"
# perform an "and" search; the user may also just type "flower sheep" without an operator
elif ("and" in query) or (len(query) > 1 and ("and" not in query) and ("or" not in query)):
    if "and" in query:
        query.remove("and")
    if "or" in query:
        query.remove("or")
    search_type = "and"
else:
    search_type = ""  # a single-word query needs no particular search type

get_weather(query)
search(query, search_type, "dictionary_data")

# so far this works with only the title and metadata
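For reference, a minimal sketch of how `search` might implement these AND/OR semantics over a shelve-backed inverted index (hypothetical shelve layout: word -> set of file paths):

import shelve


def search(words, search_type, shelve_name):
    # look each word up in the index and combine the posting sets
    with shelve.open(shelve_name) as index:
        postings = [index.get(w, set()) for w in words]
    if not postings:
        return set()
    if search_type == "or":
        return set.union(*postings)
    return set.intersection(*postings)  # covers "and" and single-word queries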
Example #37
import searcher
import indexer

d = indexer.process_data("raw_data.pickle", "fortune_shelves");
d = indexer.process_data("urls.pickle", "fortune_shelves");
searcher.search("fortune_shelves");
Example #38
        json_data = json.loads(content_string)

        city = json_data.get('name', None)
        weather = json_data.get("weather")

        if city:
            print('The weather for {} is {}'.format(city, weather[0]['main']))


query=input("query: ")

if not query:
    print("Must enter something")
    exit(0)

qtype, keywords = searcher.detect_query_type(query)

#Initiate Task 4
call_weather_api(keywords)

#Process both pickle files and store in fortunes_shelve
indexer.preprocess(['web_data.pickle','data.pickle',], 'fortunes_shelve')

output = searcher.search('fortunes_shelve', qtype, keywords)

if output:
    for found in output:
        print("Found at ", found)
else:
    print ("Not Found")
Example #39
from data_load import traverser
from indexer import create_dictionary
from searcher import search

traverser()
create_dictionary()
search("dictionary_data")
Example #40
import searcher
import indexer
from indexer import query
import data_load
import crawler_new

indexer.process_data("raw_data.txt","shelve_file")
indexer.process_data("raw_data1.txt","shelve_file")

searcher.search("shelve_file",query)
Example #41
import dataload
import indexer
import searcher

#dataload.traverse()
indexer.dict()
searcher.search()
Example #42
import searcher
import indexer
import data_load
dict_words = indexer.process_data("raw_data.txt","shelve_file")
searcher.search("shelve_file")
Example #43
	except URLError as e:
		print("error:", e)

	return returnList

from data_load import get_traversal_data
import indexer
import searcher
import pickle

crawler_backlog = {}
seed = "http://www.newhaven.edu/"
crawler_backlog[seed]=0
returnList = []
file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)

webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

with open(webPickle, "wb") as out:
    pickle.dump(web_data, out)

with open(dataPickle, "wb") as out:
    pickle.dump(file_data, out)

indexer.process_data("unh_shelve", "indexed_files", dataPickle, webPickle)
searcher.search("unh_shelve", "indexed_files")
Example #44
import searcher
import data_load
import indexer
import weather
#the functions will be called here in order to perform search_engine operations

str_set = data_load.main()
indexer.preprocess(str_set)
print("Please enter the word")
query = input()
while (query != "q"):
    weather.weatherInfo()
    s1 = searcher.search(query)
    print("Search Results: ")
    for x in s1:
        print(x[0])
    print("Please enter any word other than 'q' to continue")
    query = input()
#Output1

#Please enter the word
#VIP
#weather Conditions :  Rain
#Performing search operation for  {'VIP'}
#Search Results:
#www.newhaven.edu/admissions/VIP/
#Please enter any word other than 'q' to continue
#admissions and ugrad
#weather Conditions :  Rain
#Performing search operation for  {'admissions', 'ugrad'}
#Search Results:
Example #45
    # Subscribe to LandmarkDetected event from ALLandMarkDetection proxy.
    landmarkProxy.subscribe("landmarkTest")

    markData = memoryProxy.getData("LandmarkDetected")
    # Wait for a mark to be detected.
    @timeout(6)
    def find_mark(markData):
        while (len(markData) == 0):
            markData = memoryProxy.getData("LandmarkDetected")
        return markData

    try:
        markData = find_mark(markData)
    except Exception, e:
        print "time out, head's gonna move"
        result = search(ip)
        if len(result) == 0:
            print "timeout completely"
            print "ERROR: ", e
            sys.exit(1)

        else:
            markData = result
            print "just found it"
##    finally:
##        pass#sys.exit(1)
    print "markdata"
    print markData
    motionProxy.setStiffnesses("Head", 1.0)
    # Retrieve landmark center position in radians.
    wzCamera = markData[1][0][0][1]
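The `@timeout(6)` decorator above is assumed to come from elsewhere in the project; a minimal sketch of one common implementation based on SIGALRM (hypothetical; Unix-only, and written to stay compatible with the Python 2 style of this snippet):

import signal


def timeout(seconds):
    # raise an exception if the wrapped function runs longer than `seconds`
    def decorator(func):
        def handler(signum, frame):
            raise Exception("call timed out after %d seconds" % seconds)

        def wrapper(*args, **kwargs):
            old_handler = signal.signal(signal.SIGALRM, handler)
            signal.alarm(seconds)
            try:
                return func(*args, **kwargs)
            finally:
                signal.alarm(0)  # cancel the pending alarm
                signal.signal(signal.SIGALRM, old_handler)
        return wrapper
    return decorator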
Example #46
#import data_load
import indexer 
import searcher

query=input("query: ")
query = query.strip(" ").split()  # strip outer spaces and split into words on whitespace
web_query = []
query = list(set(query))
web_query = query[:]

d = indexer.get_traversal_data("raw_data.pickle.txt")
searcher.search(d,query)
w = indexer.get_traversal_data("web_data.pickle.txt")
searcher.search(w,web_query)

Example #47
    def crawl(this,
              keyWord,
              nReq=MAX_DOC,
              sortReq='',
              fieldReq='',
              timeSpan=None):
        '''
        A generator that collects data.

        Searches the Web of Science core collection by keyword over the
        given time span, sorts the results by date (descending), times
        cited (descending), relevance, etc., and extracts fields such as
        the title, authors, corresponding author and e-mail address from
        each result.

        Parameters
        ----------
        keyWord : search keyword
        nReq : number of records to collect, defaults to all of them
        sortReq : sort code for the results, see sortId.py
        fieldReq : search-field code
        timeSpan : time span of the search

        Returns
        -------
        crawl : a generator yielding the extracted fields of each paper
        '''

        if nReq is None or nReq < 0: nReq = MAX_DOC

        driver = this.driver
        driver.get(this.home)
        print('INFO : home page opened')

        selectDatabase(driver)
        print('INFO : database selected')

        selectSpan(driver, timeSpan)
        print('INFO : time span selected')

        selectSearchField(driver, fieldReq)
        print('INFO : search field selected')

        search(driver, keyWord)

        msg = ifSearchFailed(driver)
        assert not msg, 'search failed : "%s"' % msg

        print('INFO : search succeed')

        sid, qid = getIds(driver)
        sortResults(driver, sid, qid, sortReq)
        print('INFO : sort succeed')

        nRst = getNumOfRst(driver)

        if MAX_DOC < nRst:
            print('WARNING : too many results, please consider shortening the time span')

        switchLabel(driver, -1)

        print('INFO : start to extract data')
        i = 0
        maxI = min(nReq, nRst, MAX_DOC)

        tStart = time()
        ts = tc = 0
        for lnk in getLnks(driver, nReq, nRst):
            i += 1
            sTimeCost = ', %.2fs last page'
            print('INFO : extracting %d/%d%s' %
                  (i, maxI, '' if i == 1 else sTimeCost % tc),
                  end='\r')

            ts = time()
            newLabel(driver, lnk)
            switchLabel(driver, -1)

            this.__waitTillOpen()

            rst = extractValues(driver)
            driver.close()
            switchLabel(driver, -1)
            tc = time() - ts

            yield rst
        tCost = time() - tStart
        print('\nINFO : extracting done, %.2fs/paper' % (tCost / maxI))
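`search(driver, keyWord)` and the other page helpers here live in a companion module that is not shown; a minimal sketch of the search step with Selenium (hypothetical element locator; the real Web of Science page uses its own field ids):

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def search(driver, keyWord):
    # type the keyword into the search box and submit the form
    box = driver.find_element(By.ID, "value(input1)")  # hypothetical input id
    box.clear()
    box.send_keys(keyWord)
    box.send_keys(Keys.RETURN)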
Example #48
def search(query):
    # serialise the result list as JSON; the stdlib json module has dumps(), not encode()
    return json.dumps(searcher.search(query))
Example #50
#*****************************************************************************************
#  CSCI 6651
#  Homework 4 File Traverse
#  Building a search engine
#  search_combine.py
#  Author: Yong Deng
#  Since:  5-7-2015
#  This program calls the other modules to perform searching.
#*****************************************************************************************

import data_load
import indexer
import searcher

data = indexer.processData("raw_data.pickle", "fortune_shelve")
searcher.search("fortune_shelve")

#=================================================================================================================
# Sample outputs
#=================================================================================================================
# Yongs-MBP-5:YongDeng_HW4_fileTrasverser yongdeng$ python3 search_combine.py 

# query:the and was or if

# Performing AND search for: {'was', 'the', 'if'} 

# >>Found at /Users/yongdeng/Documents/CS/UNH assignments/CSCI 6651 python/myPython/YongDeng_HW4_fileTrasverser/fortune1/fortune2/fortune3/fortune4/fortune5/fortune6/fortune7/fortune8/fortune9/fortune9.log

# Execution time: 231 

#-----------------------------------------------------------------------------------------------------------------
Example #51
def load_tracks():
	'''
	Load tracks from disk
	'''
	albums, tracks = {}, []

	mp3_files = get_all_files(PRE_PROCESS_DIR) 

	# TODO: should be inside a for loop as we traverse through the directory given by PRE_PROCESS_DIR
	for filepath in mp3_files:
		if not load_track(filepath, albums, tracks):
			log('skipped file due to insufficient track info: ' + filepath)
		else:
			log('loaded file: ' + filepath)

	# TODO: for loop ends here

	# process tracks in 'albums' first
	covers = {}

	for album_index in albums:
		album = albums[album_index]
		artist_name = get_artist_str(album.artists, '')
		# TODO: change the next line
		track_count = len(album.tracklist) if len(album.tracklist) > 8 else None 
		res_albums = search(album.title, artist_name, track_count)
		best_album, best_mapping, score = Album.find_best_album_match(album, res_albums)

		if not best_album:
			log('no matched album found for ' + album.title)
			continue

		# add cover for the album
		img_file, scrap_err = get_album_covers(best_album.artists[0], best_album.title)
		if scrap_err: break
		# TODO: instead, ask if the user wants to proceed even if no album cover can be obtained
		elif img_file: best_album.add_cover(img_file)
			

		for i, j in best_mapping:
			log('{} -> {} ({})'.format(str(album.tracklist[i]), str(best_album.tracklist[j]), str(best_album)), 'change')
			# TODO: let user confirm using input()
			album.tracklist[i].apply_track_diff(best_album.tracklist[j])

		for track in album.tracklist:
			if track.changed:
				track.save()
				log('file saved with new changes: ' + track.filepath)
				# move the file to new location and rename it
				new_filepath = construct_filepath(track)
				log('new file path: ' + new_filepath)
				move_file(track.filepath, new_filepath)
				log('file moved')
				track.filepath = new_filepath
			else: tracks.append(track) # else treat it as an individual track

	# TODO: then process tracks in 'tracks'
	album = Album()
	for track in tracks:
		album.tracklist.append(track)
		artist_name = get_artist_str(track.artists, '')
		res_albums = search(track.title, artist_name, track=True)
		# note: basically copy-and-paste from the album loop above; may be worth factoring out at some point
		best_album, best_mapping, score = Album.find_best_album_match(album, res_albums)

		if not best_album:
			log('no matched album found for ' + track.title)
			continue

		# add cover for the album
		img_file, scrap_err = get_album_covers(best_album.artists[0], best_album.title)
		if scrap_err: break
		# TODO: instead, ask if the user wants to proceed even if no album cover can be obtained
		elif img_file: best_album.add_cover(img_file)
			

		for i, j in best_mapping:
			log('{} -> {} ({})'.format(str(album.tracklist[i]), str(best_album.tracklist[j]), str(best_album)), 'change')
			# TODO: let user confirm using input()
			album.tracklist[i].apply_track_diff(best_album.tracklist[j])

		if track.changed:
			track.save()
			log('file saved with new changes: ' + track.filepath)

			# move the file to new location and rename it
			new_filepath = construct_filepath(track)
			log('new file path: ' + new_filepath)
			move_file(track.filepath, new_filepath)
			log('file moved')
			track.filepath = new_filepath

		album.tracklist.clear()


	delete_dir()
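`construct_filepath` is referenced above but not shown; a minimal sketch of how such a helper might derive a destination path from a track's tags (hypothetical: the MUSIC_DIR root and the artists/album/number/title attributes are assumptions):

import os
import re

MUSIC_DIR = 'music'  # hypothetical destination root


def construct_filepath(track):
    # build '<root>/<artist>/<album>/<nn> <title>.mp3' from the track's tags
    def clean(s):
        # replace characters that are illegal in file names
        return re.sub(r'[\\/:*?"<>|]', '_', s).strip()

    artist = clean(', '.join(track.artists)) or 'Unknown Artist'
    album = clean(track.album or 'Unknown Album')
    name = '{:02d} {}.mp3'.format(track.number or 0, clean(track.title))
    return os.path.join(MUSIC_DIR, artist, album, name)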