Пример #1
0
def index():
    """Render ``index.html`` with config variables and connection requests.

    Both values are fetched from the database service through a
    ``KearchRequester`` and handed to the template as ``config`` and
    ``requests``.
    """
    requester = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    # Dict entries are evaluated top to bottom, so the config is fetched first.
    template_context = {
        'config': requester.request(path='/sp/db/get_config_variables'),
        'requests': requester.request(path='/sp/db/get_connection_requests'),
    }
    return flask.render_template('index.html', **template_context)
Пример #2
0
 def test_post_request(self):
     """POST a sample document to ``/posts`` and expect HTTP status 201."""
     base_url = 'https://jsonplaceholder.typicode.com'
     requester = KearchRequester(base_url, requester_name='test_requester')
     post_body = dict(userId=1, title='hello', body='world')
     response = requester.request(method='POST',
                                  path='/posts',
                                  payload=post_body)
     print(response.json())
     assert response.status_code == 201
Пример #3
0
def retrieve(queries, max_urls):
    """Run a phrase search against the elastic backend and summarize the hits.

    The query strings are joined with single spaces and sent as a
    ``multi_match`` phrase query over the ``title`` and ``text`` fields.

    Args:
        queries: Iterable of query strings.
        max_urls: Accepted but not used by this function.

    Returns:
        A list of dicts with the keys ``url``, ``title``, ``description``
        (the hit's ``text`` sliced to ``[0:200]``) and ``score``.
    """
    requester = KearchRequester(ELASTIC_HOST,
                                ELASTIC_PORT,
                                conn_type='elastic')

    search_body = {
        'query': {
            'multi_match': {
                'query': ' '.join(queries),
                'type': 'phrase',
                'fields': ['title', 'text']
            }
        }
    }
    search_path = '/' + ELASTIC_INDEX + '/' + ELASTIC_TYPE + '/_search?pretty'
    resp = requester.request(path=search_path,
                             payload=search_body,
                             method='POST')

    # A response lacking the nested 'hits' list is treated as "no results".
    if 'hits' in resp and 'hits' in resp['hits']:
        found = resp['hits']['hits']
    else:
        found = []
    print(found, file=sys.stderr)

    return [
        {
            'url': hit['_source']['url'],
            'title': hit['_source']['title'],
            'description': hit['_source']['text'][0:200],
            'score': hit['_score']
        }
        for hit in found
    ]
Пример #4
0
def url_to_webpage(url):
    """Build a ``Webpage`` for *url* and return it only if it is in-topic.

    The parameter files are refreshed first via ``update_param_file``; then
    the page's body and title words are POSTed to
    ``/sp/classifier/classify``.

    Args:
        url: Address passed to ``kearch_classifier.webpage.Webpage``.

    Returns:
        The ``Webpage`` object when the classifier response's ``result``
        equals ``kearch_classifier.classifier.IN_TOPIC``. ``None`` when the
        page cannot be constructed (``WebpageError``), when the classifier
        request raises ``RequesterError``, or when the page is not in-topic.
    """
    print('Start checking parameter files.', file=sys.stderr)
    update_param_file(kearch_classifier.classifier.PARAMS_FILE)
    update_param_file(ave.CACHE_FILE)
    print('End checking parameter files.', file=sys.stderr)

    try:
        w = kearch_classifier.webpage.Webpage(url)
    except kearch_classifier.webpage.WebpageError:
        print('Cannot make webpage of ', url, file=sys.stderr)
        return None

    cl_req = KearchRequester(CLASSIFIER_HOST, CLASSIFIER_PORT)
    payload = {'body_words': w.words, 'title_words': w.title_words}

    try:
        res = cl_req.request(path='/sp/classifier/classify',
                             method='POST',
                             payload=payload)
    except RequesterError:
        return None

    if res['result'] == kearch_classifier.classifier.IN_TOPIC:
        return w
    # The original had a bare ``None`` expression here; make the off-topic
    # result an explicit return so the intent is unambiguous.
    return None
Пример #5
0
def crawl_a_page(url):
    """Ask the crawler child to crawl *url* and return its response.

    When ``DEBUG_UNIT_TEST`` is truthy no request is made: the function
    sleeps for two seconds and returns a fixed sample page instead.

    Args:
        url: Address forwarded as the ``url`` request parameter.

    Returns:
        The crawler child's response, or an empty dict when the request
        raises ``RequesterError`` (the error is printed to stderr).
    """
    if not DEBUG_UNIT_TEST:
        requester = KearchRequester(CRAWLER_CHILD_HOST, CRAWLER_CHILD_PORT)
        try:
            print('requesting   /crawl_a_page?url={} ...'.format(url))
            page = requester.request(path='/sp/crawler-child/crawl_a_page',
                                     params={'url': url},
                                     timeout=SP_CHILD_TIMEOUT)
            print('get response /crawl_a_page?url={}'.format(url))
        except RequesterError as err:
            print(err, file=sys.stderr)
            page = {}
        return page

    # Canned response used when running under the unit-test flag.
    sample_page = {
        'url': 'www.google.com',
        'title': 'Google is the biggest IT company.',
        'text': 'hello world!',
        'inner_links': ['www.facebook.com'],
        'outer_links': []
    }
    time.sleep(2)
    return sample_page
Пример #6
0
def index():
    """Render ``index.html`` with the result of ``/me/db/list_up_sp_servers``.

    The listing is fetched with a GET request to the database service and
    passed to the template as ``sp_servers``.
    """
    requester = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type="sql")
    listing = requester.request(method='GET',
                                path='/me/db/list_up_sp_servers')
    return flask.render_template('index.html', sp_servers=listing)
Пример #7
0
 def test_get_request(self):
     """GET ``/posts`` filtered by ``userId=1`` and expect HTTP status 200."""
     base_url = 'https://jsonplaceholder.typicode.com'
     requester = KearchRequester(base_url, requester_name='test_requester')
     query_params = dict(userId=1)
     response = requester.request(method='GET',
                                  path='/posts',
                                  params=query_params)
     print(response.json())
     assert response.status_code == 200
Пример #8
0
def send_a_connection_request(sp_host):
    """POST this server's host name to *sp_host*'s gateway.

    Args:
        sp_host: Host whose ``add_a_connection_request`` gateway endpoint
            receives the request.

    Returns:
        Whatever the gateway request returns.
    """
    body = {'me_host': get_me_host()}
    gateway = KearchRequester(sp_host, SPECIALIST_GATEWAY_PORT)
    endpoint = SP_GATEWAY_BASEURL + 'add_a_connection_request'
    return gateway.request(path=endpoint, method='POST', payload=body)
Пример #9
0
def is_connected(me_host):
    """Report whether *me_host* has an approved connection request.

    Args:
        me_host: Host name looked up in the ``out`` and then the ``in``
            section of ``/sp/db/get_connection_requests``.

    Returns:
        ``True`` if either section contains *me_host* with a truthy value,
        ``False`` otherwise.
    """
    requester = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    known_requests = requester.request(path='/sp/db/get_connection_requests')
    for direction in ('out', 'in'):
        entries = known_requests[direction]
        if me_host in entries and entries[me_host]:
            # already approved
            return True
    return False
Пример #10
0
def send_a_dump(sp_host, me_host, summary):
    """POST a summary to ``/v0/me/gateway/add_a_summary`` on *me_host*.

    Args:
        sp_host: Value sent under the ``host`` key.
        me_host: Host that receives the request.
        summary: Value sent under the ``summary`` key.

    Returns:
        Whatever the gateway request returns.
    """
    body = {'host': sp_host, 'summary': summary}
    gateway = KearchRequester(me_host, META_GATEWAY_PORT)
    return gateway.request(path='/v0/me/gateway/add_a_summary',
                           method='POST',
                           payload=body)
Пример #11
0
def test_evaluate():
    """Check that ``evaluate_main`` returns a dict keyed by known sp servers.

    Every key must be a string that also appears in the listing returned by
    ``/me/db/list_up_sp_servers``.
    """
    res = evaluate_main('google')
    assert isinstance(res, dict)

    # The server listing does not depend on the key being checked, so it is
    # fetched once instead of once per key.
    db_req = KearchRequester(
        DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    sp_servers = db_req.request(path='/me/db/list_up_sp_servers')

    for k in res:
        assert isinstance(k, str)
        assert k in sp_servers
Пример #12
0
def retrieve(sp_host, queries, max_urls):
    """GET the ``retrieve`` endpoint of *sp_host*'s gateway.

    Args:
        sp_host: Host the request is sent to.
        queries: Value sent as the ``queries`` parameter.
        max_urls: Value sent as the ``max_urls`` parameter.

    Returns:
        Whatever the gateway request returns.
    """
    gateway = KearchRequester(sp_host, SPECIALIST_GATEWAY_PORT)
    query_params = {'queries': queries, 'max_urls': max_urls}
    return gateway.request(path=SP_GATEWAY_BASEURL + 'retrieve',
                           method='GET',
                           params=query_params)
Пример #13
0
def retrieve(queries, max_urls):
    """GET ``/sp/query-processor/retrieve`` from the query processor.

    Args:
        queries: Value sent as the ``queries`` parameter.
        max_urls: Value sent as the ``max_urls`` parameter.

    Returns:
        Whatever the query-processor request returns.
    """
    processor = KearchRequester(QUERY_PROCESSOR_HOST, QUERY_PROCESSOR_PORT)
    query_params = {'queries': queries, 'max_urls': max_urls}
    return processor.request(path='/sp/query-processor/retrieve',
                             method='GET',
                             params=query_params)
Пример #14
0
def add_new_sp_server(summary):
    """Store *summary* in the database and approve its outgoing connection.

    Args:
        summary: Payload for ``/me/db/add_new_sp_server``; its ``sp_host``
            entry is reused for the approval request.

    Returns:
        The result of the ``add_new_sp_server`` request.
    """
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    added = database.request(path='/me/db/add_new_sp_server', payload=summary)
    approval = {'in_or_out': 'out', 'sp_host': summary['sp_host']}
    database.request('/me/db/approve_a_connection_request', payload=approval)
    return added
Пример #15
0
def send_a_connection_request():
    """Record an outgoing connection request and forward it to the gateway.

    Reads ``me_host`` from the submitted form, POSTs the request to
    ``/sp/db/add_a_connection_request``, then POSTs this server's
    ``host_name`` and ``engine_name`` (taken from the config variables) to
    the ``add_a_connection_request`` gateway endpoint on ``me_host``.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    target_host = flask.request.form['me_host']

    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    database.request(path='/sp/db/add_a_connection_request',
                     payload={
                         'me_host': target_host,
                         'in_or_out': 'out',
                         'scheme': 'http'
                     },
                     method='POST')

    settings = database.request(path='/sp/db/get_config_variables')
    announcement = {
        'sp_host': settings['host_name'],
        'engine_name': settings['engine_name'],
        'scheme': 'http'
    }

    gateway = KearchRequester(target_host, ME_GATEWAY_PORT)
    gateway.request(path=ME_GATEWAY_BASEURL + 'add_a_connection_request',
                    payload=announcement,
                    method='POST')

    return flask.redirect(flask.url_for("index"))
Пример #16
0
def update_config():
    """POST the submitted config fields to ``/me/db/set_config_variables``.

    Only ``connection_policy`` and ``host_name`` are forwarded, and only
    when they are present in the submitted form.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    form = flask.request.form
    changes = {
        field: form[field]
        for field in ('connection_policy', 'host_name')
        if field in form
    }
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    database.request(path='/me/db/set_config_variables',
                     payload=changes,
                     method='POST')
    return flask.redirect(flask.url_for("index"))
Пример #17
0
def learn_params_for_evaluator():
    """Learn evaluator parameters from the stored summaries and upload them.

    The summaries come from ``/me/db/get_sp_summaries``. The learned
    parameters are dumped to ``kearch_evaluator.evaluator.PARAMS_FILE``,
    and that file is then sent base64-encoded to
    ``/me/db/push_binary_file``.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    summaries = db_req.request(path='/me/db/get_sp_summaries')
    e = kearch_evaluator.evaluator.Evaluator()
    e.learn_params(summaries)
    e.dump_params(kearch_evaluator.evaluator.PARAMS_FILE)

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(kearch_evaluator.evaluator.PARAMS_FILE, 'rb') as f:
        bparam = f.read()
    # Base64 turns the binary file into text that can travel in the request.
    tparam = base64.b64encode(bparam).decode('utf-8')
    params = {'name': kearch_evaluator.evaluator.PARAMS_FILE, 'body': tparam}
    db_req.request(path='/me/db/push_binary_file', params=params)
    return flask.redirect(flask.url_for("index"))
Пример #18
0
def init_crawl_urls():
    """Push the URLs submitted in the form onto the crawl queue.

    The ``urls`` form field is split on newlines and trailing whitespace
    (such as ``\\r``) is stripped from every line before the list is POSTed
    to ``/sp/db/push_urls_to_queue``.

    Returns:
        The database response wrapped with ``jsonify``.
    """
    form_input = flask.request.form['urls']
    # Build a real list: on Python 3 ``map`` returns a lazy iterator, which
    # is not a list and cannot be serialized as one in the request payload.
    urls = [line.rstrip() for line in form_input.split('\n')]
    payload = {'urls': urls}

    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    ret = db_req.request(path='/sp/db/push_urls_to_queue',
                         payload=payload,
                         method='POST')
    return jsonify(ret)
Пример #19
0
def approve_a_connection_request():
    """Send a summary to the requesting host and mark its request approved.

    Reads ``me_host`` from the submitted form, calls ``send_a_summary`` for
    it, then asks the database to approve the incoming request via
    ``/sp/db/approve_a_connection_request``.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    requesting_host = flask.request.form['me_host']
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')

    send_a_summary(requesting_host)

    approval = {'in_or_out': 'in', 'me_host': requesting_host}
    database.request('/sp/db/approve_a_connection_request', payload=approval)

    return flask.redirect(flask.url_for("index"))
Пример #20
0
def get_result_from_sp(sp_host, query, max_urls):
    """Fetch results for *sp_host* through the gateway and tag each one.

    Args:
        sp_host: Value sent as ``sp_host`` and written into every result.
        query: Value sent as the ``queries`` parameter.
        max_urls: Value sent as the ``max_urls`` parameter.

    Returns:
        A list of the response items, each with its ``sp_host`` key set to
        *sp_host* (the items are modified in place).
    """
    gateway = KearchRequester(GATEWAY_HOST, GATEWAY_PORT)
    query_params = {
        'sp_host': sp_host,
        'queries': query,
        'max_urls': max_urls
    }
    response = gateway.request(path=ME_GATEWAY_BASEURL + 'retrieve',
                               params=query_params)
    tagged = []
    for item in response:
        item['sp_host'] = sp_host
        tagged += [item]
    return tagged
Пример #21
0
def add_a_connection_request(me_host, scheme):
    """Handle an incoming connection request according to the policy.

    When the configured connection policy is ``'public'`` the database dump
    is sent straight to *me_host* via ``send_a_dump``; otherwise the request
    is stored through ``/sp/db/add_a_connection_request``.

    Args:
        me_host: Host that issued the request.
        scheme: Value stored under ``scheme`` for non-public policies.

    Returns:
        The result of ``send_a_dump`` or of the database request.
    """
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    settings = database.request(path='/sp/db/get_config_variables',
                                method='GET')

    if settings[CONFIG_CONNECTION_POLICY] != 'public':
        pending = {'in_or_out': 'in', 'me_host': me_host, 'scheme': scheme}
        return database.request(path='/sp/db/add_a_connection_request',
                                payload=pending)

    own_host = settings[CONFIG_HOST_NAME]
    dump = database.request(path='/sp/db/dump_database', method='GET')
    return send_a_dump(own_host, me_host, dump)
Пример #22
0
def update_param_file(filename):
    """Refresh the local copy of *filename* if the database has a newer one.

    The ``updated_at`` value from ``/me/db/check_binary_file_timestamp`` is
    compared with the one remembered in the ``timestamp`` mapping. When the
    file is unknown or older, it is pulled from ``/me/db/pull_binary_file``,
    base64-decoded, written to disk, and the evaluater parameters are
    reloaded.

    Args:
        filename: Name of the binary file, used both as the database key
            and as the local path.
    """
    db_req = KearchRequester(
        DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    ret = db_req.request(path='/me/db/check_binary_file_timestamp',
                         params={'name': filename})
    dt = ret['updated_at']
    print('db:', dt, file=sys.stderr)
    if filename not in timestamp or timestamp[filename] < dt:
        ret = db_req.request(path='/me/db/pull_binary_file',
                             params={'name': filename})
        body = base64.b64decode(ret['body'].encode())
        with open(filename, 'wb') as f:
            f.write(body)
        # NOTE(review): this always reloads PARAMS_FILE, whatever *filename*
        # is - confirm that callers only ever pass PARAMS_FILE.
        evaluater.load_params(kearch_evaluater.evaluater.PARAMS_FILE)
        # Remember the timestamp only after the pull, write and reload all
        # succeeded; recording it earlier would mark a failed update as done
        # and the stale parameters would never be refreshed.
        timestamp[filename] = dt
Пример #23
0
def search():
    """Serve the search results page for a GET request.

    The ``query`` argument is split on whitespace and re-joined with single
    spaces before being sent to ``/sp/query-processor/retrieve`` together
    with ``MAX_URLS``.

    Returns:
        The rendered ``result.html`` template for GET requests; a redirect
        to the ``index`` endpoint for any other method.
    """
    if flask.request.method != 'GET':
        # ``url_for`` expects an endpoint name, not a template filename; the
        # other handlers redirect to the "index" endpoint the same way.
        return flask.redirect(flask.url_for('index'))

    query = flask.request.args['query']
    queries = query.split()
    kr = KearchRequester(QUERY_PROCESSOR_HOST, QUERY_PROCESSOR_PORT)
    results = kr.request(path='/sp/query-processor/retrieve',
                         method='GET',
                         params={
                             'queries': ' '.join(queries),
                             'max_urls': MAX_URLS
                         })
    return flask.render_template('result.html',
                                 results=results,
                                 query=query)
Пример #24
0
def update_password():
    """Store a new password hash for the current user.

    The ``password`` and ``password_again`` form fields must match;
    otherwise ``abort(500, ...)`` is called with an explanatory message.
    The SHA-512 hex digest of the password is POSTed to
    ``/sp/db/update_password_hash`` along with ``current_user.name``.

    Returns:
        The database response wrapped with ``jsonify``.
    """
    password = flask.request.form['password']
    password_again = flask.request.form['password_again']
    if password != password_again:
        r = {'message': 'Passwords do not match.'}
        abort(500, r)
    u = current_user.name
    h = hashlib.sha512(password.encode('utf-8')).hexdigest()
    # Log only the username: the password hash is a credential and must not
    # be written to the server log.
    print(u, file=sys.stderr)
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    ret = db_req.request(path='/sp/db/update_password_hash',
                         payload={
                             'username': u,
                             'password_hash': h
                         },
                         method='POST')
    return jsonify(ret)
Пример #25
0
def delete_a_connection_request():
    """Delete a connection request locally and on the remote gateway.

    Reads ``me_host`` from the submitted form, removes the request through
    ``/sp/db/delete_a_connection_request``, then sends a DELETE carrying
    this server's ``host_name`` to the gateway on ``me_host``.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    target_host = flask.request.form['me_host']
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')

    database.request('/sp/db/delete_a_connection_request',
                     payload={'me_host': target_host})

    settings = database.request(path='/sp/db/get_config_variables')
    removal = {'sp_host': settings['host_name']}
    gateway = KearchRequester(target_host, ME_GATEWAY_PORT)
    gateway.request(path=ME_GATEWAY_BASEURL + 'delete_a_connection_request',
                    payload=removal,
                    method='DELETE')

    return flask.redirect(flask.url_for("index"))
Пример #26
0
def evaluate_main(query):
    """Evaluate *query* and keep only scores for servers still in the DB.

    Args:
        query: Query string; it is split on single spaces before being
            handed to ``evaluater.evaluate``.

    Returns:
        A dict mapping each server listed by ``/me/db/list_up_sp_servers``
        that also appears in the evaluation result to its evaluation value.
    """
    print('Start checking parameter files.', file=sys.stderr)
    update_param_file(kearch_evaluater.evaluater.PARAMS_FILE)
    print('End checking parameter files.', file=sys.stderr)

    terms = query.split(' ')

    # The evaluater may still know specialist servers that were removed by
    # /me/gateway/delete_a_connection_request, so its result is restricted
    # to the servers that really exist in the database.
    database = KearchRequester(
        DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    known_servers = database.request(path='/me/db/list_up_sp_servers')
    scores = evaluater.evaluate(terms)
    return {
        server: scores[server]
        for server in known_servers.keys()
        if server in scores
    }
Пример #27
0
def approve_a_connection_request():
    """Fetch a summary from the requesting sp host and approve its request.

    Reads ``sp_host`` from the submitted form, asks that host's gateway for
    a summary (identifying this server by its configured ``host_name``),
    stores the summary via ``/me/db/add_new_sp_server`` and marks the
    incoming request approved.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    sp_host = flask.request.form['sp_host']

    gateway = KearchRequester(sp_host, SP_GATEWAY_PORT)
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    own_host = database.request(path='/me/db/get_config_variables')['host_name']

    summary = gateway.request(path=SP_GATEWAY_BASEURL + 'get_a_summary',
                              params={'me_host': own_host})
    database.request(path='/me/db/add_new_sp_server',
                     payload=summary,
                     method='POST')

    approval = {'in_or_out': 'in', 'sp_host': sp_host}
    database.request(path='/me/db/approve_a_connection_request',
                     payload=approval,
                     method='POST')

    return flask.redirect(flask.url_for("index"))
Пример #28
0
def search():
    """Serve the search results page for a GET request.

    The ``query`` and ``sp`` arguments are read from the request; ``sp`` is
    forwarded to ``/me/query-processor/retrieve`` only when it is not empty.
    The server listing from ``/me/db/list_up_sp_servers`` is passed to the
    template as well.

    Returns:
        The rendered ``result.html`` template for GET requests; a redirect
        to the ``index`` endpoint for any other method.
    """
    if flask.request.method != 'GET':
        # ``url_for`` expects an endpoint name, not a template filename; the
        # other handlers redirect to the "index" endpoint the same way.
        return flask.redirect(flask.url_for('index'))

    query = flask.request.args['query']
    sp = flask.request.args['sp']

    query_processor_requester = KearchRequester(QUERY_PROCESSOR_HOST,
                                                QUERY_PROCESSOR_PORT)
    database_requester = KearchRequester(DATABASE_HOST,
                                         DATABASE_PORT,
                                         conn_type="sql")

    params = {'query': query, 'max_urls': MAX_URLS}
    if sp != "":
        params['sp'] = sp

    results = query_processor_requester.request(
        path='/me/query-processor/retrieve', method='GET', params=params)
    sp_servers = database_requester.request(
        path='/me/db/list_up_sp_servers', method='GET')

    print('results = ', results, file=sys.stderr)

    return flask.render_template('result.html',
                                 results=results,
                                 selected_sp=sp,
                                 sp_servers=sp_servers,
                                 query=query)
Пример #29
0
def learn_params_from_url():
    """Learn classifier parameters from submitted URLs and upload them.

    The ``topic_urls`` and ``random_urls`` form fields are split on newlines
    and right-stripped. The classifier parameters learned from them are
    dumped to ``kearch_classifier.classifier.PARAMS_FILE``;
    ``ave.make_average_document_from_urls`` is then run on the random URLs.
    Both ``PARAMS_FILE`` and ``ave.CACHE_FILE`` are sent base64-encoded to
    ``/sp/db/push_binary_file``.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')

    form_input_topic = flask.request.form['topic_urls']
    form_input_random = flask.request.form['random_urls']
    language = flask.request.form['language']
    topic_urls = [line.rstrip() for line in form_input_topic.split('\n')]
    random_urls = [line.rstrip() for line in form_input_random.split('\n')]

    cls = kearch_classifier.classifier.Classifier()
    cls.learn_params_from_url(topic_urls, random_urls, language)
    cls.dump_params(kearch_classifier.classifier.PARAMS_FILE)

    # Context managers close the file handles deterministically; the bare
    # ``open(...).read()`` form left them to the garbage collector.
    with open(kearch_classifier.classifier.PARAMS_FILE, 'rb') as f:
        bparam = f.read()
    tparam = base64.b64encode(bparam).decode('utf-8')
    params = {'name': kearch_classifier.classifier.PARAMS_FILE, 'body': tparam}
    db_req.request(path='/sp/db/push_binary_file', params=params)

    ave.make_average_document_from_urls(random_urls, language)
    with open(ave.CACHE_FILE, 'rb') as f:
        bparam = f.read()
    tparam = base64.b64encode(bparam).decode('utf-8')
    params = {'name': ave.CACHE_FILE, 'body': tparam}
    db_req.request(path='/sp/db/push_binary_file', params=params)

    return flask.redirect(flask.url_for("index"))
Пример #30
0
def login():
    """Show the login form (non-POST) or authenticate the submitted form.

    On POST the submitted ``username`` and the SHA-512 hex digest of the
    submitted ``password`` are compared with every entry returned by
    ``/sp/db/get_authentication``.

    Returns:
        The rendered ``login.html`` template for non-POST requests; a
        redirect to the ``index`` endpoint after a successful login;
        otherwise the result of ``abort(401)``.
    """
    if request.method != 'POST':
        return flask.render_template('login.html')

    username = request.form['username']
    password = request.form['password']

    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    auth_info = db_req.request(path='/sp/db/get_authentication')

    # The digest does not depend on the stored entry, so compute it once
    # rather than once per credential; ``any`` stops at the first match.
    # NOTE(review): unsalted SHA-512 compared with ``==`` - consider a
    # password KDF and a constant-time comparison.
    submitted_hash = hashlib.sha512(password.encode('utf-8')).hexdigest()
    is_valid = any(
        d['username'] == username and d['password_hash'] == submitted_hash
        for d in auth_info.values()
    )
    if not is_valid:
        return abort(401)

    user = User(0)
    login_user(user)
    return redirect(flask.url_for("index"))