Example #1
def term_list(environ, start_response):
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + "/data/")
    q = WSGIHandler(db, environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q.metadata)
    expanded_terms = get_expanded_query(hits)
    yield json.dumps(expanded_terms[0])
Example #2
def term_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hits = db.query(request["q"], request["method"], request["arg"],
                    **request.metadata)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield json.dumps(expanded_terms[0]).encode('utf8')
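The difference worth noting against Example #1 is the final .encode('utf8'): under Python 3 a WSGI application must yield byte strings, while json.dumps returns str. A minimal illustration of that step (the term list below is a made-up placeholder):

import json

body = json.dumps(["contrat", "contrats"])  # hypothetical expanded term group
assert isinstance(body, str)                # json.dumps always returns str
chunk = body.encode("utf8")                 # what a Python 3 WSGI app should yield
assert isinstance(chunk, bytes)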
Example #3
def term_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    hits = db.query(request["q"], request["method"], request["arg"],
                    **request.metadata)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield json.dumps(expanded_terms[0])
Example #4
def term_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hits = db.query(request["q"], request["method"], request["arg"],
                    **request.metadata)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield simplejson.dumps(expanded_terms[0])
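All four term_list variants above are plain WSGI applications, so any of them can be mounted on a WSGI server directly. A minimal sketch for local testing with the standard library, assuming the PhiloLogic imports used above (DB, WebConfig, WSGIHandler, get_expanded_query) are available:

from wsgiref.simple_server import make_server

# Serve the handler on localhost:8000; a request such as
# GET /?q=...&method=...&arg=... returns the first expanded term group as JSON.
httpd = make_server("localhost", 8000, term_list)
httpd.serve_forever()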
Example #5
def get_all_words(db, request):
    """Expand query to all search terms."""
    words = request["q"].replace('"', '')
    hits = db.query(words)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    word_groups = []
    for word_group in expanded_terms:
        normalized_group = []
        for word in word_group:
            word = ''.join([i for i in unicodedata.normalize("NFKD", word) if not unicodedata.combining(i)])
            normalized_group.append(word)
        word_groups.append(normalized_group)
    return word_groups
Example #6
def get_all_words(db, request):
    """Expand query to all search terms."""
    words = request["q"].replace('"', '')
    hits = db.query(words)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    word_groups = []
    for word_group in expanded_terms:
        normalized_group = []
        for word in word_group:
            word = u''.join([i for i in unicodedata.normalize("NFKD", word.decode('utf8')) if not unicodedata.combining(i)]).encode("utf-8")
            normalized_group.append(word)
        word_groups.append(normalized_group)
    return word_groups
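Both get_all_words variants strip diacritics the same way: NFKD decomposition followed by dropping the combining marks (the second variant is the Python 2 form, decoding from and re-encoding to UTF-8). A standalone sketch of that normalization step:

import unicodedata

def strip_accents(word):
    # Decompose accented characters (NFKD), then drop the combining marks.
    return "".join(c for c in unicodedata.normalize("NFKD", word)
                   if not unicodedata.combining(c))

print(strip_accents("déjà"))  # -> deja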
Example #7
def collocation_results(request, config):
    """Fetch collocation results"""
    db = DB(config.db_path + '/data/')
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy",
                        int(request['collocate_distance']), **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"],
                        **request.metadata)
    hits.finish()
    collocation_object = {"query": dict([i for i in request])}

    try:
        collocate_distance = int(request['collocate_distance'])
    except ValueError:  # Getting an empty string since the keyword is not specified in the URL
        collocate_distance = None

    if request.colloc_filter_choice == "nofilter":
        filter_list = []
    else:
        filter_list = build_filter_list(request, config)
    collocation_object['filter_list'] = filter_list
    filter_list = set(filter_list)

    # Build list of search terms to filter out
    query_words = []
    for group in get_expanded_query(hits):
        for word in group:
            word = word.replace('"', '')
            query_words.append(word)
    query_words = set(query_words)
    filter_list = filter_list.union(query_words)

    if request["collocate_distance"]:
        hits = db.query(request["q"],
                        "proxy",
                        int(request['collocate_distance']),
                        raw_results=True,
                        **request.metadata)
    else:
        hits = db.query(request["q"],
                        "cooc",
                        request["arg"],
                        raw_results=True,
                        **request.metadata)
    hits.finish()

    # Walk the raw hits one sentence at a time: for each new sentence, count the
    # words around the hit (rowid +/- collocate_distance when set, otherwise the
    # whole sentence), skipping anything in the filter list; a word is credited
    # per hit only while its count keeps up with sentence_hit_count.
    stored_sentence_id = None
    stored_sentence_counts = defaultdict(int)
    sentence_hit_count = 1
    hits_done = request.start or 0
    max_time = request.max_time or 10
    all_collocates = defaultdict(lambda: {'count': 0})
    cursor = db.dbh.cursor()
    start_time = timeit.default_timer()
    try:
        for hit in hits[hits_done:]:
            word_id = ' '.join([str(i) for i in hit[:6]]) + ' ' + str(hit[7])
            query = """select parent, rowid from words where philo_id='%s' limit 1""" % word_id
            cursor.execute(query)
            result = cursor.fetchone()
            parent = result['parent']
            if parent != stored_sentence_id:
                rowid = int(result['rowid'])
                sentence_hit_count = 1
                stored_sentence_id = parent
                stored_sentence_counts = defaultdict(int)
                if collocate_distance:
                    begin_rowid = rowid - collocate_distance
                    if begin_rowid < 0:
                        begin_rowid = 0
                    end_rowid = rowid + collocate_distance
                    row_query = """select philo_name from words where parent='%s' and rowid between %d and %d""" % (
                        parent, begin_rowid, end_rowid)
                else:
                    row_query = """select philo_name from words where parent='%s'""" % (
                        parent, )
                cursor.execute(row_query)
                for i in cursor.fetchall():
                    collocate = i["philo_name"]
                    if collocate not in filter_list:
                        stored_sentence_counts[collocate] += 1
            else:
                sentence_hit_count += 1
            for word in stored_sentence_counts:
                if stored_sentence_counts[word] < sentence_hit_count:
                    continue
                all_collocates[word]['count'] += 1
            hits_done += 1
            elapsed = timeit.default_timer() - start_time
            # avoid timeouts by splitting the query if more than request.max_time (in
            # seconds) has been spent in the loop
            if elapsed > int(max_time):
                break
    except IndexError:
        collocation_object['hits_done'] = len(hits)

    collocation_object['collocates'] = all_collocates
    collocation_object["results_length"] = len(hits)
    if hits_done < collocation_object["results_length"]:
        collocation_object['more_results'] = True
        collocation_object['hits_done'] = hits_done
    else:
        collocation_object['more_results'] = False
        collocation_object['hits_done'] = collocation_object["results_length"]

    return collocation_object
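The two words-table lookups above interpolate values directly into the SQL string. Since db.dbh appears to be a sqlite3 connection (its rows support dict-style access), the same lookups could also be written with bound parameters; a sketch under that assumption:

# Equivalent lookups using placeholders instead of string interpolation.
cursor.execute("select parent, rowid from words where philo_id = ? limit 1", (word_id,))
result = cursor.fetchone()
cursor.execute(
    "select philo_name from words where parent = ? and rowid between ? and ?",
    (parent, begin_rowid, end_rowid),
)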
Example #8
def collocation_results(request, config):
    """Fetch collocation results"""
    db = DB(config.db_path + '/data/')
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']), **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], **request.metadata)
    hits.finish()
    collocation_object = {"query": dict([i for i in request])}

    try:
        collocate_distance = int(request['collocate_distance'])
    except ValueError:  # Getting an empty string since the keyword is not specified in the URL
        collocate_distance = None

    if request.colloc_filter_choice == "nofilter":
        filter_list = []
    else:
        filter_list = build_filter_list(request, config)
    collocation_object['filter_list'] = filter_list
    filter_list = set(filter_list)

    # Build list of search terms to filter out
    query_words = []
    for group in get_expanded_query(hits):
        for word in group:
            word = word.replace('"', '')
            query_words.append(word)
    query_words = set(query_words)
    filter_list = filter_list.union(query_words)

    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']), raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], raw_results=True, **request.metadata)
    hits.finish()

    stored_sentence_id = None
    stored_sentence_counts = defaultdict(int)
    sentence_hit_count = 1
    hits_done = request.start or 0
    max_time = request.max_time or 10
    all_collocates = defaultdict(lambda: {'count': 0})
    cursor = db.dbh.cursor()
    start_time = timeit.default_timer()
    try:
        for hit in hits[hits_done:]:
            word_id = ' '.join([str(i) for i in hit[:6]]) + ' ' + str(hit[7])
            query = """select parent, rowid from words where philo_id='%s' limit 1""" % word_id
            cursor.execute(query)
            result = cursor.fetchone()
            parent = result['parent']
            if parent != stored_sentence_id:
                rowid = int(result['rowid'])
                sentence_hit_count = 1
                stored_sentence_id = parent
                stored_sentence_counts = defaultdict(int)
                if collocate_distance:
                    begin_rowid = rowid - collocate_distance
                    if begin_rowid < 0:
                        begin_rowid = 0
                    end_rowid = rowid + collocate_distance
                    row_query = """select philo_name from words where parent='%s' and rowid between %d and %d""" % (
                        parent, begin_rowid, end_rowid)
                else:
                    row_query = """select philo_name from words where parent='%s'""" % (parent, )
                cursor.execute(row_query)
                for i in cursor.fetchall():
                    collocate = i["philo_name"]
                    if collocate not in filter_list:
                        stored_sentence_counts[collocate] += 1
            else:
                sentence_hit_count += 1
            for word in stored_sentence_counts:
                if stored_sentence_counts[word] < sentence_hit_count:
                    continue
                all_collocates[word]['count'] += 1
            hits_done += 1
            elapsed = timeit.default_timer() - start_time
            # avoid timeouts by splitting the query if more than request.max_time (in
            # seconds) has been spent in the loop
            if elapsed > int(max_time):
                break
    except IndexError:
        collocation_object['hits_done'] = len(hits)

    collocation_object['collocates'] = all_collocates
    collocation_object["results_length"] = len(hits)
    if hits_done < collocation_object["results_length"]:
        collocation_object['more_results'] = True
        collocation_object['hits_done'] = hits_done
    else:
        collocation_object['more_results'] = False
        collocation_object['hits_done'] = collocation_object["results_length"]

    return collocation_object
Example #9
def fetch_collocation(hits, q, db, config):
    collocation_object = {"query": dict([i for i in q])}
    
    length = config['concordance_length']
    try:
        within_x_words = int(q['word_num'])
    except ValueError: ## Getting an empty string since the keyword is not specified in the URL
        within_x_words = 5
    
    if q.colloc_filter_choice == "nofilter":
        filter_list = []
    else:
        filter_list = build_filter_list(q, config)
    collocation_object['filter_list'] = list(filter_list)
        
    
    ## start going through hits ##
    all_collocates = {}
    
    count = 0
    
    more_results = False
    c = db.dbh.cursor()
    parents = {}
    
    # Build list of search terms to filter out
    query_words = set([])
    for group in get_expanded_query(hits):
        for word in group:
            word = word.replace('"', '')
            query_words.add(word)
        
    stored_sentence_id = None
    stored_sentence_counts = {}
    sentence_hit_count = 1
    hits_done = q.start or 0
    start_time = timeit.default_timer()
    max_time = q.max_time or 10
    try:
        for hit in hits[hits_done:]:
            word_id = ' '.join([str(i) for i in hit.philo_id])
            query = """select philo_name, parent from words where philo_id='%s'""" % word_id
            c.execute(query)
            result = c.fetchone()
            parent = result['parent']
            current_word = result['philo_name']
            if parent != stored_sentence_id:           
                sentence_hit_count = 1
                stored_sentence_id = parent
                stored_sentence_counts = {}
                row_query = """select philo_name from words where parent='%s'"""  % (parent,)
                c.execute(row_query)
                for i in c.fetchall():
                    if i['philo_name'] in stored_sentence_counts:
                        stored_sentence_counts[i['philo_name']] += 1
                    else:
                        stored_sentence_counts[i['philo_name']] = 1
            else:
                sentence_hit_count += 1              
            for word in stored_sentence_counts:
                if word in query_words or stored_sentence_counts[word] < sentence_hit_count:
                    continue
                if word in filter_list:
                    continue
                query_string = q['q'] + ' "%s"' % word
                method = 'cooc'
                if word in all_collocates:
                    all_collocates[word]['count'] += 1
                else:
                    all_link = f.link.make_absolute_query_link(config, q, report="concordance", q=query_string, method=method, start='0', end='0')
                    all_collocates[word] = {"count": 1, "url": all_link}
            hits_done += 1
            elapsed = timeit.default_timer() - start_time
            if elapsed > int(max_time): # avoid timeouts by splitting the query if more than q.max_time (in seconds) has been spent in the loop
                break
    except IndexError:
        collocation_object['hits_done'] = len(hits)
    
    collocation_object['collocates'] = all_collocates
    
    collocation_object["results_length"] = len(hits)
    if hits_done < collocation_object["results_length"]:
        collocation_object['more_results'] = True
        collocation_object['hits_done'] = hits_done
    else:
        collocation_object['more_results'] = False
        collocation_object['hits_done'] = collocation_object["results_length"]
    
    return collocation_object
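All three collocation reports return the collocates as an unordered word-to-count mapping; a caller would normally rank them before display. A minimal sketch of that post-processing step (top_collocates is a hypothetical helper, not part of the code above):

def top_collocates(collocation_object, n=20):
    """Return the n most frequent collocates as (word, count) pairs."""
    collocates = collocation_object["collocates"]
    ranked = sorted(collocates.items(), key=lambda item: item[1]["count"], reverse=True)
    return [(word, info["count"]) for word, info in ranked[:n]]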