import json
import os
import subprocess

# NOTE: parse_query, group_terms, split_terms, grep_word, grep_exact,
# highlighter, DB, WSGIHandler, and WebConfig come from PhiloLogic's own
# modules; their exact import paths vary across PhiloLogic versions, so
# they are left implicit here.


def term_group(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()  # older variant: `f` is presumably the imported functions module
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    # Run the query; the result is unused here, only the parsed terms matter.
    hits = db.query(request["q"], request["method"], request["arg"],
                    **request.metadata)
    parsed = parse_query(request.q)
    group = group_terms(parsed)
    all_groups = split_terms(group)
    term_groups = []
    for g in all_groups:
        term_group = ''
        not_started = False
        for kind, term in g:
            if kind == 'NOT':
                if not_started is False:
                    not_started = True
                    term_group += ' NOT '
            elif kind == 'OR':
                term_group += '|'
            elif kind == "TERM":
                term_group += ' %s ' % term
            elif kind == "QUOTE":
                term_group += ' %s ' % term
        term_group = term_group.strip()
        term_groups.append(term_group)
    yield json.dumps(term_groups)
def term_group(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if not request["q"]:
        dump = json.dumps({"original_query": "", "term_groups": []})
    else:
        hits = db.query(request["q"], request["method"], request["arg"],
                        sort_order=request["sort_order"], **request.metadata)
        parsed = parse_query(request.q)
        group = group_terms(parsed)
        all_groups = split_terms(group)
        term_groups = []
        for g in all_groups:
            term_group = ''
            not_started = False
            for kind, term in g:
                if kind == 'NOT':
                    if not_started is False:
                        not_started = True
                        term_group += ' NOT '
                elif kind == 'OR':
                    term_group += '|'
                elif kind == "TERM":
                    term_group += ' %s ' % term
                elif kind == "QUOTE":
                    term_group += ' %s ' % term
            term_group = term_group.strip()
            term_groups.append(term_group)
        dump = json.dumps({"term_groups": term_groups,
                           "original_query": request.original_q})
    yield dump.encode('utf8')
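# For reference, the joining loop above in isolation: a minimal, runnable
# sketch that uses hypothetical (kind, term) tuples in place of the output of
# parse_query / group_terms / split_terms. The tuple shape is inferred from
# the loop itself, not taken from PhiloLogic's documented API.
def join_groups(all_groups):
    joined = []
    for g in all_groups:
        text = ''
        not_started = False
        for kind, term in g:
            if kind == 'NOT':
                if not_started is False:
                    not_started = True
                    text += ' NOT '  # emit NOT at most once per group
            elif kind == 'OR':
                text += '|'
            elif kind in ("TERM", "QUOTE"):
                text += ' %s ' % term
        joined.append(text.strip())
    return joined


sample_groups = [
    [("TERM", "love"), ("OR", "|"), ("TERM", "hate")],
    [("QUOTE", '"dear friend"')],
]
print(join_groups(sample_groups))  # ['love | hate', '"dear friend"']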
# Python 3 variant: the grep output is bytes, decoded after splitting.
def format_query(q, db, config):
    parsed = parse_query(q)
    group = group_terms(parsed)
    all_groups = split_terms(group)
    # We extract every word tuple
    word_groups = []
    for g in all_groups:
        for inner_g in g:
            word_groups.append(inner_g)
    last_group = word_groups.pop()  # we take the last tuple for autocomplete
    token = last_group[1]
    kind = last_group[0]
    if word_groups:
        prefix = ' '.join([i[1] for i in word_groups]) + " "
    else:
        prefix = ''
    frequency_file = config.db_path + "/data/frequencies/normalized_word_frequencies"
    if kind == "TERM":
        expanded_token = token + '.*'
        grep_proc = grep_word(expanded_token, frequency_file, subprocess.PIPE,
                              db.locals['lowercase_index'])
    elif kind == "QUOTE":
        expanded_token = token[:-1] + '.*' + token[-1]
        grep_proc = grep_exact(expanded_token, frequency_file, subprocess.PIPE)
    elif kind == "NOT" or kind == "OR":
        return []
    matches = []
    len_token = len(token)
    for line in grep_proc.stdout:
        word = line.split(b'\t')[1].strip().decode('utf8')
        highlighted_word = highlighter(word, len_token)
        matches.append(highlighted_word)
    output_string = []
    for m in matches:
        if kind == "QUOTE":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix + m)
    return output_string
# Python 2 variant: the token and the grep output are byte strings.
def format_query(q, db, config):
    parsed = parse_query(q)
    group = group_terms(parsed)
    all_groups = split_terms(group)
    # We extract every word tuple
    word_groups = []
    for g in all_groups:
        for inner_g in g:
            word_groups.append(inner_g)
    last_group = word_groups.pop()  # we take the last tuple for autocomplete
    token = last_group[1]
    kind = last_group[0]
    if word_groups:
        prefix = ' '.join([i[1] for i in word_groups]) + " "
    else:
        prefix = ''
    frequency_file = config.db_path + "/data/frequencies/normalized_word_frequencies"
    if kind == "TERM":
        expanded_token = token + '.*'
        grep_proc = grep_word(expanded_token, frequency_file, subprocess.PIPE,
                              db.locals['lowercase_index'])
    elif kind == "QUOTE":
        expanded_token = token[:-1] + '.*' + token[-1]
        grep_proc = grep_exact(expanded_token, frequency_file, subprocess.PIPE)
    elif kind == "NOT" or kind == "OR":
        return []
    matches = []
    len_token = len(token.decode('utf-8'))
    for line in grep_proc.stdout:
        word = line.split('\t')[1].strip()
        highlighted_word = highlighter(word, len_token)
        matches.append(highlighted_word)
    output_string = []
    for m in matches:
        if kind == "QUOTE":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix + m)
    return output_string
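# A pure-Python stand-in for the grep step above, for illustration only. It
# assumes the frequency file holds tab-separated lines whose second field is
# the surface form (inferred from line.split('\t')[1]) and that the pattern
# is anchored to the start of the normalized form; the real code shells out
# through grep_word / grep_exact instead.
import re


def complete_prefix(token, lines):
    pattern = re.compile(re.escape(token) + '.*')
    matches = []
    for line in lines:
        fields = line.split('\t')
        if pattern.match(fields[0]):
            matches.append(fields[1].strip())
    return matches


sample = ["amour\tamour\n", "amoureux\tAmoureux\n", "haine\thaine\n"]
print(complete_prefix("amou", sample))  # ['amour', 'Amoureux']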
def format_query(q, db):
    parsed = parse_query(q)
    group = group_terms(parsed)
    all_groups = split_terms(group)
    # We extract every word tuple
    word_groups = []
    for g in all_groups:
        for inner_g in g:
            word_groups.append(inner_g)
    last_group = word_groups.pop()  # we take the last tuple for autocomplete
    token = last_group[1]
    kind = last_group[0]
    if word_groups:
        prefix = ' '.join([i[1] for i in word_groups]) + " "
    else:
        prefix = ''
    if kind == "OR":
        return []
    if kind == "QUOTE":
        token = token.replace('"', '')
    frequency_file = db.locals["db_path"] + "/frequencies/normalized_word_frequencies"
    expanded_token = token + '.*'
    grep_proc = grep_word(expanded_token, frequency_file, subprocess.PIPE)
    matches = []
    len_token = len(token.decode('utf-8'))
    for line in grep_proc.stdout:
        word = line.split('\t')[1]
        highlighted_word = highlighter(word, len_token)
        matches.append(highlighted_word)
    output_string = []
    for m in matches:
        if kind == "QUOTE":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix + m)
    return output_string
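# Hypothetical stand-in for PhiloLogic's highlighter(), sketched only to show
# the shape of the call: wrap the first len_token characters (the part the
# user typed) in <b> tags. The actual markup PhiloLogic emits is an
# assumption; only the (word, length) signature is taken from the calls above.
def highlighter(word, len_token):
    return '<b>%s</b>%s' % (word[:len_token], word[len_token:])


print(highlighter('amoureux', 4))  # <b>amou</b>reux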