def colloc_concordance(hit, path, q, context_size):
    """Return the concordance text for ``hit`` with the collocate highlighted.

    The text comes from ``fetch_concordance(hit, path, context_size)``
    (defined outside this block; presumably returns a unicode string --
    verify against its definition).  Every whole-word, case-insensitive
    occurrence of ``q['collocate']`` is wrapped in
    ``<span class="collocate">...</span>``.

    Parameters:
        hit: the hit whose context is fetched.
        path: passed through to ``fetch_concordance``.
        q: mapping holding the UTF-8 encoded byte string ``'collocate'``.
        context_size: passed through to ``fetch_concordance``.

    Returns:
        The concordance text with the collocate occurrences wrapped.
    """
    conc_text = fetch_concordance(hit, path, context_size)
    collocate = q['collocate'].decode('utf-8', 'ignore')
    if not collocate:
        # An empty pattern would match at every word boundary and litter the
        # text with empty <span> elements.
        return conc_text
    # re.escape: the collocate is user-supplied text, not a regex, so
    # characters such as '.', '(' or '*' must be matched literally.
    # Lookarounds instead of (.*\W) ... (\W.*): the greedy prefix only ever
    # highlighted the last occurrence on a line and could not match a
    # collocate at the very start or end of the text.
    collocate_match = re.compile(r'(?<!\w)(%s)(?!\w)' % re.escape(collocate),
                                 flags=re.U | re.I)
    return collocate_match.sub(r'<span class="collocate">\1</span>', conc_text)
def colloc_concordance(hit, path, q, db):
    """Return the concordance text for ``hit`` with the collocate highlighted.

    The text returned by ``fetch_concordance(hit, path, q)`` is split with
    the module-level ``token_regex`` (defined outside this block; presumably
    it keeps the separators so that re-joining restores the text -- verify).
    Each non-empty piece that equals the collocate, ignoring case, is wrapped
    in ``<span class="collocate">...</span>`` and the pieces are joined back
    together.

    Parameters:
        hit: the hit whose context is fetched.
        path: passed through to ``fetch_concordance``.
        q: mapping holding the UTF-8 encoded byte string ``'collocate'``;
            also passed through to ``fetch_concordance``.
        db: unused here; kept for interface compatibility.

    Returns:
        The re-joined concordance text with matching tokens wrapped.
    """
    conc_text = fetch_concordance(hit, path, q)
    # Decode once instead of once per token, and lowercase it so that both
    # sides of the comparison are case-folded (previously only the token was,
    # so a collocate containing uppercase could never match).
    collocate = q['collocate'].decode('utf-8', 'ignore').lower()
    highlighted = []
    for token in token_regex.split(conc_text):
        if not token:
            # re.split can yield empty strings; they add nothing to the join.
            continue
        if token.lower() == collocate:
            token = '<span class="collocate">%s</span>' % token
        highlighted.append(token)
    return ''.join(highlighted)
def filter_words_by_property(hits, path, q, db, config, word_filter=True, filter_num=100, stopwords=True):
    """Build one page of concordance results restricted to a word property.

    Iterates over ``hits`` and keeps those for which
    ``get_word_attrib(hit, q["word_property"], db)`` equals
    ``q["word_property_value"]``.  Matching hits before position ``start``
    (1-based; a ``q.start`` of 0 is treated as 1) are skipped, and collection
    stops once ``q.results_per_page`` hits have been kept.

    Parameters:
        hits: iterable of hits; its ``done`` attribute is copied to the
            result as ``"query_done"``.
        path: not used in this function.
        q: query object; iterated to build the ``"query"`` dict, indexed for
            the ``word_property*`` values, and read for ``start`` and
            ``results_per_page``.
        db: passed to ``get_word_attrib`` / ``citation_links`` /
            ``fetch_concordance``; ``db.locals['metadata_fields']`` lists the
            metadata names copied from each hit.
        config: provides ``db_path`` and ``concordance_length``.
        word_filter, filter_num, stopwords: not used in this function.

    Returns:
        A dict with the keys ``"query"``, ``"results"``, ``"query_done"``,
        ``"results_length"`` and ``"description"``.

    Side effects:
        Writes ``DONE`` to ``sys.stderr``.
    """
    # Iterating q is expected to yield (key, value) pairs -- TODO confirm.
    concordance_object = {"query": dict([i for i in q])}
    # NOTE(review): `length` is never read below; the fetch_concordance call
    # uses config.concordance_length directly.
    length = config['concordance_length']
    # Do these need to be captured in wsgi_handler?
    word_property = q["word_property"]
    word_property_value = q["word_property_value"]
    # NOTE(review): this value is overwritten after the loop before being read.
    word_property_total = q["word_property_total"]
    new_hitlist = []
    results = []
    position = 0  # count of matching hits seen so far
    more_pages = False
    if q.start == 0:
        start = 1
    else:
        start = q.start
    for hit in hits:
        ## get my chunk of text ##
        hit_val = get_word_attrib(hit,word_property,db)
        if hit_val == word_property_value:
            position += 1
            if position < start:
                # Matching hit that belongs to an earlier page.
                continue
            new_hitlist.append(hit)
            citation_hrefs = citation_links(db, config, hit)
            metadata_fields = {}
            for metadata in db.locals['metadata_fields']:
                metadata_fields[metadata] = hit[metadata]
            citation = concordance_citation(hit, citation_hrefs)
            context = fetch_concordance(db, hit, config.db_path, config.concordance_length)
            result_obj = {"philo_id": hit.philo_id, "citation": citation, "citation_links": citation_hrefs,
                          "context": context, "metadata_fields": metadata_fields, "bytes": hit.bytes,
                          "collocate_count": 1}
            results.append(result_obj)
        # NOTE(review): the original indentation of this check was lost; it is
        # placed at loop level here.  Nesting it inside the `if` above behaves
        # the same except when q.results_per_page == 0 -- confirm.
        # more_pages is set as soon as the page is full, whether or not any
        # further matching hit actually exists.
        if len(new_hitlist) == (q.results_per_page):
            more_pages = True
            break
    end = start + len(results) - 1
    # A short page means the hits were exhausted, so `end` is reported as the
    # total; a full page is reported as `end + 1`.
    if len(results) < q.results_per_page:
        word_property_total = end
    else:
        word_property_total = end + 1
    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = word_property_total
    concordance_object["description"] = {"start": start, "end": end, "results_per_page": q.results_per_page,
                                         "more_pages": more_pages}
    print >> sys.stderr, "DONE"
    return concordance_object
import os
import re
import sys
sys.path.append('..')
from functions.wsgi_handler import parse_cgi
from reports.concordance import fetch_concordance
from reports.theme_rheme import adjust_results
import cgi
import json
from philologic.DB import DB


def main():
    """CGI entry point: print the concordance text of a single hit.

    Reads the ``hit_num`` and ``length`` form fields, runs the query
    described by the CGI environment, and writes the concordance of hit
    number ``hit_num`` to standard output as a UTF-8 encoded HTML response.
    For the ``theme_rheme`` report the hits are first passed through
    ``adjust_results`` and the ``concordance`` attribute of the selected hit
    is used instead of calling ``fetch_concordance``.
    """
    environ = os.environ
    # Cut the script path back to the database directory under philo4/.
    path = re.sub(r'(philo4/[^/]+/).*', r'\1', environ['SCRIPT_FILENAME'])
    form = cgi.FieldStorage()
    num = int(form.getvalue('hit_num'))
    length = int(form.getvalue('length'))
    db, path_components, q = parse_cgi(environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    if q['report'] != 'theme_rheme':
        conc_text = fetch_concordance(hits[num], path, q, length=length)
    else:
        new_hits, full_report = adjust_results(hits, path, q, length=length)
        conc_text = new_hits[num].concordance
    # The body is encoded as UTF-8 below, so declare that charset in the
    # header instead of leaving the client to guess the encoding.
    # The blank line after the header terminates the CGI header section.
    sys.stdout.write("Content-Type: text/html; charset=utf-8\n\n")
    sys.stdout.write(conc_text.encode('utf-8', 'ignore'))
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()
def colloc_concordance(hit, path, q, context_size):
    """Return the concordance text for ``hit`` with the collocate highlighted.

    The text comes from ``fetch_concordance(hit, path, context_size)``
    (defined outside this block; presumably returns a unicode string --
    verify against its definition).  Every whole-word, case-insensitive
    occurrence of ``q['collocate']`` is wrapped in
    ``<span class="collocate">...</span>``.

    Parameters:
        hit: the hit whose context is fetched.
        path: passed through to ``fetch_concordance``.
        q: mapping holding the UTF-8 encoded byte string ``'collocate'``.
        context_size: passed through to ``fetch_concordance``.

    Returns:
        The concordance text with the collocate occurrences wrapped.
    """
    conc_text = fetch_concordance(hit, path, context_size)
    collocate = q['collocate'].decode('utf-8', 'ignore')
    if not collocate:
        # An empty pattern would match at every word boundary and litter the
        # text with empty <span> elements.
        return conc_text
    # re.escape: the collocate is user-supplied text, not a regex, so
    # characters such as '.', '(' or '*' must be matched literally.
    # Lookarounds instead of (.*\W) ... (\W.*): the greedy prefix only ever
    # highlighted the last occurrence on a line and could not match a
    # collocate at the very start or end of the text.
    collocate_match = re.compile(r'(?<!\w)(%s)(?!\w)' % re.escape(collocate),
                                 flags=re.U | re.I)
    return collocate_match.sub(r'<span class="collocate">\1</span>', conc_text)
#!/usr/bin/env python
import os
import re
import sys
sys.path.append('..')
from functions.wsgi_handler import parse_cgi
from reports.concordance import fetch_concordance
from reports.theme_rheme import adjust_results
import cgi
import json
from philologic.DB import DB


def main():
    """CGI entry point: print the concordance text of a single hit.

    Reads the ``hit_num`` and ``length`` form fields, runs the query
    described by the CGI environment, and writes the concordance of hit
    number ``hit_num`` to standard output as a UTF-8 encoded HTML response.
    For the ``theme_rheme`` report the hits are first passed through
    ``adjust_results`` and the ``concordance`` attribute of the selected hit
    is used instead of calling ``fetch_concordance``.
    """
    environ = os.environ
    # Cut the script path back to the database directory under philo4/.
    path = re.sub(r'(philo4/[^/]+/).*', r'\1', environ['SCRIPT_FILENAME'])
    form = cgi.FieldStorage()
    num = int(form.getvalue('hit_num'))
    length = int(form.getvalue('length'))
    db, path_components, q = parse_cgi(environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    if q['report'] != 'theme_rheme':
        conc_text = fetch_concordance(hits[num], path, q, length=length)
    else:
        new_hits, full_report = adjust_results(hits, path, q, length=length)
        conc_text = new_hits[num].concordance
    # The body is encoded as UTF-8 below, so declare that charset in the
    # header instead of leaving the client to guess the encoding.
    # The blank line after the header terminates the CGI header section.
    sys.stdout.write("Content-Type: text/html; charset=utf-8\n\n")
    sys.stdout.write(conc_text.encode('utf-8', 'ignore'))
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()