def bibliography_results(db, q, config):
    """Assemble one page of bibliography results.

    When ``q.no_metadata`` is truthy every object at
    ``db.locals['default_object_level']`` is fetched with ``db.get_all``;
    otherwise ``db.query`` is called with ``q.metadata`` as keyword arguments.
    The page bounds are computed by ``f.link.page_interval``.

    Returns a ``(bibliography_object, hits)`` tuple. ``bibliography_object``
    is a dict with the keys ``description``, ``query``, ``results``,
    ``results_length``, ``query_done`` and ``result_type``. ``result_type``
    is the ``type`` of the last hit on the page, or ``'doc'`` when the page
    is empty.
    """
    hits = (db.get_all(db.locals['default_object_level'])
            if q.no_metadata else db.query(**q.metadata))
    start, end, n = f.link.page_interval(q.results_per_page, hits, q.start, q.end)
    description = {"start": start, "end": end, "n": n,
                   "results_per_page": q.results_per_page}
    # Iterating q yields (key, value) pairs.
    query = dict(list(q))

    page = []
    result_type = 'doc'
    for hit in hits[start - 1:end]:
        hrefs = citation_links(db, config, hit)
        fields = dict((name, hit[name]) for name in db.locals['metadata_fields'])
        result_type = hit.type
        # Document-level hits get a bibliographic citation; anything else is
        # cited the same way a concordance hit would be.
        citation = (biblio_citation(hit, hrefs) if result_type == "doc"
                    else r.concordance_citation(hit, hrefs))
        page.append({'citation': citation,
                     'citation_links': hrefs,
                     'philo_id': hit.philo_id,
                     "metadata_fields": fields})

    # len(hits) and hits.done are read only after the page has been built,
    # as in the original statement order.
    bibliography_object = {"description": description,
                           "query": query,
                           "results": page,
                           'results_length': len(hits),
                           'query_done': hits.done,
                           'result_type': result_type}
    return bibliography_object, hits
def filter_words_by_property(hits, path, q, db, config, word_filter=True, filter_num=100, stopwords=True):
    """Return one page of concordance results filtered by a word property.

    ``hits`` is scanned in order. A hit is kept when ``get_word_attrib(hit,
    q["word_property"], db)`` equals ``q["word_property_value"]``. Matching
    hits before position ``q.start`` (1-based; 0 is treated as 1) are skipped,
    and the scan stops as soon as ``q.results_per_page`` results have been
    collected.

    ``path``, ``word_filter``, ``filter_num`` and ``stopwords`` are part of
    the signature but are not referenced in the body.

    Returns a dict with the keys ``query``, ``results``, ``query_done``,
    ``results_length`` and ``description``. Because the scan stops early,
    ``results_length`` is only a lower bound when the page is full: it is
    reported as ``end + 1`` and ``more_pages`` is set to True without
    checking whether another matching hit actually exists.
    """
    concordance_object = {"query": dict(list(q))}

    # Do these need to be captured in wsgi_handler?
    word_property = q["word_property"]
    word_property_value = q["word_property_value"]

    # A start of 0 means "from the beginning"; positions are 1-based.
    start = 1 if q.start == 0 else q.start

    results = []
    position = 0  # running count of hits that matched the property value
    more_pages = False
    for hit in hits:
        if get_word_attrib(hit, word_property, db) == word_property_value:
            position += 1
            if position < start:
                # Belongs to an earlier page.
                continue
            citation_hrefs = citation_links(db, config, hit)
            metadata_fields = dict((name, hit[name])
                                   for name in db.locals['metadata_fields'])
            citation = concordance_citation(hit, citation_hrefs)
            context = fetch_concordance(db, hit, config.db_path,
                                        config.concordance_length)
            results.append({"philo_id": hit.philo_id,
                            "citation": citation,
                            "citation_links": citation_hrefs,
                            "context": context,
                            "metadata_fields": metadata_fields,
                            "bytes": hit.bytes,
                            "collocate_count": 1})

        if len(results) == q.results_per_page:
            more_pages = True
            break

    end = start + len(results) - 1
    # A short page means the scan reached the end of the hit list, so `end`
    # is the exact total; a full page only proves there are at least `end`.
    if len(results) < q.results_per_page:
        word_property_total = end
    else:
        word_property_total = end + 1

    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = word_property_total
    concordance_object["description"] = {"start": start,
                                         "end": end,
                                         "results_per_page": q.results_per_page,
                                         "more_pages": more_pages}
    return concordance_object
def generate_text_object(obj, db, q, config):
    """Build the navigation payload for a single text object.

    Returns a dict with the keys ``query``, ``philo_id``, ``prev``, ``next``,
    ``metadata_fields`` (only fields whose entry in
    ``db.locals['metadata_types']`` is ``"doc"``), ``citation`` and ``text``.

    NOTE(review): a second ``generate_text_object`` (with an extra ``note``
    parameter) is defined later in this file; if both definitions live in the
    same module the later one replaces this one.
    """
    # Drop trailing zero components of the id.
    # NOTE(review): an id consisting only of zeros would empty the list and
    # raise IndexError here.
    id_parts = list(obj.philo_id)
    while id_parts[-1] == 0:
        del id_parts[-1]

    text_object = {"query": dict(list(q)),
                   "philo_id": ' '.join(map(str, id_parts))}

    # Neighbour ids are cut to at most 7 components and then to the depth
    # registered for this object's type in philo_slices.
    prev_parts = obj.prev.split()[:7]
    text_object['prev'] = ' '.join(prev_parts[:philo_slices[obj.philo_type]])
    next_parts = obj.next.split()[:7]
    text_object['next'] = ' '.join(next_parts[:philo_slices[obj.philo_type]])

    text_object['metadata_fields'] = dict(
        (name, obj[name])
        for name in db.locals['metadata_fields']
        if db.locals['metadata_types'][name] == "doc")

    hrefs = citation_links(db, config, obj)
    text_object['citation'] = biblio_citation(obj, hrefs)
    text_object['text'] = get_text_obj(obj, config, q, db.locals['word_regex'])
    return text_object
def generate_text_object(obj, db, q, config, note=False):
    """Build the navigation payload for a single text object.

    ``note`` is forwarded unchanged to ``get_text_obj``.

    Returns a dict with the keys ``query``, ``philo_id``, ``prev``, ``next``,
    ``metadata_fields`` (only fields whose entry in
    ``db.locals['metadata_types']`` is ``"doc"``), ``citation``, ``text`` and
    ``imgs``.
    """
    # Drop trailing zero components of the id.
    # NOTE(review): an id consisting only of zeros would empty the list and
    # raise IndexError here.
    id_parts = list(obj.philo_id)
    while id_parts[-1] == 0:
        del id_parts[-1]

    text_object = {"query": dict(list(q)),
                   "philo_id": ' '.join(map(str, id_parts))}
    text_object['prev'] = neighboring_object_id(db, obj.prev)
    text_object['next'] = neighboring_object_id(db, obj.next)

    text_object['metadata_fields'] = dict(
        (name, obj[name])
        for name in db.locals['metadata_fields']
        if db.locals['metadata_types'][name] == "doc")

    hrefs = citation_links(db, config, obj)
    text_object['citation'] = biblio_citation(obj, hrefs)

    # get_text_obj returns a (text, imgs) pair in this version.
    body, images = get_text_obj(obj, config, q, db.locals['word_regex'], note=note)
    text_object['text'] = body
    text_object['imgs'] = images
    return text_object
def generate_toc_object(obj, db, q, config):
    """Build the table of contents for the document ``obj``.

    Every row returned by ``nav_query(obj, db)`` becomes one TOC entry,
    except rows that are skipped:

    * rows named ``'__philo_virtual'`` whose type is not ``"div1"``;
    * rows with a ``word_count`` of 0;
    * rows named ``"note"``.

    The label of an entry is chosen as follows: ``"Front Matter"`` for rows
    named ``"front"``; otherwise the stripped ``head``; if that is empty,
    ``type + " " + n`` when both are set; otherwise the first non-empty of
    ``type``, ``philo_name`` and ``philo_type`` (a resulting
    ``"__philo_virtual"`` is replaced by ``philo_type``). The first character
    of the label is upper-cased.

    Returns a dict with the keys ``query``, ``philo_id``, ``toc``,
    ``metadata_fields`` (only fields whose entry in
    ``db.locals['metadata_types']`` is ``"doc"``) and ``citation``.
    """
    text_hierarchy = []
    for row in nav_query(obj, db):
        philo_name = row['philo_name']
        philo_type = row['philo_type']
        if philo_name == '__philo_virtual' and philo_type != "div1":
            continue
        if row['word_count'] == 0:
            continue
        if philo_name == "note":
            continue

        if philo_name == "front":
            display_name = "Front Matter"
        else:
            display_name = row['head']
            if display_name:
                display_name = display_name.strip()

        if not display_name:
            if row["type"] and row["n"]:
                display_name = row['type'] + " " + row["n"]
            else:
                # `head` is known to be empty or whitespace-only at this
                # point, so it is deliberately left out of the fallback
                # chain: re-reading the unstripped value would turn a
                # whitespace-only head into a blank label.
                display_name = row['type'] or philo_name or philo_type
                if display_name == "__philo_virtual":
                    display_name = philo_type
        display_name = display_name[0].upper() + display_name[1:]

        # The id is cut to the depth registered for this object type. The
        # link helper gets its own slice, separate from the one that is
        # joined into the entry's philo_id.
        depth = philo_slices[philo_type]
        raw_id = row['philo_id']
        link = f.make_absolute_object_link(config, raw_id.split()[:depth])
        text_hierarchy.append({"philo_id": ' '.join(raw_id.split()[:depth]),
                               "philo_type": philo_type,
                               "label": display_name,
                               "href": link})

    metadata_fields = dict(
        (name, obj[name])
        for name in db.locals['metadata_fields']
        if db.locals['metadata_types'][name] == "doc")
    citation_hrefs = citation_links(db, config, obj)
    citation = biblio_citation(obj, citation_hrefs)
    toc_object = {"query": dict(list(q)),
                  "philo_id": obj.philo_id,
                  "toc": text_hierarchy,
                  "metadata_fields": metadata_fields,
                  "citation": citation}
    return toc_object
def generate_kwic_results(db, q, config, link_to_hit="div1"):
    """Build one page of keyword-in-context (KWIC) results.

    Runs ``db.query`` with ``q["q"]``, ``q["method"]``, ``q["arg"]`` and
    ``q.metadata``, asks ``f.link.page_interval`` for the page bounds and
    formats every hit on that page.

    ``link_to_hit`` is described by the original docstring as the text object
    the metadata link leads to; it is not referenced in this function's body.

    Returns a dict with the keys ``description``, ``query``, ``results``,
    ``results_length`` and ``query_done``.
    """
    hits = db.query(q["q"], q["method"], q["arg"], **q.metadata)
    start, end, _total = f.link.page_interval(q.results_per_page, hits, q.start, q.end)
    kwic_object = {"description": {"start": start,
                                   "end": end,
                                   "results_per_page": q.results_per_page},
                   "query": dict(list(q))}

    context_size = config.concordance_length
    rows = []
    for hit in hits[start - 1:end]:
        # Every metadata value is stripped of surrounding whitespace.
        fields = dict((name, hit[name].strip())
                      for name in db.locals['metadata_fields'])

        hrefs = citation_links(db, config, hit)
        citation = concordance_citation(hit, hrefs)

        # Fetch the span between the first and last hit offsets plus one
        # context window on each side.
        hit_span = hit.bytes[-1] - hit.bytes[0]
        fetch_length = context_size + hit_span + context_size

        offsets, byte_start = adjust_bytes(hit.bytes, context_size)
        raw_text = f.get_text(hit, byte_start, fetch_length, config.db_path)
        context = KWIC_formatter(format_strip(raw_text, offsets), len(hit.bytes))

        rows.append({"philo_id": hit.philo_id,
                     "context": context,
                     "metadata_fields": fields,
                     "citation_links": hrefs,
                     "citation": citation,
                     "bytes": hit.bytes})

    kwic_object['results'] = rows
    kwic_object['results_length'] = len(hits)
    kwic_object["query_done"] = hits.done
    return kwic_object