Exemplo n.º 1
0
def bibliography_results(db, q, config):
    """Build one page of bibliography results for the request ``q``.

    When ``q.no_metadata`` is truthy every object at
    ``db.locals['default_object_level']`` is fetched with ``db.get_all``;
    otherwise ``db.query`` is called with ``q.metadata`` as keyword arguments.
    The page bounds come from ``f.link.page_interval``.

    Returns a ``(bibliography_object, hits)`` tuple.  ``bibliography_object``
    holds the keys ``description``, ``query``, ``results``,
    ``results_length``, ``query_done`` and ``result_type``.  ``result_type``
    is the ``type`` of the last hit on the page, or ``'doc'`` when the page
    is empty.
    """
    hits = db.get_all(db.locals['default_object_level']) if q.no_metadata else db.query(**q.metadata)
    start, end, n = f.link.page_interval(q.results_per_page, hits, q.start, q.end)
    description = {"start": start, "end": end, "n": n, "results_per_page": q.results_per_page}
    bibliography_object = {"description": description, "query": dict(pair for pair in q)}

    page = []
    result_type = 'doc'
    for hit in hits[start - 1:end]:
        links = citation_links(db, config, hit)
        fields = {name: hit[name] for name in db.locals['metadata_fields']}
        result_type = hit.type
        # Document-level hits get a bibliographic citation, anything else a
        # concordance-style one.
        build_citation = biblio_citation if hit.type == "doc" else r.concordance_citation
        page.append({
            'citation': build_citation(hit, links),
            'citation_links': links,
            'philo_id': hit.philo_id,
            "metadata_fields": fields,
        })

    bibliography_object["results"] = page
    # len(hits) is read before hits.done, as in the original statement order.
    bibliography_object['results_length'] = len(hits)
    bibliography_object['query_done'] = hits.done
    bibliography_object['result_type'] = result_type
    return bibliography_object, hits
Exemplo n.º 2
0
def filter_words_by_property(hits, path, q, db, config, word_filter=True, filter_num=100, stopwords=True):
    """Return one page of concordance results filtered by a word property.

    For every hit, the attribute named by ``q["word_property"]`` is read with
    ``get_word_attrib`` and compared to ``q["word_property_value"]``.
    Matching hits are counted from 1; matches numbered below the page start
    are skipped, and iteration stops as soon as ``q.results_per_page``
    results have been collected.

    ``path``, ``word_filter``, ``filter_num`` and ``stopwords`` are part of
    the signature but are not used by this function.

    Returns a dict with the keys ``query``, ``results``, ``query_done``,
    ``results_length`` and ``description``.  ``results_length`` is not an
    exact total: it equals the index of the last returned result when the
    page is not full, and that index plus one when the page is full.
    Likewise ``description["more_pages"]`` is set to True whenever the page
    fills up, without checking whether another matching hit actually exists.
    """
    concordance_object = {"query": dict(pair for pair in q)}

    # Do these need to be captured in wsgi_handler?
    word_property = q["word_property"]
    word_property_value = q["word_property_value"]

    results = []
    position = 0
    more_pages = False

    # A start of 0 means "first page"; result positions are 1-based.
    if q.start == 0:
        start = 1
    else:
        start = q.start

    for hit in hits:
        if get_word_attrib(hit, word_property, db) == word_property_value:
            position += 1
            if position < start:
                # Matching hit that belongs to an earlier page.
                continue
            citation_hrefs = citation_links(db, config, hit)
            metadata_fields = {}
            for metadata in db.locals['metadata_fields']:
                metadata_fields[metadata] = hit[metadata]
            citation = concordance_citation(hit, citation_hrefs)
            context = fetch_concordance(db, hit, config.db_path, config.concordance_length)
            results.append({"philo_id": hit.philo_id, "citation": citation, "citation_links": citation_hrefs,
                            "context": context, "metadata_fields": metadata_fields, "bytes": hit.bytes,
                            "collocate_count": 1})

        if len(results) == q.results_per_page:
            more_pages = True
            break

    end = start + len(results) - 1
    if len(results) < q.results_per_page:
        word_property_total = end
    else:
        # Page is full: report one more than what was seen so far.
        word_property_total = end + 1
    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = word_property_total
    concordance_object["description"] = {"start": start, "end": end, "results_per_page": q.results_per_page,
                                         "more_pages": more_pages}
    # Same output as the former `print >> sys.stderr, "DONE"`, but valid on
    # both Python 2 and Python 3.
    sys.stderr.write("DONE\n")
    return concordance_object
Exemplo n.º 3
0
def generate_text_object(obj, db, q, config):
    """Assemble the dictionary describing the text object ``obj``.

    The returned dict has the keys ``query``, ``philo_id``, ``prev``,
    ``next``, ``metadata_fields``, ``citation`` and ``text``.

    ``philo_id`` is ``obj.philo_id`` with trailing zero components removed,
    joined with spaces.  ``prev`` and ``next`` are the neighbouring ids cut
    to at most 7 components and then to ``philo_slices[obj.philo_type]``
    components.  ``metadata_fields`` only contains the fields whose entry in
    ``db.locals['metadata_types']`` is ``"doc"``.
    """
    philo_id = list(obj.philo_id)
    # Strip trailing zeros.  The emptiness check keeps an all-zero (or empty)
    # id from raising IndexError once every component has been popped.
    while philo_id and philo_id[-1] == 0:
        philo_id.pop()
    text_object = {"query": dict([i for i in q]), "philo_id": ' '.join([str(i) for i in philo_id])}

    depth = philo_slices[obj.philo_type]
    text_object['prev'] = ' '.join(obj.prev.split()[:7][:depth])
    text_object['next'] = ' '.join(obj.next.split()[:7][:depth])

    metadata_fields = {}
    for metadata in db.locals['metadata_fields']:
        if db.locals['metadata_types'][metadata] == "doc":
            metadata_fields[metadata] = obj[metadata]
    text_object['metadata_fields'] = metadata_fields

    citation_hrefs = citation_links(db, config, obj)
    text_object['citation'] = biblio_citation(obj, citation_hrefs)
    text_object['text'] = get_text_obj(obj, config, q, db.locals['word_regex'])
    return text_object
Exemplo n.º 4
0
def generate_text_object(obj, db, q, config, note=False):
    """Assemble the dictionary describing the text object ``obj``.

    The returned dict has the keys ``query``, ``philo_id``, ``prev``,
    ``next``, ``metadata_fields``, ``citation``, ``text`` and ``imgs``.

    ``philo_id`` is ``obj.philo_id`` with trailing zero components removed,
    joined with spaces.  ``prev`` and ``next`` are produced by
    ``neighboring_object_id`` from ``obj.prev`` and ``obj.next``.
    ``metadata_fields`` only contains the fields whose entry in
    ``db.locals['metadata_types']`` is ``"doc"``.  ``note`` is forwarded
    unchanged to ``get_text_obj``, which supplies both ``text`` and ``imgs``.
    """
    philo_id = list(obj.philo_id)
    # Strip trailing zeros.  The emptiness check keeps an all-zero (or empty)
    # id from raising IndexError once every component has been popped.
    while philo_id and philo_id[-1] == 0:
        philo_id.pop()
    text_object = {"query": dict([i for i in q]), "philo_id": ' '.join([str(i) for i in philo_id])}
    text_object['prev'] = neighboring_object_id(db, obj.prev)
    text_object['next'] = neighboring_object_id(db, obj.next)

    metadata_fields = {}
    for metadata in db.locals['metadata_fields']:
        if db.locals['metadata_types'][metadata] == "doc":
            metadata_fields[metadata] = obj[metadata]
    text_object['metadata_fields'] = metadata_fields

    citation_hrefs = citation_links(db, config, obj)
    text_object['citation'] = biblio_citation(obj, citation_hrefs)

    text, imgs = get_text_obj(obj, config, q, db.locals['word_regex'], note=note)
    text_object['text'] = text
    text_object['imgs'] = imgs
    return text_object
Exemplo n.º 5
0
def generate_toc_object(obj, db, q, config):
    """Build the table-of-contents object for ``obj``.

    Rows returned by ``nav_query(obj, db)`` are turned into TOC entries.  A
    row is skipped when its ``philo_name`` is ``'__philo_virtual'`` and its
    ``philo_type`` is not ``"div1"``, when its ``word_count`` is 0, or when
    its ``philo_name`` is ``"note"``.

    The label of an entry is chosen in this order: ``"Front Matter"`` for a
    ``philo_name`` of ``"front"``; the stripped ``head``; ``type`` and ``n``
    joined by a space when both are set; then the first non-empty value among
    ``type``, ``philo_name`` and ``philo_type`` (with ``'__philo_virtual'``
    replaced by ``philo_type``).  The first character of the label is
    upper-cased.

    Returns a dict with the keys ``query``, ``philo_id``, ``toc``,
    ``metadata_fields`` (fields typed ``"doc"`` only) and ``citation``.
    """
    toms_object = nav_query(obj, db)
    text_hierarchy = []
    for i in toms_object:
        if i['philo_name'] == '__philo_virtual' and i["philo_type"] != "div1":
            continue
        if i['word_count'] == 0:
            continue
        if i['philo_name'] == "note":
            continue

        philo_type = i['philo_type']
        if i['philo_name'] == "front":
            display_name = "Front Matter"
        else:
            display_name = (i['head'] or "").strip()
            if not display_name:
                if i["type"] and i["n"]:
                    display_name = i['type'] + " " + i["n"]
                else:
                    # 'head' is known to be empty or whitespace-only here, so
                    # it is left out of the fallback chain; otherwise a
                    # whitespace-only head would become a blank label.
                    display_name = i['type'] or i['philo_name'] or i['philo_type'] or ""
                    if display_name == "__philo_virtual":
                        display_name = i['philo_type']
        # Guard against an empty label: indexing [0] would raise.
        if display_name:
            display_name = display_name[0].upper() + display_name[1:]

        id_parts = i['philo_id'].split()[:philo_slices[philo_type]]
        # Join before handing the list to the link builder so the stored id
        # cannot be affected by anything the builder does with the list.
        philo_id = ' '.join(id_parts)
        link = f.make_absolute_object_link(config, id_parts)
        text_hierarchy.append({"philo_id": philo_id, "philo_type": philo_type, "label": display_name, "href": link})

    metadata_fields = {}
    for metadata in db.locals['metadata_fields']:
        if db.locals['metadata_types'][metadata] == "doc":
            metadata_fields[metadata] = obj[metadata]
    citation_hrefs = citation_links(db, config, obj)
    citation = biblio_citation(obj, citation_hrefs)
    toc_object = {"query": dict([i for i in q]), "philo_id": obj.philo_id, "toc": text_hierarchy,
                  "metadata_fields": metadata_fields, "citation": citation}
    return toc_object
Exemplo n.º 6
0
def generate_kwic_results(db, q, config, link_to_hit="div1"):
    """Build one page of KWIC (keyword-in-context) results for ``q``.

    The query is run with ``db.query(q["q"], q["method"], q["arg"],
    **q.metadata)`` and the page bounds come from ``f.link.page_interval``.
    For each hit on the page, a stretch of text covering
    ``config.concordance_length`` on each side of the hit's byte span is read
    with ``f.get_text`` and then passed through ``format_strip`` and
    ``KWIC_formatter``.

    The link_to_hit keyword defines the text object to which the metadata
    link leads to.  NOTE(review): the parameter is not referenced anywhere
    in this function body.

    Returns a dict with the keys ``description``, ``query``, ``results``,
    ``results_length`` and ``query_done``.
    """
    hits = db.query(q["q"], q["method"], q["arg"], **q.metadata)
    start, end, n = f.link.page_interval(q.results_per_page, hits, q.start, q.end)
    kwic_object = {"description": {"start": start, "end": end, "results_per_page": q.results_per_page},
                   "query": dict([i for i in q])}
    kwic_results = []

    for hit in hits[start - 1:end]:
        # Get all metadata
        metadata_fields = {}
        for metadata in db.locals['metadata_fields']:
            metadata_fields[metadata] = hit[metadata].strip()

        # Get all links and citations
        citation_hrefs = citation_links(db, config, hit)
        citation = concordance_citation(hit, citation_hrefs)

        # Determine length of text needed: context on both sides plus the
        # distance between the first and last hit byte offsets.
        byte_distance = hit.bytes[-1] - hit.bytes[0]
        length = config.concordance_length + byte_distance + config.concordance_length

        # Get concordance and align it
        byte_offsets, byte_start = adjust_bytes(hit.bytes, config.concordance_length)
        conc_text = f.get_text(hit, byte_start, length, config.db_path)
        conc_text = format_strip(conc_text, byte_offsets)
        conc_text = KWIC_formatter(conc_text, len(hit.bytes))

        kwic_result = {"philo_id": hit.philo_id, "context": conc_text, "metadata_fields": metadata_fields,
                       "citation_links": citation_hrefs, "citation": citation, "bytes": hit.bytes}
        kwic_results.append(kwic_result)
    kwic_object['results'] = kwic_results
    kwic_object['results_length'] = len(hits)
    kwic_object["query_done"] = hits.done

    return kwic_object