Exemplo n.º 1
0
def bootstrap_keyphrases_solicitations():
    """Extract and store keyphrases for every solicitation document.

    Each document in SOLICITATION_COLLECTION is first copied into a small
    dict holding only '_id' and, when present, 'keywords' and 'description'.
    For every copied document:

    * 'description', if present, is passed to process_doc_for_keyphrases;
    * 'keywords', if present, is passed to store_keyphrases_for_doc with
      vetted=True.

    Documents that carry 'keywords' but no 'description' are processed too;
    previously they were silently dropped because the copy was only kept
    when a description existed.
    """
    db = connect_to_arango()
    graph = get_technode_graph(db)
    nlp = English()
    col_documents = db.col(SOLICITATION_COLLECTION)

    # FIXME: creating mini-array rather than just straight-iterating over cursor
    # doing it this way to avoid cursor timeout issues, may no longer be needed in arango 3
    docs_lite = []
    for document in col_documents.all():
        doc_lite = {'_id': document['_id']}
        if 'keywords' in document:
            doc_lite['keywords'] = document['keywords']
        if 'description' in document:
            doc_lite['description'] = document['description']
        # Keep every document: the loop below guards each field on its own,
        # so a keywords-only document must not be lost here.
        docs_lite.append(doc_lite)

    for document in docs_lite:
        if 'description' in document:
            process_doc_for_keyphrases(nlp, db, graph,
                                       KEYPHRASE_SOLICITATION_RELATIONS,
                                       document['description'],
                                       document['_id'])
        if 'keywords' in document:
            store_keyphrases_for_doc(db,
                                     graph,
                                     KEYPHRASE_SOLICITATION_RELATIONS,
                                     document['keywords'],
                                     document['_id'],
                                     vetted=True)
Exemplo n.º 2
0
def associate_solicitation_contract():
    """Link each solicitation to the contracts sharing its topic code.

    For every document in the 'solicitations' collection, the 'contracts'
    collection is queried by example for documents whose 'topic_code' equals
    the solicitation's 'topic'. One edge per match is created in
    "solicitation_contract_relations", pointing from the solicitation to the
    contract.
    """
    db = connect_to_arango()
    graph = get_technode_graph(db)
    contract_col = db.col('contracts')
    solicitation_col = db.col('solicitations')
    edge_collection = "solicitation_contract_relations"

    for sol in solicitation_col.all():
        example = {'topic_code': sol['topic']}
        matches = contract_col.get_by_example(example)
        # Short pause after each lookup (presumably to throttle requests
        # against the database -- TODO confirm).
        time.sleep(0.1)
        for match in matches:
            edge = {"_from": sol['_id'], "_to": match['_id']}
            graph.create_edge(edge_collection, edge)
Exemplo n.º 3
0
def bootstrap_keyphrases_contracts():
    """Extract and store keyphrases for contract documents with an abstract.

    Only documents in CONTRACT_COLLECTION that have an 'abstract' field are
    handled. The abstract is passed to process_doc_for_keyphrases, and any
    'keywords' on the same document are passed to store_keyphrases_for_doc.
    """
    db = connect_to_arango()
    graph = get_technode_graph(db)
    nlp = English()
    contracts = db.col(CONTRACT_COLLECTION)

    # Copy the needed fields out of the cursor before doing any processing.
    slim_docs = []
    for record in contracts.all():
        slim = {'_id': record['_id']}
        if 'keywords' in record:
            slim['keywords'] = record['keywords']
        if 'abstract' not in record:
            # Documents without an abstract are not processed at all.
            continue
        slim['abstract'] = record['abstract']
        slim_docs.append(slim)

    for slim in slim_docs:
        process_doc_for_keyphrases(
            nlp, db, graph, KEYPHRASE_CONTRACT_RELATIONS,
            slim['abstract'], slim['_id'])
        if 'keywords' in slim:
            store_keyphrases_for_doc(
                db, graph, KEYPHRASE_CONTRACT_RELATIONS,
                slim['keywords'], slim['_id'])
Exemplo n.º 4
0
def bootstrap_keyphrases_r2s():
    """Extract keyphrases from the text fields of every R2 document.

    For each document in R2_COLLECTION, process_doc_for_keyphrases is called
    on:

    * 'program_desc', when present;
    * for each entry under 'projects': the concatenated 'accomp_planned'
      items (when the list is non-empty) and 'mission_desc' (when present).

    Every call is attributed to the owning document's '_id'.
    """
    db = connect_to_arango()
    graph = get_technode_graph(db)
    nlp = English()
    exhibits = db.col(R2_COLLECTION)

    # FIXME: creating mini-array rather than just straight-iterating over cursor
    # doing it this way to avoid cursor timeout issues, may no longer be needed in arango 3
    snapshots = []
    for record in exhibits.all():
        snapshot = {'_id': record['_id']}
        for field in ('program_desc', 'projects'):
            if field in record:
                snapshot[field] = record[field]
        snapshots.append(snapshot)

    for snapshot in snapshots:
        doc_id = snapshot['_id']

        if 'program_desc' in snapshot:
            process_doc_for_keyphrases(nlp, db, graph, KEYPHRASE_R2_RELATIONS,
                                       snapshot['program_desc'], doc_id)

        if 'projects' not in snapshot:
            continue

        projects = snapshot['projects']
        for project_key in projects:
            project = projects[project_key]

            if 'accomp_planned' in project:
                accomps = project['accomp_planned']
                # accomps are stored as a list and some are 0-length;
                # only non-empty ones are joined into one long string.
                if len(accomps) > 0:
                    process_doc_for_keyphrases(nlp, db, graph,
                                               KEYPHRASE_R2_RELATIONS,
                                               "".join(accomps), doc_id)

            if 'mission_desc' in project:
                process_doc_for_keyphrases(nlp, db, graph,
                                           KEYPHRASE_R2_RELATIONS,
                                           project['mission_desc'], doc_id)
Exemplo n.º 5
0
def get_term_collection():
    """Return the 'tech_terms' collection.

    A new handle is obtained from connect_to_arango() on every call.
    """
    term_collection = connect_to_arango().col('tech_terms')
    return term_collection
Exemplo n.º 6
0
 def to_arango(self, colname='r2_exhibits'):
     """Create a vertex for this object in the technode graph.

     colname names the vertex collection passed to create_vertex and
     defaults to 'r2_exhibits'. The object itself is passed as the vertex
     payload.
     """
     technode_graph = get_technode_graph(connect_to_arango())
     technode_graph.create_vertex(colname, self)
Exemplo n.º 7
0
def get_term_collection():
    """Look up and return the 'tech_terms' collection.

    Each call goes through connect_to_arango() to obtain the database handle.
    """
    database = connect_to_arango()
    terms = database.col('tech_terms')
    return terms
Exemplo n.º 8
0
    q = Queue('r2_load', connection=Redis())
    for file in r2_files:
        if file.lower().endswith(".xml"):
            q.enqueue(r2file_to_arango, file)
            #print "Loading %s" % (file)

def load_solicitation_listings(
        listings_path='/home/brian/technodeminer/local_listings.json'):
    """Enqueue one 'solicitation_load' job per listing read from a file.

    listings_path is the file handed to load_listings_from_file. It defaults
    to the location that used to be hard-coded, so existing callers behave
    exactly as before, while other machines can pass their own path.

    Each job calls listing_to_graph(listing, graph, db).

    NOTE(review): `graph` and `db` are read from module globals, which in
    this file are only bound inside the ``if __name__ == '__main__'`` block;
    calling this function after a plain import will raise NameError.
    """
    sque = Queue('solicitation_load', connection=Redis())
    listings = load_listings_from_file(listings_path)
    # Single-argument parenthesized print gives the same output under the
    # Python 2 print statement and the print() function.
    print("Enqueing solicitation load tasks")
    for listing in listings:
        sque.enqueue(listing_to_graph, listing, graph, db)


def get_listings_from_web():
    """Construct and return a new ListingReader."""
    reader = ListingReader()
    return reader

if __name__ == '__main__':
    # `db` and `graph` are bound at module level on purpose:
    # load_solicitation_listings() reads them as globals.
    db = connect_to_arango()
    build_collections()
    graph = get_technode_graph(db)

    # SBIR contracts go in first, ahead of the solicitations.
    print("Loading contracts")
    load_sbir_contracts(graph)

    print("Enqueuing solicitation jobs")
    load_solicitation_listings()

    print("Enqueuing r2 jobs")
    load_r2_exhibits()
Exemplo n.º 9
0
from technodeminer.persistence.graph import connect_to_arango, get_technode_graph
from pattern.vector import Model, Document


# Module-level setup: open the database, fetch the technode graph, and grab
# the two collections this script reads from.
db = connect_to_arango()
graph = get_technode_graph(db)
r2_exhibits = db.col('r2_exhibits')
solicitations = db.col('solicitations')


# Instead of iterating over every solicitation (solicitations.all()), the
# query is constrained to Air Force solicitations from fiscal year FY16.
solicit_gen = solicitations.get_by_example({"Component": "Air Force", "Fiscal Year": "FY16"})
# One by-example query per budget activity number (ba_num 1-3), all for
# Air Force exhibits with byear 2013.
r2_queries = [{"byear": 2013, "agency": "Air Force", "ba_num": 1},
              {"byear": 2013, "agency": "Air Force", "ba_num": 2},
              {"byear": 2013, "agency": "Air Force", "ba_num": 3}]

# NOTE(review): broader byear-only query; not referenced in the visible part
# of this script -- confirm whether it is still needed.
r2_2013 = {"byear": 2013}
# Accumulator for the R2 exhibits matched by r2_queries (presumably filled by
# the loop that follows -- TODO confirm).
r2_list = []
for query in r2_queries:
    for r2 in r2_exhibits.get_by_example(query):
        try:
            strings = [r2['program_desc']]
            projects = [r2['projects'] for k in r2['projects'].keys()]
            for proj in projects:
                try:
                    strings.append(proj['mission_desc'])
                except KeyError as e:
                    pass