def test_institution_counts(models):

    """
    Document_Institution.institution_counts() should provide syllabus counts
    for each institution id.
    """

    i1 = Institution.create()
    i2 = Institution.create()
    i3 = Institution.create()

    d1 = Document.create(path='d1')
    d2 = Document.create(path='d2')
    d3 = Document.create(path='d3')
    d4 = Document.create(path='d4')
    d5 = Document.create(path='d5')
    d6 = Document.create(path='d6')

    # 1 document for institution 1.
    Document_Institution.create(institution=i1, document=d1)

    # 2 documents for institution 2.
    Document_Institution.create(institution=i2, document=d2)
    Document_Institution.create(institution=i2, document=d3)

    # 3 documents for institution 3.
    Document_Institution.create(institution=i3, document=d4)
    Document_Institution.create(institution=i3, document=d5)
    Document_Institution.create(institution=i3, document=d6)

    # The counts are keyed by institution id, so the expectation must use the
    # institution rows (not the document rows, whose ids only coincide here).
    assert Document_Institution.institution_counts() == {
        i1.id: 1,
        i2.id: 2,
        i3.id: 3,
    }
def insert_institutions():

    """
    Insert institution rows.

    Thin entry point that delegates to Institution.insert_institutions().
    """

    Institution.insert_institutions()
def cited(out_file):

    """
    Write a CSV of institutions with their id, citation count, and name,
    ordered from most-cited to least-cited.

    Args:
        out_file: A writable file handle that receives the CSV.
    """

    # Header row first.
    fieldnames = ["id", "count", "name"]
    csv_out = csv.DictWriter(out_file, fieldnames)
    csv_out.writeheader()

    citation_count = fn.Count(HLOM_Citation.id)

    # Institutions -> document links -> citations on the same document.
    same_document = (
        Document_Institution.document == HLOM_Citation.document
    )

    query = Institution.select(Institution, citation_count)
    query = query.join(Document_Institution)
    query = query.join(HLOM_Citation, on=same_document)
    query = query.group_by(Institution.id)
    query = query.order_by(citation_count.desc())

    for row in query.naive():
        record = {
            "id": row.id,
            "count": row.count,
            "name": row.metadata["Institution_Name"],
        }
        csv_out.writerow(record)
def queue_geocode():

    """
    Queue geocoding tasks in the worker.

    Takes no arguments: enqueues one `geocode` job on `config.rq` for every
    row returned by Institution.select(), passing the institution id.
    """

    for inst in Institution.select():
        config.rq.enqueue(geocode, inst.id)
def test_state_abbreviations(add_hlom, add_doc):

    """
    HLOM_Citation.index_institutions() should denormalize state
    abbreviations onto the citation rows.
    """

    t1 = add_hlom()
    t2 = add_hlom()
    t3 = add_hlom()

    s1 = add_doc('syllabus1')
    s2 = add_doc('syllabus2')
    s3 = add_doc('syllabus3')

    c1 = HLOM_Citation.create(document=s1, record=t1)
    c2 = HLOM_Citation.create(document=s2, record=t2)
    c3 = HLOM_Citation.create(document=s3, record=t3)

    # Create institutions with states.
    AL = Institution.create(metadata={'Institution_State': 'AL'})
    CT = Institution.create(metadata={'Institution_State': 'CT'})
    CA = Institution.create(metadata={'Institution_State': 'CA'})

    # Link documents -> institutions. These must be the syllabus documents
    # that the citations point at, not the HLOM records.
    Document_Institution.create(document=s1, institution=AL)
    Document_Institution.create(document=s2, institution=CT)
    Document_Institution.create(document=s3, institution=CA)

    HLOM_Citation.index_institutions()

    # Re-fetch the citations to pick up the indexed values.
    c1 = HLOM_Citation.reload(c1)
    c2 = HLOM_Citation.reload(c2)
    c3 = HLOM_Citation.reload(c3)

    assert c1.state == 'AL'
    assert c2.state == 'CT'
    assert c3.state == 'CA'
def test_institution_ids(add_hlom, add_doc):

    """
    HLOM_Citation.index_institutions() should denormalize institution ids
    onto the citation rows.
    """

    t1 = add_hlom()
    t2 = add_hlom()
    t3 = add_hlom()

    s1 = add_doc('syllabus1')
    s2 = add_doc('syllabus2')
    s3 = add_doc('syllabus3')

    c1 = HLOM_Citation.create(document=s1, record=t1)
    c2 = HLOM_Citation.create(document=s2, record=t2)
    c3 = HLOM_Citation.create(document=s3, record=t3)

    i1 = Institution.create()
    i2 = Institution.create()
    i3 = Institution.create()

    # Link documents -> institutions. These must be the syllabus documents
    # that the citations point at, not the HLOM records.
    Document_Institution.create(document=s1, institution=i1)
    Document_Institution.create(document=s2, institution=i2)
    Document_Institution.create(document=s3, institution=i3)

    HLOM_Citation.index_institutions()

    # Re-fetch the citations to pick up the indexed values.
    c1 = HLOM_Citation.reload(c1)
    c2 = HLOM_Citation.reload(c2)
    c3 = HLOM_Citation.reload(c3)

    assert c1.institution == i1
    assert c2.institution == i2
    assert c3.institution == i3
def geocode(id):

    """
    Look up coordinates for one institution and store them in its metadata.

    The institution's `geocoding_query` is sent to OpenMapQuest; when a
    location comes back, its latitude and longitude are written under the
    'Latitude' and 'Longitude' metadata keys and the row is saved. When no
    location is found the row is left untouched.

    Args:
        id (int): The institution id.
    """

    geocoder = OpenMapQuest(config['mapquest']['api_key'])

    institution = Institution.get(Institution.id == id)

    # Ask the geocoder for a match.
    match = geocoder.geocode(institution.geocoding_query, timeout=10)

    # Nothing found - nothing to persist.
    if not match:
        return

    # Persist the coordinate.
    institution.metadata['Latitude'] = match.latitude
    institution.metadata['Longitude'] = match.longitude
    institution.save()
def lonlats(out_file):

    """
    Write a CSV of institution names with longitude / latitude, for Fusion
    Tables.

    Only institutions whose metadata contains a "Latitude" entry are
    written.

    Args:
        out_file: A writable file handle that receives the CSV.
    """

    # Header row first.
    fieldnames = ["name", "longitude", "latitude"]
    csv_out = csv.DictWriter(out_file, fieldnames)
    csv_out.writeheader()

    # Restrict to institutions that already carry a coordinate.
    has_latitude = Institution.metadata.contains("Latitude")
    with_coords = Institution.select().where(has_latitude)

    for institution in with_coords:
        meta = institution.metadata
        record = {
            "name": meta["Institution_Name"],
            "longitude": meta["Longitude"],
            "latitude": meta["Latitude"],
        }
        csv_out.writerow(record)