def ingest(cls, package='osp.fields', path='data/fields.csv'):
    """
    Ingest fields and subfields from CSV rows.

    Rows come from `read_csv(package, path)` and are read through the
    'Primary Field', 'Secondary Field', 'ABBRV' and 'Alpha Category'
    columns. A row with a truthy 'Alpha Category' creates a new parent
    Field; any row whose primary field can then be found gets a Subfield
    linked to that parent.

    Args:
        package (str): Passed through to `read_csv`.
        path (str): Passed through to `read_csv`.
    """
    for record in read_csv(package, path):

        # Sanitize field names.
        primary = clean_field_name(record['Primary Field'])
        secondary = clean_field_name(record['Secondary Field'])

        # Parse abbreviations; only filter when something was parsed.
        abbreviations = parse_abbrs(record['ABBRV'])
        if abbreviations:
            abbreviations = filter_abbrs(abbreviations)

        # A truthy 'Alpha Category' marks a parent-field row.
        if record['Alpha Category']:
            Field.create(name=primary)

        # Look up the parent field; rows without one are skipped.
        parent = Field.select().where(Field.name == primary).first()
        if not parent:
            continue

        Subfield.create(
            name=secondary,
            abbreviations=abbreviations,
            field=parent,
        )
def ingest(cls, package="osp.fields", path="data/fields.csv"):
    """
    Ingest fields.

    Iterates the rows returned by `read_csv(package, path)`. For each row
    the primary and secondary field names are cleaned, the "ABBRV" column
    is parsed (and filtered when non-empty), a parent Field is created when
    "Alpha Category" is truthy, and a Subfield is created under the parent
    Field matching the primary name, if one exists.

    Args:
        package (str): Forwarded to `read_csv`.
        path (str): Forwarded to `read_csv`.
    """
    rows = read_csv(package, path)

    for row in rows:

        # Sanitize field names.
        parent_name = clean_field_name(row["Primary Field"])
        child_name = clean_field_name(row["Secondary Field"])

        # Parse abbreviations, filtering only a non-empty result.
        parsed = parse_abbrs(row["ABBRV"])
        child_abbrs = filter_abbrs(parsed) if parsed else parsed

        # If parent field, write row.
        if row["Alpha Category"]:
            Field.create(name=parent_name)

        # Query for a parent field; without one no subfield is written.
        matches = Field.select().where(Field.name == parent_name)
        owner = matches.first()

        if owner:
            Subfield.create(
                name=child_name,
                abbreviations=child_abbrs,
                field=owner,
            )
def test_field_facets(add_citation, add_subfield, add_subfield_document):
    """
    field_facets() should provide a list of label/value/count dicts.
    """
    labels = ['Field 1', 'Field 2', 'Field 3']
    counts = [3, 2, 1]

    # Create all fields first, then one subfield per field.
    fields = [Field.create(name=label) for label in labels]
    subfields = [add_subfield(field=field) for field in fields]

    # Link 3, 2 and 1 cited documents to the respective subfields.
    for subfield, total in zip(subfields, counts):
        for _ in range(total):
            citation = add_citation()
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    Citation_Index.es_insert()
    Field_Index.es_insert()

    expected = [
        dict(label=label, value=field.id, count=total)
        for label, field, total in zip(labels, fields, counts)
    ]

    assert field_facets() == expected
def test_insert_rows():
    """
    Subfield.ingest() should load field and subfield rows.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/insert_rows.csv',
    )

    # The fixture should produce 3 fields and 9 subfields.
    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    field_names = ('Field1', 'Field2', 'Field3')
    subfield_names = (
        'Subfield1', 'Subfield2', 'Subfield3',
        'Subfield4', 'Subfield5', 'Subfield6',
        'Subfield7', 'Subfield8', 'Subfield9',
    )

    # Fetch every row before checking any parent link.
    fields = [Field.get(Field.name == name) for name in field_names]
    subfields = [
        Subfield.get(Subfield.name == name)
        for name in subfield_names
    ]

    # Subfields 1-3 belong to Field1, 4-6 to Field2, 7-9 to Field3.
    for position, subfield in enumerate(subfields):
        assert subfield.field == fields[position // 3]
def test_insert_rows():
    """
    Subfield.ingest() should load field and subfield rows.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/insert_rows.csv',
    )

    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    # Parent field name -> names of the subfields expected beneath it.
    expected_children = [
        ('Field1', ('Subfield1', 'Subfield2', 'Subfield3')),
        ('Field2', ('Subfield4', 'Subfield5', 'Subfield6')),
        ('Field3', ('Subfield7', 'Subfield8', 'Subfield9')),
    ]

    # Look up all parents, then all children, before asserting anything.
    parents = [
        Field.get(Field.name == parent_name)
        for parent_name, _ in expected_children
    ]
    child_groups = [
        [Subfield.get(Subfield.name == child_name) for child_name in names]
        for _, names in expected_children
    ]

    for parent, children in zip(parents, child_groups):
        for child in children:
            assert child.field == parent
def _subfield(name='Field', abbreviations=None, field=None):
    """
    Create a Subfield row.

    When no (truthy) `field` is supplied, a Field named 'Parent' is
    created and used as the parent.

    Args:
        name (str)
        abbreviations: Passed through to Subfield.create().
        field: The parent Field, or None to create one.

    Returns:
        The value returned by Subfield.create().
    """
    parent = field or Field.create(name='Parent')

    return Subfield.create(
        name=name,
        abbreviations=abbreviations,
        field=parent,
    )
def test_field_facets(add_citation, add_subfield, add_subfield_document):
    """
    field_facets() should provide a list of label/value/count dicts.
    """

    def cite(subfield, times):
        # Attach `times` freshly cited documents to the subfield.
        for _ in range(times):
            citation = add_citation()
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    first = Field.create(name="Field 1")
    second = Field.create(name="Field 2")
    third = Field.create(name="Field 3")

    first_sub = add_subfield(field=first)
    second_sub = add_subfield(field=second)
    third_sub = add_subfield(field=third)

    cite(first_sub, 3)
    cite(second_sub, 2)
    cite(third_sub, 1)

    Citation_Index.es_insert()
    Field_Index.es_insert()

    expected = [
        {"label": "Field 1", "value": first.id, "count": 3},
        {"label": "Field 2", "value": second.id, "count": 2},
        {"label": "Field 3", "value": third.id, "count": 1},
    ]

    assert field_facets() == expected
def es_stream_docs(cls):
    """
    Index fields.

    Walks every Field row (wrapped in `query_bar`) and emits one document
    per row.

    Yields:
        dict: The next document, with `_id` and `name` keys.
    """
    for field in query_bar(Field.select()):
        yield {
            '_id': field.id,
            'name': field.name,
        }
def test_clean_field_names():
    """
    Field and subfield names should be sanitized.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/clean_field_names.csv',
    )

    # The field must be findable under its cleaned name.
    assert Field.select().where(Field.name == 'Field1')

    # Likewise for each subfield.
    for cleaned in ('Subfield1', 'Subfield2', 'Subfield3'):
        assert Subfield.select().where(Subfield.name == cleaned)
def test_clean_field_names():
    """
    Field and subfield names should be sanitized.
    """
    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/clean_field_names.csv',
    )

    # Each query must be truthy, i.e. match a row stored under the
    # cleaned name.
    field_query = Field.select().where(Field.name == 'Field1')
    assert field_query

    first_query = Subfield.select().where(Subfield.name == 'Subfield1')
    assert first_query

    second_query = Subfield.select().where(Subfield.name == 'Subfield2')
    assert second_query

    third_query = Subfield.select().where(Subfield.name == 'Subfield3')
    assert third_query
def test_es_insert():
    """
    Field_Index.es_insert() should load all fields into Elasticsearch.
    """
    Subfield.ingest()
    Field_Index.es_insert()

    # Every Field row should be retrievable from the 'field' index by id,
    # with a matching name in its source.
    for row in Field.select():
        indexed = config.es.get(index='field', id=row.id)
        assert indexed['_source']['name'] == row.name