Пример #1
0
    def ingest(cls, package='osp.fields', path='data/fields.csv'):
        """
        Load field and subfield rows from the rows of `read_csv`.

        For each row, a `Field` named after the cleaned "Primary Field"
        column is inserted when the "Alpha Category" column is truthy. A
        `Subfield` is then inserted for the row if a `Field` with that name
        can be found; otherwise the row is skipped.

        Args:
            package (str): Forwarded to `read_csv` as its first argument.
            path (str): Forwarded to `read_csv` as its second argument.
        """

        for record in read_csv(package, path):

            # Sanitize the parent and child names.
            parent_name = clean_field_name(record['Primary Field'])
            child_name = clean_field_name(record['Secondary Field'])

            # Parse the abbreviations; only a truthy result gets filtered.
            parsed = parse_abbrs(record['ABBRV'])
            abbreviations = filter_abbrs(parsed) if parsed else parsed

            # A truthy "Alpha Category" marks a row that adds a parent field.
            if record['Alpha Category']:
                Field.create(name=parent_name)

            # Look the parent field up by name.
            parent = Field.select().where(Field.name == parent_name).first()

            # Without a parent field there is nothing to attach to.
            if not parent:
                continue

            Subfield.create(
                name=child_name,
                abbreviations=abbreviations,
                field=parent,
            )
Пример #2
0
    def ingest(cls, package="osp.fields", path="data/fields.csv"):
        """
        Write `Field` / `Subfield` rows for each row from `read_csv`.

        A row whose "Alpha Category" value is truthy creates a `Field` from
        its cleaned "Primary Field" name. Every row then creates a
        `Subfield` under the `Field` of that name, provided one is found.

        Args:
            package (str): First argument handed to `read_csv`.
            path (str): Second argument handed to `read_csv`.
        """

        rows = read_csv(package, path)

        for entry in rows:

            # Sanitize both names up front.
            primary = clean_field_name(entry["Primary Field"])
            secondary = clean_field_name(entry["Secondary Field"])

            # Parse the abbreviations and filter them when any were parsed.
            abbreviations = parse_abbrs(entry["ABBRV"])
            if abbreviations:
                abbreviations = filter_abbrs(abbreviations)

            # Rows flagged with an alpha category introduce the parent field.
            is_parent_row = bool(entry["Alpha Category"])
            if is_parent_row:
                Field.create(name=primary)

            # Find the parent by name; rows with no match are skipped.
            matches = Field.select().where(Field.name == primary)
            owner = matches.first()
            if not owner:
                continue

            Subfield.create(
                name=secondary,
                abbreviations=abbreviations,
                field=owner,
            )
Пример #3
0
def test_field_facets(add_citation, add_subfield, add_subfield_document):
    """
    field_facets() should return label/value/count dicts, one per field.

    Three fields get 3, 2 and 1 linked citation documents respectively, and
    the facets are expected in that order.
    """

    labels = ('Field 1', 'Field 2', 'Field 3')
    counts = (3, 2, 1)

    # Create all fields first, then one subfield under each of them.
    fields = [Field.create(name=label) for label in labels]
    subfields = [add_subfield(field=field) for field in fields]

    # Link the requested number of citation documents to each subfield.
    for subfield, total in zip(subfields, counts):
        for _ in range(total):
            citation = add_citation()
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    Citation_Index.es_insert()
    Field_Index.es_insert()

    expected = [
        dict(label=label, value=field.id, count=total)
        for label, field, total in zip(labels, fields, counts)
    ]

    assert field_facets() == expected
def test_insert_rows():
    """
    Subfield.ingest() should load field and subfield rows.

    The fixture is expected to yield 3 fields and 9 subfields, with three
    consecutive subfields under each field.
    """

    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/insert_rows.csv',
    )

    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    field_names = ('Field1', 'Field2', 'Field3')

    subfield_groups = (
        ('Subfield1', 'Subfield2', 'Subfield3'),
        ('Subfield4', 'Subfield5', 'Subfield6'),
        ('Subfield7', 'Subfield8', 'Subfield9'),
    )

    # Fetch every row up front: fields first, then the subfields in order.
    parents = [Field.get(Field.name == name) for name in field_names]

    children = [
        [Subfield.get(Subfield.name == name) for name in group]
        for group in subfield_groups
    ]

    # Each group of subfields must point at the field in the same position.
    for parent, group in zip(parents, children):
        for child in group:
            assert child.field == parent
def test_insert_rows():
    """
    Subfield.ingest() should load field and subfield rows.

    Checks the row counts and then the parent field of each subfield.
    """

    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/insert_rows.csv',
    )

    assert Field.select().count() == 3
    assert Subfield.select().count() == 9

    # Subfield name -> name of the field it should belong to.
    expected_parent = (
        ('Subfield1', 'Field1'),
        ('Subfield2', 'Field1'),
        ('Subfield3', 'Field1'),
        ('Subfield4', 'Field2'),
        ('Subfield5', 'Field2'),
        ('Subfield6', 'Field2'),
        ('Subfield7', 'Field3'),
        ('Subfield8', 'Field3'),
        ('Subfield9', 'Field3'),
    )

    # Load the fields first, then every subfield, before asserting anything.
    fields_by_name = {}
    for field_name in ('Field1', 'Field2', 'Field3'):
        fields_by_name[field_name] = Field.get(Field.name == field_name)

    loaded = []
    for subfield_name, field_name in expected_parent:
        subfield = Subfield.get(Subfield.name == subfield_name)
        loaded.append((subfield, fields_by_name[field_name]))

    for subfield, parent in loaded:
        assert subfield.field == parent
    def _subfield(name='Field', abbreviations=None, field=None):
        """
        Create and return a `Subfield` row.

        Args:
            name (str): Subfield name.
            abbreviations: Value stored as the subfield's abbreviations.
            field: Parent field; when falsy, a `Field` named 'Parent' is
                created and used instead.
        """

        # Fall back to a freshly created parent when none was passed.
        parent = field or Field.create(name='Parent')

        created = Subfield.create(
            name=name,
            abbreviations=abbreviations,
            field=parent,
        )

        return created
Пример #7
0
    def _subfield(name='Field', abbreviations=None, field=None):
        """
        Insert a `Subfield` and hand it back to the caller.

        Args:
            name (str): Name given to the subfield.
            abbreviations: Stored on the subfield unchanged.
            field: Owning field. A falsy value makes the helper create a
                `Field` named 'Parent' to serve as the owner.
        """

        owner = field if field else Field.create(name='Parent')

        attributes = dict(
            name=name,
            abbreviations=abbreviations,
            field=owner,
        )

        return Subfield.create(**attributes)
def test_field_facets(add_citation, add_subfield, add_subfield_document):
    """
    field_facets() should provide a list of label/value/count dicts.

    Field 1, Field 2 and Field 3 receive 3, 2 and 1 citation documents.
    """

    # (label, number of linked citation documents), in expected facet order.
    plan = (("Field 1", 3), ("Field 2", 2), ("Field 3", 1))

    # All fields are created before any subfield.
    fields = []
    for label, _ in plan:
        fields.append(Field.create(name=label))

    subfields = []
    for field in fields:
        subfields.append(add_subfield(field=field))

    # Attach the planned number of citation documents to each subfield.
    for (_, total), subfield in zip(plan, subfields):
        remaining = total
        while remaining > 0:
            citation = add_citation()
            add_subfield_document(subfield=subfield, document=citation.document)
            remaining -= 1

    Citation_Index.es_insert()
    Field_Index.es_insert()

    facets = field_facets()

    expected = []
    for (label, total), field in zip(plan, fields):
        expected.append(dict(label=label, value=field.id, count=total))

    assert facets == expected
Пример #9
0
    def es_stream_docs(cls):
        """
        Stream one document per `Field` row.

        Yields:
            dict: `_id` and `name`, copied from the row's `id` and `name`.
        """

        for field in query_bar(Field.select()):
            yield {'_id': field.id, 'name': field.name}
def test_clean_field_names():
    """
    Ingested field and subfield names should come out sanitized.
    """

    Subfield.ingest(
        'osp.test.fields.models.subfield',
        'fixtures/ingest/clean_field_names.csv',
    )

    # The cleaned field name must match a row.
    assert Field.select().where(Field.name == 'Field1')

    # ... and so must each cleaned subfield name.
    for cleaned in ('Subfield1', 'Subfield2', 'Subfield3'):
        assert Subfield.select().where(Subfield.name == cleaned)
    def es_stream_docs(cls):
        """
        Generate the documents to index, one for each `Field` row.

        Yields:
            dict: The next document, holding the row's `id` under `_id` and
                its `name` under `name`.
        """

        rows = query_bar(Field.select())

        for record in rows:
            document = {
                '_id': record.id,
                'name': record.name,
            }
            yield document
def test_clean_field_names():
    """
    Field and subfield names should be sanitized during ingest.
    """

    fixture = (
        'osp.test.fields.models.subfield',
        'fixtures/ingest/clean_field_names.csv',
    )

    Subfield.ingest(*fixture)

    field_query = Field.select().where(Field.name == 'Field1')
    assert field_query

    # Each sanitized subfield name must match a row.
    for expected_name in ('Subfield1', 'Subfield2', 'Subfield3'):
        subfield_query = Subfield.select().where(
            Subfield.name == expected_name
        )
        assert subfield_query
Пример #13
0
def test_es_insert():
    """
    Field_Index.es_insert() should load all fields into Elasticsearch.

    Each field row is fetched back from the 'field' index by id and its
    stored name is compared with the row's name.
    """

    Subfield.ingest()
    Field_Index.es_insert()

    for row in Field.select():
        hit = config.es.get(index='field', id=row.id)
        stored_name = hit['_source']['name']
        assert stored_name == row.name