def test_index_institution_refs(add_citation, add_institution):
    """
    A citation whose document is linked to an institution should be indexed
    with that institution's id, state, and country.
    """

    cited = add_citation()
    inst = add_institution(state='CA', country='US')

    # Associate the citation's document with the institution.
    Institution_Document.create(institution=inst, document=cited.document)

    Citation_Index.es_insert()

    # Fetch the indexed citation and inspect its stored fields.
    source = config.es.get(index='citation', id=cited.id)['_source']

    assert source['institution_id'] == inst.id
    assert source['state'] == 'CA'
    assert source['country'] == 'US'
def test_index_institution_refs(add_citation, add_institution):
    """
    Indexing a citation whose document has an institution link should copy
    the institution id, state, and country onto the indexed document.
    """

    cited = add_citation()
    inst = add_institution(state='CA', country='US')

    # Link inst -> citation (through the citation's document).
    Institution_Document.create(institution=inst, document=cited.document)

    Citation_Index.es_insert()

    hit = config.es.get(index='citation', id=cited.id)

    # Field name -> value the indexed document is expected to carry.
    expected = (
        ('institution_id', inst.id),
        ('state', 'CA'),
        ('country', 'US'),
    )

    for field, value in expected:
        assert hit['_source'][field] == value
def test_institution(add_doc, add_institution, add_citation):
    """
    Citation#institution should resolve to the institution that is linked to
    the citation's document.
    """

    doc = add_doc()
    inst = add_institution()

    # Associate the document with the institution before citing it.
    Institution_Document.create(institution=inst, document=doc)

    cited = add_citation(document=doc)
    linked = cited.institution

    assert linked.id == inst.id
Exemplo n.º 4
0
def test_unique_pairs(add_doc, add_institution):
    """
    Creating a second link for the same doc -> inst pair should raise an
    IntegrityError.
    """

    institution = add_institution()
    document = add_doc()

    # Both create() calls receive exactly the same pair.
    pair = dict(institution=institution, document=document)

    # The first link is accepted.
    Institution_Document.create(**pair)

    # Repeating the identical pair must be rejected.
    with pytest.raises(IntegrityError):
        Institution_Document.create(**pair)
def doc_to_inst(doc_id):
    """
    Link a document to the institution that shares its syllabus domain.

    Fetches the document with id `doc_id`, takes the first institution whose
    `domain` equals `doc.syllabus.domain`, and creates an
    Institution_Document row for the pair. Nothing is created when no
    institution matches.
    """

    document = Document.get(Document.id == doc_id)

    # Institutions registered under the same domain as the syllabus.
    candidates = Institution.select().where(
        Institution.domain == document.syllabus.domain
    )

    match = candidates.first()

    # No institution for this domain -> leave the document unlinked.
    if not match:
        return

    Institution_Document.create(institution=match, document=document)
Exemplo n.º 6
0
def test_country_facets(add_institution, add_citation):
    """
    country_facets() should return one label/value/count dict per country;
    the expected list here runs from most to fewest citations.
    """

    inst_au = add_institution(country='AU')
    inst_ca = add_institution(country='CA')
    inst_nz = add_institution(country='NZ')

    # (institution, number of citations linked to it)
    plan = ((inst_au, 3), (inst_ca, 2), (inst_nz, 1))

    for inst, total in plan:
        for _ in range(total):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    facets = country_facets()

    assert facets == [
        {'label': 'Australia', 'value': inst_au.country, 'count': 3},
        {'label': 'Canada', 'value': inst_ca.country, 'count': 2},
        {'label': 'New Zealand', 'value': inst_nz.country, 'count': 1},
    ]
def test_filter_country(add_text, add_citation, add_institution):
    """
    Ranking with a `country` filter should only report the texts cited
    through institutions with that country value, with their counts.
    """

    text_a = add_text()
    text_b = add_text()
    text_c = add_text()

    inst_usa = add_institution(country='USA')
    inst_can = add_institution(country='CAN')

    # (text, institution, number of citations)
    plan = (
        (text_a, inst_usa, 3),
        (text_b, inst_usa, 2),
        (text_c, inst_can, 1),
    )

    for text, inst, total in plan:
        for _ in range(total):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'country': 'USA'})

    # The text cited only via the 'CAN' institution is filtered out.
    assert ranks == {str(text_a.id): 3, str(text_b.id): 2}
def test_filter_institution(add_text, add_citation, add_institution):
    """
    Ranking with an `institution_id` filter should only report the texts
    cited through that institution, with their citation counts.
    """

    text_a = add_text()
    text_b = add_text()
    text_c = add_text()

    wanted = add_institution()
    other = add_institution()

    def cite(text, inst, times):
        # Create `times` citations of `text`, each linked to `inst`.
        for _ in range(times):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    cite(text_a, wanted, 3)
    cite(text_b, wanted, 2)
    cite(text_c, other, 1)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'institution_id': wanted.id})

    # The text cited only via the other institution is filtered out.
    assert ranks == {str(text_a.id): 3, str(text_b.id): 2}
def test_merge_included(add_institution, add_citation):
    """
    Institutions passed via `include` should be merged into the baseline
    ranking: ones already listed are not repeated, the rest are appended.
    """

    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')

    # (institution, number of citations linked to it)
    for inst, total in ((first, 3), (second, 2), (third, 1)):
        for _ in range(total):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()
    Institution_Index.es_insert()

    requested = [second.id, third.id]
    facets = institution_facets(depth=2, include=requested)

    assert facets == [
        {'label': 'Institution 1', 'value': first.id, 'count': 3},
        # Dedupe 2.
        {'label': 'Institution 2', 'value': second.id, 'count': 2},
        # Append 3.
        {'label': 'Institution 3', 'value': third.id, 'count': 1},
    ]
Exemplo n.º 10
0
def test_state_facets(add_institution, add_citation):
    """
    state_facets() should return one label/value/count dict per state; the
    expected list here runs from most to fewest citations.
    """

    inst_ca = add_institution(state='CA')
    inst_al = add_institution(state='AL')
    inst_ma = add_institution(state='MA')

    # (institution, number of citations linked to it)
    plan = ((inst_ca, 3), (inst_al, 2), (inst_ma, 1))

    for inst, total in plan:
        for _ in range(total):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    facets = state_facets()

    assert facets == [
        {'label': 'California', 'value': inst_ca.state, 'count': 3},
        {'label': 'Alabama', 'value': inst_al.state, 'count': 2},
        {'label': 'Massachusetts', 'value': inst_ma.state, 'count': 1},
    ]
Exemplo n.º 11
0
def test_filter_country(add_text, add_citation, add_institution):
    """
    Filtering the ranking on a country value should keep only the texts
    cited through institutions in that country.
    """

    kept_a = add_text()
    kept_b = add_text()
    dropped = add_text()

    usa = add_institution(country='USA')
    can = add_institution(country='CAN')

    def cite(text, inst, times):
        # Create `times` citations of `text`, each linked to `inst`.
        for _ in range(times):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    cite(kept_a, usa, 3)
    cite(kept_b, usa, 2)
    cite(dropped, can, 1)

    Citation_Index.es_insert()

    query = dict(country='USA')
    ranks = Citation_Index.compute_ranking(query)

    assert ranks == {
        str(kept_a.id): 3,
        str(kept_b.id): 2,
    }
Exemplo n.º 12
0
def test_filter_institution(add_text, add_citation, add_institution):
    """
    Filtering the ranking on an institution id should keep only the texts
    cited through that institution.
    """

    kept_a = add_text()
    kept_b = add_text()
    dropped = add_text()

    target = add_institution()
    bystander = add_institution()

    # (text, institution, number of citations)
    plan = (
        (kept_a, target, 3),
        (kept_b, target, 2),
        (dropped, bystander, 1),
    )

    for text, inst, total in plan:
        for _ in range(total):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    query = dict(institution_id=target.id)
    ranks = Citation_Index.compute_ranking(query)

    assert ranks == {
        str(kept_a.id): 3,
        str(kept_b.id): 2,
    }
def test_state_facets(add_institution, add_citation):
    """
    state_facets() should provide label/value/count dicts for the states of
    the institutions that have indexed citations.
    """

    california = add_institution(state='CA')
    alabama = add_institution(state='AL')
    massachusetts = add_institution(state='MA')

    def cite(inst, times):
        # Create `times` citations, each linked to `inst` via its document.
        for _ in range(times):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    cite(california, 3)
    cite(alabama, 2)
    cite(massachusetts, 1)

    Citation_Index.es_insert()

    facets = state_facets()

    assert facets == [
        {'label': 'California', 'value': california.state, 'count': 3},
        {'label': 'Alabama', 'value': alabama.state, 'count': 2},
        {'label': 'Massachusetts', 'value': massachusetts.state, 'count': 1},
    ]
def test_merge_included_facets(add_institution, add_citation):
    """
    Included facets that are already present in the ranking should not be
    duplicated in the returned counts.
    """

    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')

    # (institution, number of citations linked to it)
    for inst, total in ((first, 3), (second, 2), (third, 1)):
        for _ in range(total):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    requested = [second.id, third.id]
    counts = Citation_Index.count_facets('institution_id', include=requested)

    # Dedupe 2 and 3: each requested id still appears exactly once.
    assert counts == [(first.id, 3), (second.id, 2), (third.id, 1)]
def test_institution_facets(add_institution, add_citation):
    """
    institution_facets() should return one label/value/count dict per
    institution; the expected list here runs from most to fewest citations.
    """

    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')

    # (institution, number of citations linked to it)
    plan = ((first, 3), (second, 2), (third, 1))

    for inst, total in plan:
        for _ in range(total):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    # Both indexes are populated before the facets are requested.
    Citation_Index.es_insert()
    Institution_Index.es_insert()

    facets = institution_facets()

    assert facets == [
        {'label': 'Institution 1', 'value': first.id, 'count': 3},
        {'label': 'Institution 2', 'value': second.id, 'count': 2},
        {'label': 'Institution 3', 'value': third.id, 'count': 1},
    ]
def test_institution_facets(add_institution, add_citation):
    """
    institution_facets() should provide label/value/count dicts, using the
    institution name as the label and the institution id as the value.
    """

    inst_a = add_institution(name='Institution 1')
    inst_b = add_institution(name='Institution 2')
    inst_c = add_institution(name='Institution 3')

    def cite(inst, times):
        # Create `times` citations, each linked to `inst` via its document.
        for _ in range(times):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    cite(inst_a, 3)
    cite(inst_b, 2)
    cite(inst_c, 1)

    Citation_Index.es_insert()
    Institution_Index.es_insert()

    facets = institution_facets()

    assert facets == [
        {'label': 'Institution 1', 'value': inst_a.id, 'count': 3},
        {'label': 'Institution 2', 'value': inst_b.id, 'count': 2},
        {'label': 'Institution 3', 'value': inst_c.id, 'count': 1},
    ]
def test_country_facets(add_institution, add_citation):
    """
    country_facets() should provide label/value/count dicts for the
    countries of the institutions that have indexed citations.
    """

    australia = add_institution(country='AU')
    canada = add_institution(country='CA')
    new_zealand = add_institution(country='NZ')

    def cite(inst, times):
        # Create `times` citations, each linked to `inst` via its document.
        for _ in range(times):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    cite(australia, 3)
    cite(canada, 2)
    cite(new_zealand, 1)

    Citation_Index.es_insert()

    facets = country_facets()

    assert facets == [
        {'label': 'Australia', 'value': australia.country, 'count': 3},
        {'label': 'Canada', 'value': canada.country, 'count': 2},
        {'label': 'New Zealand', 'value': new_zealand.country, 'count': 1},
    ]
def test_append_included_facets(add_institution, add_citation):
    """
    When "included" facets have counts that place them below the baseline
    ranking, they should be appended to the bottom of the list.
    """

    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')
    fourth = add_institution(name='Institution 4')

    # (institution, number of citations linked to it)
    plan = ((first, 4), (second, 3), (third, 2), (fourth, 1))

    for inst, total in plan:
        for _ in range(total):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    requested = [third.id, fourth.id]

    counts = Citation_Index.count_facets(
        'institution_id',
        include=requested,
        depth=2,
    )

    baseline = [(first.id, 4), (second.id, 3)]

    # Include 3 and 4.
    appended = [(third.id, 2), (fourth.id, 1)]

    assert counts == baseline + appended