def test_match_paths(add_doc, add_institution):

    """
    If the document has a path, it should be matched "greedily" against
    the institutions - find the institution with the longest shared path.
    """

    i1 = add_institution(url='http://yale.edu')
    i2 = add_institution(url='http://yale.edu/p1')
    i3 = add_institution(url='http://yale.edu/p1/p2')
    i4 = add_institution(url='http://yale.edu/p1/p2/p3')

    d1 = add_doc(log=dict(url='http://yale.edu/syllabus.pdf'))
    d2 = add_doc(log=dict(url='http://yale.edu/p1/syllabus.pdf'))
    d3 = add_doc(log=dict(url='http://yale.edu/p1/p2/syllabus.pdf'))
    d4 = add_doc(log=dict(url='http://yale.edu/p1/p2/p3/syllabus.pdf'))

    Institution_Document.link()

    for i, d in [
        (i1, d1),
        (i2, d2),
        (i3, d3),
        (i4, d4),
    ]:

        assert Institution_Document.select().where(
            Institution_Document.institution==i,
            Institution_Document.document==d,
        )
def test_link(add_doc, add_institution):

    """
    .link() should link documents -> institutions.
    """

    i1 = add_institution(url='http://d1.edu')
    i2 = add_institution(url='http://d2.edu')
    i3 = add_institution(url='http://d3.edu')

    d1 = add_doc(log=dict(url='http://d1.edu/syllabus.pdf'))
    d2 = add_doc(log=dict(url='http://d2.edu/syllabus.pdf'))
    d3 = add_doc(log=dict(url='http://d3.edu/syllabus.pdf'))

    Institution_Document.link()

    for i, d in [
        (i1, d1),
        (i2, d2),
        (i3, d3),
    ]:

        assert Institution_Document.select().where(
            Institution_Document.institution==i,
            Institution_Document.document==d,
        )
def test_match_subdomains(add_doc, add_institution):

    """
    If the document has a subdomain(s), it should be matched "greedily" against
    the institutions - find the institution with the most shared subdomains.
    """

    i1 = add_institution(url='http://yale.edu')
    i2 = add_institution(url='http://sub1.yale.edu')
    i3 = add_institution(url='http://sub2.sub1.yale.edu')
    i4 = add_institution(url='http://sub3.sub2.sub1.yale.edu')

    d1 = add_doc(log=dict(url='http://yale.edu/syllabus.pdf'))
    d2 = add_doc(log=dict(url='http://sub1.yale.edu/syllabus.pdf'))
    d3 = add_doc(log=dict(url='http://sub2.sub1.yale.edu/syllabus.pdf'))
    d4 = add_doc(log=dict(url='http://sub3.sub2.sub1.yale.edu/syllabus.pdf'))

    Institution_Document.link()

    for i, d in [
        (i1, d1),
        (i2, d2),
        (i3, d3),
        (i4, d4),
    ]:

        assert Institution_Document.select().where(
            Institution_Document.institution==i,
            Institution_Document.document==d,
        )
def doc_to_inst():

    """
    Link document -> institution.
    """

    Institution_Document.link()