Exemplo n.º 1
0
 def test_item_count(self):
     '''Test that item_count is picked up.

     The item-count fixture must map to a solr doc with item_count == 2;
     the plain fixture must map to a doc that has no item_count key.
     '''
     # Context managers close each fixture file as soon as it is parsed
     # instead of leaving the handle open until garbage collection.
     with open(DIR_FIXTURES + '/couchdb_item_count.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['id'], '0c0e6ee502a2afda21128841f0addf23')
     self.assertEqual(sdoc['item_count'], 2)

     with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['id'], 'ark:/13030/ft009nb05r')
     self.assertNotIn('item_count', sdoc)
Exemplo n.º 2
0
 def test_item_count(self):
     """Test that item_count is picked up.

     The item-count fixture must map to a solr doc with item_count == 2;
     the plain fixture must map to a doc that has no item_count key.
     """
     # Context managers close each fixture file as soon as it is parsed
     # instead of leaving the handle open until garbage collection.
     with open(DIR_FIXTURES + "/couchdb_item_count.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["id"], "0c0e6ee502a2afda21128841f0addf23")
     self.assertEqual(sdoc["item_count"], 2)

     with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["id"], "ark:/13030/ft009nb05r")
     self.assertNotIn("item_count", sdoc)
Exemplo n.º 3
0
 def test_decade_facet(self):
     '''Test generation of decade facet

     Currently generated from sourceResource.date.displayDate
     '''
     # Load fixtures through a context manager so the files are closed.
     with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['facet_decade'], {'1880s', '1890s'})

     # no "date" in sourceResource
     with open(DIR_FIXTURES + '/couchdb_nocampus.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['facet_decade'], {'unknown'})
Exemplo n.º 4
0
 def test_decade_facet(self):
     """Test generation of decade facet

     Currently generated from sourceResource.date.displayDate
     """
     # Load fixtures through a context manager so the files are closed.
     with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["facet_decade"], {"1880s", "1890s"})

     # no "date" in sourceResource
     with open(DIR_FIXTURES + "/couchdb_nocampus.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["facet_decade"], {"unknown"})
Exemplo n.º 5
0
 def test_type_mapping(self):
     """Test how sourceResource.type is mapped to the solr "type" field.

     The unmodified fixture must map to "image". Each raw value in the
     case table is then written into the same couch doc, which is
     re-mapped and checked against the paired expected solr value.
     """
     with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
         doc = json.load(fixture)
     ret = map_couch_to_solr_doc(doc)
     self.assertEqual(ret["type"], "image")

     # (raw sourceResource type, expected solr type), checked in order.
     cases = (
         ("moving image", "moving image"),
         ("movingimage", "moving image"),
         ("Physical ObjectXX", "physical object"),
         ("physicalobject", "physical object"),
     )
     for raw_type, expected in cases:
         doc["sourceResource"]["type"] = raw_type
         ret = map_couch_to_solr_doc(doc)
         self.assertEqual(ret["type"], expected)
Exemplo n.º 6
0
 def test_type_mapping(self):
     '''Test how sourceResource.type is mapped to the solr 'type' field.

     The unmodified fixture must map to 'image'. Each raw value in the
     case table is then written into the same couch doc, which is
     re-mapped and checked against the paired expected solr value.
     '''
     with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
         doc = json.load(fixture)
     ret = map_couch_to_solr_doc(doc)
     self.assertEqual(ret['type'], 'image')

     # (raw sourceResource type, expected solr type), checked in order.
     cases = (
         ('moving image', 'moving image'),
         ('movingimage', 'moving image'),
         ('Physical ObjectXX', 'physical object'),
         ('physicalobject', 'physical object'),
     )
     for raw_type, expected in cases:
         doc['sourceResource']['type'] = raw_type
         ret = map_couch_to_solr_doc(doc)
         self.assertEqual(ret['type'], expected)
Exemplo n.º 7
0
 def test_sort_title_string_only(self):
     '''Many of the sourceResource title fields are flat strings.
     Deal with this.

     The flat-string title fixture must still produce the expected
     sort_title value.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_title_flat_string.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['sort_title'],
                      'atlas negative collection image')
Exemplo n.º 8
0
 def test_map_date_not_a_list(self):
     """Test how the mapping works when the sourceResource/date is a dict
     not a list
     """
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + "/couchdb_solr_date_map.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["date"], ["between 1885-1890"])
Exemplo n.º 9
0
 def test_map_date_not_a_list(self):
     '''Test how the mapping works when the sourceResource/date is a dict
     not a list
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_solr_date_map.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['date'], ['between 1885-1890'])
Exemplo n.º 10
0
 def test_dejson_from_map(self):
     '''Test that the dejson works from the mapping function.

     The mapped 'coverage' field of the UCLA fixture must be a flat list
     of place-name strings.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_ucla.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     expected_coverage = [
         "Topanga (Calif.)",
         "Pacific Palisades, Los Angeles (Calif.)",
         "Venice (Los Angeles, Calif.)",
         "Los Angeles (Calif.)",
     ]
     self.assertEqual(sdoc['coverage'], expected_coverage)
Exemplo n.º 11
0
 def test_sort_dates(self):
     """test the sort_date_start/end values

     Four fixtures are mapped in turn: two with distinct start/end
     expectations, one where start and end are the same day, and one
     (couchdb_nocampus.json) where neither sort date key may be present.
     """
     # Each fixture is read inside a context manager so its file handle
     # is closed before the next one is opened.
     with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["sort_date_start"], DT(1885, 1, 1, tzinfo=UTC))
     self.assertEqual(sdoc["sort_date_end"], DT(1890, 1, 1, tzinfo=UTC))

     with open(DIR_FIXTURES + "/couchdb_no_pretty_id.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["sort_date_start"], DT(2013, 9, 30, tzinfo=UTC))
     self.assertEqual(sdoc["sort_date_end"], DT(2013, 9, 30, tzinfo=UTC))

     with open(DIR_FIXTURES + "/couchdb_nocampus.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertNotIn("sort_date_start", sdoc)
     self.assertNotIn("sort_date_end", sdoc)

     with open(DIR_FIXTURES + "/couchdb_solr_date_map.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["sort_date_start"], DT(1885, 7, 4, tzinfo=UTC))
     self.assertEqual(sdoc["sort_date_end"], DT(1890, 8, 3, tzinfo=UTC))
Exemplo n.º 12
0
 def test_sort_dates(self):
     '''test the sort_date_start/end values

     Four fixtures are mapped in turn: two with distinct start/end
     expectations, one where start and end are the same day, and one
     (couchdb_nocampus.json) where neither sort date key may be present.
     '''
     # Each fixture is read inside a context manager so its file handle
     # is closed before the next one is opened.
     with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['sort_date_start'], DT(1885, 1, 1, tzinfo=UTC))
     self.assertEqual(sdoc['sort_date_end'], DT(1890, 1, 1, tzinfo=UTC))

     with open(DIR_FIXTURES + '/couchdb_no_pretty_id.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['sort_date_start'], DT(2013, 9, 30, tzinfo=UTC))
     self.assertEqual(sdoc['sort_date_end'], DT(2013, 9, 30, tzinfo=UTC))

     with open(DIR_FIXTURES + '/couchdb_nocampus.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertNotIn('sort_date_start', sdoc)
     self.assertNotIn('sort_date_end', sdoc)

     with open(DIR_FIXTURES + '/couchdb_solr_date_map.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['sort_date_start'], DT(1885, 7, 4, tzinfo=UTC))
     self.assertEqual(sdoc['sort_date_end'], DT(1890, 8, 3, tzinfo=UTC))
Exemplo n.º 13
0
 def test_map_couch_to_solr_no_campus(self):
     """Test the mapping of the couchdb_nocampus.json fixture.

     The solr doc must carry none of the campus keys, and its
     repository_data entry must contain only the repository URL and
     name joined by "::".
     """
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + "/couchdb_nocampus.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     for campus_key in ("campus", "campus_url", "campus_name", "campus_data"):
         self.assertNotIn(campus_key, sdoc)
     # One string: the adjacent literals in the previous version were
     # implicitly concatenated into exactly this value.
     repo_data = ["https://registry.cdlib.org/api/v1/repository/4/::Bancroft Library"]
     self.assertEqual(sdoc["repository_data"], repo_data)
     self.assertEqual(sdoc["sort_title"], u"neighbor my neighbor what a happy boy")
     self.assertEqual(sdoc["type"], ["image", "physical object"])
Exemplo n.º 14
0
 def test_map_couch_to_solr_no_campus(self):
     '''Test the mapping of the couchdb_nocampus.json fixture.

     The solr doc must carry none of the campus keys, and its
     repository_data entry must contain only the repository URL and
     name joined by '::'.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_nocampus.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     for campus_key in ('campus', 'campus_url', 'campus_name',
                        'campus_data'):
         self.assertNotIn(campus_key, sdoc)
     # A single list element: the two literals are one concatenated string.
     repo_data = [
         'https://registry.cdlib.org/api/v1/repository/4/::'
         'Bancroft Library'
     ]
     self.assertEqual(sdoc['repository_data'], repo_data)
     self.assertEqual(sdoc['sort_title'],
                      u'neighbor my neighbor what a happy boy')
     self.assertEqual(sdoc['type'], ['image', 'physical object'])
Exemplo n.º 15
0
 def test_map_couch_to_solr_nuxeo_doc(self, mock_boto):
     '''Test the mapping of a couch db source json doc from Nuxeo
     to a solr schema compatible doc

     mock_boto is accepted but not used in the body.
     NOTE(review): presumably injected by a mock.patch decorator that is
     outside this excerpt -- confirm.
     '''
     # The context manager closes the fixture file after parsing.
     fixture_path = (DIR_FIXTURES +
                     '/26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850.json')
     with open(fixture_path) as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['id'], '0025ad8f-a44e-4f58-8238-c7b60b2fb850')
     self.assertEqual(sdoc['harvest_id_s'],
                      '26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850')
     self.assertEqual(sdoc['title'], ['Brag'])
     self.assertEqual(sdoc['sort_title'], 'brag')
     self.assertEqual(sdoc['alternative_title'], ['test alt title'])
     self.assertEqual(sdoc['creator'], ['Dunya Ramicova'])
     self.assertEqual(sdoc['description'], [
         u'"Design #39; BRAG; Entourage; Principle Base A" written on '
         'drawing. No signature on drawing.',
         u'Director: David Pountney; Scene Designer: Robert Israel; '
         'Producer: English National Opera, London, UK',
         u'English National Opera'
     ])
     self.assertEqual(sdoc['reference_image_dimensions'], u'899:1199')
     self.assertEqual(sdoc['extent'], u'9" x 12" ')
     self.assertEqual(
         sdoc['format'],
         u'Graphite pencil, and Dr. Ph Martins Liquid Watercolor on '
         'watercolor paper')
     self.assertEqual(sdoc['genre'], ['Drawing'])
     self.assertNotIn('identifier', sdoc)
     self.assertEqual(sdoc['language'], ['English', 'eng'])
     self.assertEqual(sdoc['provenance'], u'Gift of the Naify Family')
     self.assertNotIn('publisher', sdoc)
     self.assertEqual(sdoc['relation'], [u'The Fairy Queen'])
     self.assertEqual(sdoc['rights'], [
         u'copyrighted',
         u'Creative Commons Attribution - NonCommercial-NoDerivatives '
         '(CC BY-NC-ND 4.0)'
     ])
     self.assertEqual(sdoc['structmap_text'], 'Brag')
     self.assertEqual(sdoc['structmap_url'],
                      u's3://static.ucldc.cdlib.org/media_json/'
                      '0025ad8f-a44e-4f58-8238-c7b60b2fb850-media.json')
     self.assertEqual(sdoc['subject'], [None])
     self.assertEqual(sdoc['type'], 'image')
     self.assertEqual(sdoc['location'], u'Box 13, Folder 25')
def main(collection_key):
    """Map each CouchDB doc of one collection to a solr doc and push it.

    Iterates the rows yielded by CouchDBCollectionFilter for
    ``collection_key``. A row whose doc raises KeyError in fill_in_title
    or has_required_fields is reported on stdout and skipped; every other
    doc is mapped with map_couch_to_solr_doc, collected in ``results`` and
    sent to the Solr instance at URL_SOLR via push_doc_to_solr.

    NOTE(review): ``results``, ``dt_start``, ``dt_end`` and ``doc`` are
    assigned but never read in the lines visible here; the definition may
    continue beyond this excerpt, so they are left in place.
    """
    v = CouchDBCollectionFilter(
        couchdb_obj=get_couchdb(), collection_key=collection_key)
    solr_db = Solr(URL_SOLR)
    results = []
    for r in v:
        dt_start = dt_end = datetime.datetime.now()
        try:
            # NOTE(review): the return value is bound to ``doc`` but the
            # following calls keep using ``r.doc`` -- presumably
            # fill_in_title edits the doc in place; confirm.
            doc = fill_in_title(r.doc)
            has_required_fields(r.doc)
        except KeyError as e:
            # ``except KeyError, e`` is a syntax error on Python 3; the
            # ``as`` form is valid on Python 2.6+ as well. ``message`` only
            # exists on Python 2 exceptions, so fall back to the exception
            # itself when the attribute is missing.
            print(getattr(e, 'message', e))
            continue
        solr_doc = map_couch_to_solr_doc(r.doc)
        results.append(solr_doc)
        solr_doc = push_doc_to_solr(solr_doc, solr_db=solr_db)
        dt_end = datetime.datetime.now()
Exemplo n.º 17
0
 def test_map_couch_to_solr_nuxeo_doc(self):
     """Test the mapping of a couch db source json doc from Nuxeo
     to a solr schema compatible doc
     """
     # The context manager closes the fixture file after parsing.
     fixture_path = DIR_FIXTURES + "/26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850.json"
     with open(fixture_path) as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["id"], "0025ad8f-a44e-4f58-8238-c7b60b2fb850")
     self.assertEqual(sdoc["harvest_id_s"], "26098--0025ad8f-a44e-4f58-8238-c7b60b2fb850")
     self.assertEqual(sdoc["title"], ["Brag"])
     self.assertEqual(sdoc["sort_title"], "brag")
     self.assertEqual(sdoc["alternative_title"], ["test alt title"])
     self.assertEqual(sdoc["creator"], ["Dunya Ramicova"])
     self.assertEqual(
         sdoc["description"],
         [
             u'"Design #39; BRAG; Entourage; Principle Base A" written on '
             "drawing. No signature on drawing.",
             u"Director: David Pountney; Scene Designer: Robert Israel; "
             "Producer: English National Opera, London, UK",
             u"English National Opera",
         ],
     )
     self.assertEqual(sdoc["reference_image_dimensions"], u"899:1199")
     self.assertEqual(sdoc["extent"], u'9" x 12" ')
     self.assertEqual(
         sdoc["format"],
         u"Graphite pencil, and Dr. Ph Martins Liquid Watercolor on "
         "watercolor paper",
     )
     self.assertEqual(sdoc["genre"], ["Drawing"])
     self.assertNotIn("identifier", sdoc)
     self.assertEqual(sdoc["language"], ["English", "eng"])
     self.assertEqual(sdoc["provenance"], [u"Gift of Dunya Ramicova, 2014"])
     self.assertNotIn("publisher", sdoc)
     self.assertEqual(sdoc["relation"], [u"The Fairy Queen"])
     self.assertEqual(
         sdoc["rights"],
         [
             u"copyrighted",
             u"Creative Commons Attribution - NonCommercial-NoDerivatives "
             "(CC BY-NC-ND 4.0)",
         ],
     )
     self.assertEqual(sdoc["structmap_text"], "Brag")
     self.assertEqual(
         sdoc["structmap_url"],
         u"s3://static.ucldc.cdlib.org/media_json/"
         "0025ad8f-a44e-4f58-8238-c7b60b2fb850-media.json",
     )
     self.assertEqual(sdoc["subject"], [None])
     self.assertEqual(sdoc["type"], "image")
Exemplo n.º 18
0
 def test_sort_title_all_punctuation(self, mock_boto):
     '''Test sort_title when the title consists only of punctuation.

     The fixture's title is overwritten with a punctuation-only string;
     the mapped sort_title must then be '~title unknown'.
     mock_boto is accepted but not used in the body.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_title_all_punc.json') as fixture:
         doc = json.load(fixture)
     doc['sourceResource']['title'] = ['????$$%(@*#_!']
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['id'], u'0025ad8f-a44e-4f58-8238-c7b60b2fb850')
     self.assertEqual(sdoc['sort_title'], '~title unknown')
Exemplo n.º 19
0
 def test_map_couch_to_solr_doc(self):
     '''Test the mapping of a couch db source json doc to a solr schema
     compatible doc.

     Checks identifiers, registry-derived campus/repository/collection
     fields, and the descriptive metadata copied into the solr doc.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)

     # Identifiers and reference image.
     self.assertEqual(sdoc['id'], 'ark:/13030/ft009nb05r')
     self.assertEqual(sdoc['harvest_id_s'],
                      '23066--http://ark.cdlib.org/ark:/13030/ft009nb05r')
     self.assertEqual(sdoc['reference_image_md5'],
                      'f2610262f487f013fb96149f98990fb0')
     self.assertEqual(sdoc['reference_image_dimensions'], '1244:1500')
     self.assertEqual(sdoc['url_item'],
                      'http://ark.cdlib.org/ark:/13030/ft009nb05r')
     self.assertNotIn('item_count', sdoc)

     # Campus / repository / collection: only the derived *_url, *_name
     # and *_data keys may exist, never the bare key.
     self.assertNotIn('campus', sdoc)
     self.assertEqual(sdoc['campus_url'],
                      [u'https://registry.cdlib.org/api/v1/campus/1/'])
     self.assertEqual(sdoc['campus_name'], [u'UC Berkeley'])
     self.assertEqual(
         sdoc['campus_data'],
         [u'https://registry.cdlib.org/api/v1/campus/1/::UC Berkeley'])
     self.assertNotIn('repository', sdoc)
     self.assertEqual(sdoc['repository_url'],
                      [u'https://registry.cdlib.org/api/v1/repository/4/'])
     self.assertEqual(sdoc['repository_name'], [u'Bancroft Library'])
     repo_data = [
         'https://registry.cdlib.org/api/v1/repository/4/::'
         'Bancroft Library::UC Berkeley'
     ]
     self.assertEqual(sdoc['repository_data'], repo_data)
     self.assertNotIn('collection', sdoc)
     self.assertEqual(
         sdoc['collection_url'],
         ['https://registry.cdlib.org/api/v1/collection/23066/'])
     self.assertEqual(sdoc['collection_name'],
                      ['Uchida (Yoshiko) photograph collection'])
     c_data = [
         'https://registry.cdlib.org/api/v1/collection/23066/::'
         'Uchida (Yoshiko) photograph collection'
     ]
     self.assertEqual(sdoc['collection_data'], c_data)

     # Descriptive metadata.
     self.assertEqual(sdoc['contributor'], ['contrib 1', 'contrib 2'])
     self.assertEqual(sdoc['spatial'], [
         'Palm Springs (Calif.)', 'San Jacinto Mountains (Calif.)',
         'Tahquitz Stream', 'Tahquitz Canyon'
     ])
     self.assertEqual(sdoc['coverage'], [
         'Palm Springs (Calif.)', 'San Jacinto Mountains (Calif.)',
         'Tahquitz Stream', 'Tahquitz Canyon'
     ])
     self.assertEqual(sdoc['creator'], [u'creator 1', u'creator 2'])
     self.assertEqual(
         sdoc['description'],
         [u'description 1', u'description 2', u'description 3'])
     self.assertEqual(sdoc['date'], ['between 1885-1890'])
     self.assertEqual(sdoc['language'], ['English'])
     # NOTE(review): the e-mail addresses inside the expected 'publisher'
     # and 'rights' strings look masked; confirm they match the fixture.
     self.assertEqual(sdoc['publisher'], [
         u'The Bancroft Library, University of California, Berkeley, '
         'Berkeley, CA 94720-6000, Phone: (510) 642-6481, Fax: (510) '
         '642-7589, Email: [email protected], '
         'URL: http://bancroft.berkeley.edu/'
     ])
     self.assertEqual(sdoc['relation'], [
         u'http://www.oac.cdlib.org/findaid/ark:/13030/ft6k4007pc',
         u'http://bancroft.berkeley.edu/collections/jarda.html',
         u'hb158005k9', u'BANC PIC 1986.059--PIC',
         u'http://calisphere.universityofcalifornia.edu/',
         u'http://bancroft.berkeley.edu/'
     ])
     self.assertEqual(sdoc['rights'], [
         u'Transmission or reproduction of materials protected by '
         'copyright beyond that allowed by fair use requires the written '
         'permission of the copyright owners. Works not in the public '
         'domain cannot be commercially exploited without permission of '
         'the copyright owner. Responsibility for any use rests '
         'exclusively with the user.', u'The Bancroft Library--assigned',
         u'All requests to reproduce, publish, quote from, or otherwise '
         'use collection materials must be submitted in writing to the '
         'Head of Public Services, The Bancroft Library, University of '
         'California, Berkeley 94720-6000. See: '
         'http://bancroft.berkeley.edu/reference/permissions.html',
         u'University of California, Berkeley, Berkeley, CA 94720-6000, '
         'Phone: (510) 642-6481, Fax: (510) 642-7589, Email: '
         '*****@*****.**'
     ])
     self.assertEqual(sdoc['rights_uri'],
                      [u'http://rightsstatements.org/vocab/NoC-CR/1.0/'])
     self.assertEqual(sdoc['subject'], [
         u'Yoshiko Uchida photograph collection',
         u'Japanese American Relocation Digital Archive'
     ])
     self.assertEqual(sdoc['title'], [u'Neighbor'])
     self.assertEqual(sdoc['sort_title'], u'neighbor')
     self.assertEqual(sdoc['type'], u'image')
     self.assertEqual(sdoc['format'], 'mods')
     self.assertNotIn('extent', sdoc)
     self.assertEqual(sdoc['temporal'], [u'1964-1965'])
Exemplo n.º 20
0
 def test_map_couch_to_solr_doc(self):
     '''Test the mapping of a couch db source json doc to a solr schema
     compatible doc.

     Checks identifiers, registry-derived campus/repository/collection
     fields, and the descriptive metadata copied into the solr doc.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)

     # Identifiers and reference image.
     self.assertEqual(sdoc['id'], 'ark:/13030/ft009nb05r')
     self.assertEqual(sdoc['harvest_id_s'],
                      '23066--http://ark.cdlib.org/ark:/13030/ft009nb05r')
     self.assertEqual(sdoc['reference_image_md5'],
                      'f2610262f487f013fb96149f98990fb0')
     self.assertEqual(sdoc['reference_image_dimensions'], '1244:1500')
     self.assertEqual(sdoc['url_item'],
                      'http://ark.cdlib.org/ark:/13030/ft009nb05r')
     self.assertNotIn('item_count', sdoc)

     # Campus / repository / collection: only the derived *_url, *_name
     # and *_data keys may exist, never the bare key.
     self.assertNotIn('campus', sdoc)
     self.assertEqual(sdoc['campus_url'],
                      [u'https://registry.cdlib.org/api/v1/campus/1/'])
     self.assertEqual(sdoc['campus_name'], [u'UC Berkeley'])
     self.assertEqual(
         sdoc['campus_data'],
         [u'https://registry.cdlib.org/api/v1/campus/1/::UC Berkeley'])
     self.assertNotIn('repository', sdoc)
     self.assertEqual(sdoc['repository_url'],
                      [u'https://registry.cdlib.org/api/v1/repository/4/'])
     self.assertEqual(sdoc['repository_name'], [u'Bancroft Library'])
     repo_data = [
         'https://registry.cdlib.org/api/v1/repository/4/::'
         'Bancroft Library::UC Berkeley'
     ]
     self.assertEqual(sdoc['repository_data'], repo_data)
     self.assertNotIn('collection', sdoc)
     self.assertEqual(
         sdoc['collection_url'],
         ['https://registry.cdlib.org/api/v1/collection/23066/'])
     self.assertEqual(sdoc['collection_name'],
                      ['Uchida (Yoshiko) photograph collection'])
     c_data = [
         'https://registry.cdlib.org/api/v1/collection/23066/::'
         'Uchida (Yoshiko) photograph collection'
     ]
     self.assertEqual(sdoc['collection_data'], c_data)

     # Descriptive metadata.
     self.assertEqual(sdoc['contributor'], ['contrib 1', 'contrib 2'])
     self.assertEqual(sdoc['spatial'], [
         'Palm Springs (Calif.)', 'San Jacinto Mountains (Calif.)',
         'Tahquitz Stream', 'Tahquitz Canyon'
     ])
     self.assertEqual(sdoc['coverage'], [
         'Palm Springs (Calif.)', 'San Jacinto Mountains (Calif.)',
         'Tahquitz Stream', 'Tahquitz Canyon'
     ])
     self.assertEqual(sdoc['creator'], [u'creator 1', u'creator 2'])
     self.assertEqual(sdoc['description'], [
         u'description 1', u'description 2', u'description 3'
     ])
     self.assertEqual(sdoc['date'], ['between 1885-1890'])
     self.assertEqual(sdoc['language'], ['English'])
     # NOTE(review): the e-mail addresses inside the expected 'publisher'
     # and 'rights' strings look masked; confirm they match the fixture.
     self.assertEqual(sdoc['publisher'], [
         u'The Bancroft Library, University of California, Berkeley, '
         'Berkeley, CA 94720-6000, Phone: (510) 642-6481, Fax: (510) '
         '642-7589, Email: [email protected], '
         'URL: http://bancroft.berkeley.edu/'
     ])
     self.assertEqual(sdoc['relation'], [
         u'http://www.oac.cdlib.org/findaid/ark:/13030/ft6k4007pc',
         u'http://bancroft.berkeley.edu/collections/jarda.html',
         u'hb158005k9', u'BANC PIC 1986.059--PIC',
         u'http://calisphere.universityofcalifornia.edu/',
         u'http://bancroft.berkeley.edu/'
     ])
     self.assertEqual(sdoc['rights'], [
         u'Transmission or reproduction of materials protected by '
         'copyright beyond that allowed by fair use requires the written '
         'permission of the copyright owners. Works not in the public '
         'domain cannot be commercially exploited without permission of '
         'the copyright owner. Responsibility for any use rests '
         'exclusively with the user.', u'The Bancroft Library--assigned',
         u'All requests to reproduce, publish, quote from, or otherwise '
         'use collection materials must be submitted in writing to the '
         'Head of Public Services, The Bancroft Library, University of '
         'California, Berkeley 94720-6000. See: '
         'http://bancroft.berkeley.edu/reference/permissions.html',
         u'University of California, Berkeley, Berkeley, CA 94720-6000, '
         'Phone: (510) 642-6481, Fax: (510) 642-7589, Email: '
         '*****@*****.**'
     ])
     self.assertEqual(sdoc['subject'], [
         u'Yoshiko Uchida photograph collection',
         u'Japanese American Relocation Digital Archive'
     ])
     self.assertEqual(sdoc['title'], [u'Neighbor'])
     self.assertEqual(sdoc['sort_title'], u'neighbor')
     self.assertEqual(sdoc['type'], u'image')
     self.assertEqual(sdoc['format'], 'mods')
     self.assertNotIn('extent', sdoc)
     self.assertEqual(sdoc['temporal'], [u'1964-1965'])
Exemplo n.º 21
0
 def test_sort_title_all_punctuation(self, mock_boto):
     '''Test sort_title when the title consists only of punctuation.

     The fixture's title is overwritten with a punctuation-only string;
     the mapped sort_title must then be '~title unknown'.
     mock_boto is accepted but not used in the body.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_title_all_punc.json') as fixture:
         doc = json.load(fixture)
     doc['sourceResource']['title'] = ['????$$%(@*#_!']
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc['id'], u'0025ad8f-a44e-4f58-8238-c7b60b2fb850')
     self.assertEqual(sdoc['sort_title'], '~title unknown')
Exemplo n.º 22
0
 def test_push_doc_to_solr(self, mock_solr):
     '''Unit test calls to solr.

     Maps the standard fixture and hands the result to push_doc_to_solr
     together with mock_solr; the test passes when no exception is raised.
     NOTE(review): nothing is asserted about how mock_solr was called --
     consider checking the expected call once its contract is confirmed.
     '''
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + '/couchdb_doc.json') as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     push_doc_to_solr(sdoc, mock_solr)
Exemplo n.º 23
0
 def test_map_couch_to_solr_doc(self):
     """Test the mapping of a couch db source json doc to a solr schema
     compatible doc.

     Checks identifiers, registry-derived campus/repository/collection
     fields, and the descriptive metadata copied into the solr doc.
     """
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)

     # Identifiers and reference image.
     self.assertEqual(sdoc["id"], "ark:/13030/ft009nb05r")
     self.assertEqual(sdoc["harvest_id_s"], "23066--http://ark.cdlib.org/ark:/13030/ft009nb05r")
     self.assertEqual(sdoc["reference_image_md5"], "f2610262f487f013fb96149f98990fb0")
     self.assertEqual(sdoc["reference_image_dimensions"], "1244:1500")
     self.assertEqual(sdoc["url_item"], "http://ark.cdlib.org/ark:/13030/ft009nb05r")
     self.assertNotIn("item_count", sdoc)

     # Campus / repository / collection: only the derived *_url, *_name
     # and *_data keys may exist, never the bare key.
     self.assertNotIn("campus", sdoc)
     self.assertEqual(sdoc["campus_url"], [u"https://registry.cdlib.org/api/v1/campus/1/"])
     self.assertEqual(sdoc["campus_name"], [u"UC Berkeley"])
     self.assertEqual(sdoc["campus_data"], [u"https://registry.cdlib.org/api/v1/campus/1/::UC Berkeley"])
     self.assertNotIn("repository", sdoc)
     self.assertEqual(sdoc["repository_url"], [u"https://registry.cdlib.org/api/v1/repository/4/"])
     self.assertEqual(sdoc["repository_name"], [u"Bancroft Library"])
     repo_data = ["https://registry.cdlib.org/api/v1/repository/4/::Bancroft Library::UC Berkeley"]
     self.assertEqual(sdoc["repository_data"], repo_data)
     self.assertNotIn("collection", sdoc)
     self.assertEqual(sdoc["collection_url"], ["https://registry.cdlib.org/api/v1/collection/23066/"])
     self.assertEqual(sdoc["collection_name"], ["Uchida (Yoshiko) photograph collection"])
     c_data = ["https://registry.cdlib.org/api/v1/collection/23066/::Uchida (Yoshiko) photograph collection"]
     self.assertEqual(sdoc["collection_data"], c_data)

     # Descriptive metadata.
     self.assertEqual(sdoc["contributor"], ["contrib 1", "contrib 2"])
     self.assertEqual(
         sdoc["spatial"],
         ["Palm Springs (Calif.)", "San Jacinto Mountains (Calif.)", "Tahquitz Stream", "Tahquitz Canyon"],
     )
     self.assertEqual(
         sdoc["coverage"],
         ["Palm Springs (Calif.)", "San Jacinto Mountains (Calif.)", "Tahquitz Stream", "Tahquitz Canyon"],
     )
     self.assertEqual(sdoc["creator"], [u"creator 1", u"creator 2"])
     self.assertEqual(sdoc["description"], [u"description 1", u"description 2", u"description 3"])
     self.assertEqual(sdoc["date"], ["between 1885-1890"])
     self.assertEqual(sdoc["language"], ["English"])
     # NOTE(review): the e-mail addresses inside the expected "publisher"
     # and "rights" strings look masked; confirm they match the fixture.
     self.assertEqual(
         sdoc["publisher"],
         [
             u"The Bancroft Library, University of California, Berkeley, "
             "Berkeley, CA 94720-6000, Phone: (510) 642-6481, Fax: (510) "
             "642-7589, Email: [email protected], "
             "URL: http://bancroft.berkeley.edu/"
         ],
     )
     self.assertEqual(
         sdoc["relation"],
         [
             u"http://www.oac.cdlib.org/findaid/ark:/13030/ft6k4007pc",
             u"http://bancroft.berkeley.edu/collections/jarda.html",
             u"hb158005k9",
             u"BANC PIC 1986.059--PIC",
             u"http://calisphere.universityofcalifornia.edu/",
             u"http://bancroft.berkeley.edu/",
         ],
     )
     self.assertEqual(
         sdoc["rights"],
         [
             u"Transmission or reproduction of materials protected by "
             "copyright beyond that allowed by fair use requires the written "
             "permission of the copyright owners. Works not in the public "
             "domain cannot be commercially exploited without permission of "
             "the copyright owner. Responsibility for any use rests "
             "exclusively with the user.",
             u"The Bancroft Library--assigned",
             u"All requests to reproduce, publish, quote from, or otherwise "
             "use collection materials must be submitted in writing to the "
             "Head of Public Services, The Bancroft Library, University of "
             "California, Berkeley 94720-6000. See: "
             "http://bancroft.berkeley.edu/reference/permissions.html",
             u"University of California, Berkeley, Berkeley, CA 94720-6000, "
             "Phone: (510) 642-6481, Fax: (510) 642-7589, Email: "
             "*****@*****.**",
         ],
     )
     self.assertEqual(
         sdoc["subject"], [u"Yoshiko Uchida photograph collection", u"Japanese American Relocation Digital Archive"]
     )
     self.assertEqual(sdoc["title"], [u"Neighbor"])
     self.assertEqual(sdoc["sort_title"], u"neighbor")
     self.assertEqual(sdoc["type"], u"image")
     self.assertEqual(sdoc["format"], "mods")
     self.assertNotIn("extent", sdoc)
     self.assertEqual(sdoc["temporal"], [u"1964-1965"])
Exemplo n.º 24
0
 def test_push_doc_to_solr(self, mock_solr):
     """Unit test calls to solr.

     Maps the standard fixture and hands the result to push_doc_to_solr
     together with mock_solr; the test passes when no exception is raised.
     NOTE(review): nothing is asserted about how mock_solr was called --
     consider checking the expected call once its contract is confirmed.
     """
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + "/couchdb_doc.json") as fixture:
         doc = json.load(fixture)
     sdoc = map_couch_to_solr_doc(doc)
     push_doc_to_solr(sdoc, mock_solr)
Exemplo n.º 25
0
 def test_sort_title_all_punctuation(self):
     """Test sort_title when the title consists only of punctuation.

     The fixture's title is overwritten with a punctuation-only string;
     the mapped sort_title must then be "~title unknown".
     """
     # The context manager closes the fixture file after parsing.
     with open(DIR_FIXTURES + "/couchdb_title_all_punc.json") as fixture:
         doc = json.load(fixture)
     doc["sourceResource"]["title"] = ["????$$%(@*#_!"]
     sdoc = map_couch_to_solr_doc(doc)
     self.assertEqual(sdoc["id"], u"0025ad8f-a44e-4f58-8238-c7b60b2fb850")
     self.assertEqual(sdoc["sort_title"], "~title unknown")