def testIterateOverResults(self):
    '''Test the iteration over a mock set of data.

    Five XML fixture pages are queued for POSTs to the mocked Solr
    select URL. A SolrFetcher built with rows=3 must hold 3 results up
    front and yield 10 records in total.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    pages = (
        '/ucsd-new-feed-missions-bb3038949s-0.xml',
        '/ucsd-new-feed-missions-bb3038949s-1.xml',
        '/ucsd-new-feed-missions-bb3038949s-2.xml',
        '/ucsd-new-feed-missions-bb3038949s-3.xml',
        '/ucsd-new-feed-missions-bb3038949s-4.xml',
    )
    httpretty.register_uri(
        httpretty.POST,
        'http://example.edu/solr/select',
        responses=[httpretty.Response(body=fixture(p)) for p in pages])
    h = fetcher.SolrFetcher('http://example.edu/solr', 'extra_data', rows=3)
    self.assertEqual(len(h.resp.results), 3)
    n = 0
    for r in h:
        n += 1
    # r is the last record produced by the loop above.
    self.assertEqual(['Mission at Santa Barbara'], r['title_tesim'])
    self.assertEqual(n, 10)
def testFetch(self):
    '''Test fetching three mocked eMuseum XML pages.

    The first page is pulled with an explicit next() call, the rest by
    iteration; 24 documents are expected overall.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    pages = (
        '/eMuseum-page-1.xml',
        '/eMuseum-page-2.xml',
        '/eMuseum-page-3.xml',
    )
    httpretty.register_uri(
        httpretty.GET,
        'http://digitalcollections.hoover.org/search/*/objects/xml'
        '?filter=approved:true&page=1',
        responses=[httpretty.Response(body=fixture(p)) for p in pages])
    url = 'http://digitalcollections.hoover.org'
    h = fetcher.eMuseum_Fetcher(url, None)
    self.assertEqual(h.url_base, url)
    docs = []
    d = h.next()
    docs.extend(d)
    for d in h:
        docs.extend(d)
    self.assertEqual(len(docs), 24)
    test1 = docs[12]
    self.assertIn('title', test1)
    self.assertEqual(
        test1['title']['text'],
        'Money is power. A war savings certificate in every '
        'Canadian home. Get yours now at post offices or banks.')
    self.assertIn('unknown2', test1)
    self.assertIn('text2', test1['primaryMaker'])
    self.assertNotIn('attrib', test1['unknown1'])
def testSolrHarvest(self, mock_boto3):
    '''Test the function of the Solr harvest with <date> objects in stream

    Runs self.controller.harvest() against five mocked Solr pages and
    checks the two log records captured by self.test_log_handler.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    pages = (
        '/ucsd-new-feed-missions-bb3038949s-0.xml',
        '/ucsd-new-feed-missions-bb3038949s-1.xml',
        '/ucsd-new-feed-missions-bb3038949s-2.xml',
        '/ucsd-new-feed-missions-bb3038949s-3.xml',
        '/ucsd-new-feed-missions-bb3038949s-4.xml',
    )
    httpretty.register_uri(
        httpretty.POST,
        'http://example.edu/solr/blacklight/select',
        responses=[httpretty.Response(body=fixture(p)) for p in pages])
    self.assertTrue(hasattr(self.controller, 'harvest'))
    self.controller.harvest()
    # Single-argument call form prints the same text on Python 2 and 3.
    print("LOGS:{}".format(self.test_log_handler.formatted_records))
    self.assertEqual(len(self.test_log_handler.records), 2)
    self.assertIn('UC San Diego',
                  self.test_log_handler.formatted_records[0])
    self.assertEqual(self.test_log_handler.formatted_records[1],
                     '[INFO] HarvestController: 13 records harvested')
def testIterateOverResults(self):
    '''Test the iteration over a mock set of data.

    Four JSON fixture pages are queued for GETs to the mocked Solr
    query URL. PySolrFetcher must reject a call with no arguments, and
    PySolrQueryFetcher must build the expected query path and yield 10
    records.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    pages = (
        '/ucsd-new-feed-missions-bb3038949s-0.json',
        '/ucsd-new-feed-missions-bb3038949s-1.json',
        '/ucsd-new-feed-missions-bb3038949s-2.json',
        '/ucsd-new-feed-missions-bb3038949s-3.json',
    )
    httpretty.register_uri(
        httpretty.GET,
        'http://example.edu/solr/query',
        responses=[httpretty.Response(body=fixture(p)) for p in pages])
    self.assertRaises(TypeError, fetcher.PySolrFetcher)
    h = fetcher.PySolrQueryFetcher('http://example.edu/solr', 'extra_data',
                                   rows=3)
    self.assertEqual(
        h._query_path,
        'query?q=extra_data&sort=id+asc&cursorMark=%2A&wt=json&rows=3')
    n = 0
    for r in h:
        n += 1
    self.assertEqual(n, 10)
    # r is the last record produced by the loop above.
    self.assertEqual(['Mission Santa Ynez'], r['title_tesim'])
def testFetch(self):
    '''Test the httpretty mocked fetching of documents

    Four responses are queued (page 1 twice, then pages 2 and 3) and
    the fetcher, created with page_size=3, must return 7 documents.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    url = 'https://example.edu/action/search/xml?q=ddu%3A20*&' \
          'asf=ddu&asd=&fd=1&_hd=&hd=on&sf=&_rs=&_ef=&ef=on&sd=&ed=&c=ga'
    pages = (
        '/ucsf-page-1.xml',
        '/ucsf-page-1.xml',
        '/ucsf-page-2.xml',
        '/ucsf-page-3.xml',
    )
    httpretty.register_uri(
        httpretty.GET,
        url,
        responses=[
            httpretty.Response(fixture(p), status=200) for p in pages
        ])
    h = fetcher.UCSF_XML_Fetcher(url, None, page_size=3)
    docs = []
    for d in h:
        docs.extend(d)
    self.assertEqual(len(docs), 7)
    testy = docs[0]
    self.assertIn('tid', testy)
    self.assertEqual(testy['tid'], "nga13j00")
    self.assertEqual(testy['uri'],
                     'http://legacy.library.ucsf.edu/tid/nga13j00')
    self.assertIn('aup', testy['metadata'])
    self.assertEqual(testy['metadata']['aup'], ['Whent, Peter'])
def testFetch(self, mock_deepharvest, mock_boto):
    '''Test the httpretty mocked fetching of documents

    The mocked boto key returns the structmap fixture, so each fetched
    document is expected to carry structmap_url and structmap_text.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    media_json = fixture('/nuxeo_media_structmap.json')
    deepharvest_mocker(mock_deepharvest)
    bucket = mock_boto.return_value.get_bucket.return_value
    bucket.get_key.return_value.get_contents_as_string.return_value = \
        media_json
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/path/path-to-asset/here/@children',
        responses=[
            httpretty.Response(body=fixture('/nuxeo_folder.json'),
                               status=200),
            httpretty.Response(body=fixture('/nuxeo_folder-1.json'),
                               status=200),
        ])
    httpretty.register_uri(
        httpretty.GET,
        re.compile('https://example.edu/api/v1/id/.*'),
        body=fixture('/nuxeo_doc.json'))
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/path/asset-library/UCI/Cochems'
        '/MS-R016_1092.tif/@children?currentPageIndex=0',
        responses=[
            httpretty.Response(body=fixture('/nuxeo_no_children.json'),
                               status=200),
        ])
    h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                             'path-to-asset/here')
    mock_deepharvest.assert_called_with(
        'path-to-asset/here',
        '',
        conf_pynux={'api': 'https://example.edu/api/v1'})
    docs = []
    for d in h:
        docs.append(d)
    self.assertEqual(3, len(docs))
    first = docs[0]
    self.assertIn('picture:views', first['properties'])
    self.assertIn('dc:subjects', first['properties'])
    self.assertIn('structmap_url', first)
    self.assertIn('structmap_text', first)
    self.assertEqual(
        first['structmap_text'],
        "Angela Davis socializing with students at UC Irvine "
        "AS-061_A69-013_001.tif AS-061_A69-013_002.tif "
        "AS-061_A69-013_003.tif AS-061_A69-013_004.tif "
        "AS-061_A69-013_005.tif AS-061_A69-013_006.tif "
        "AS-061_A69-013_007.tif")
    self.assertEqual(
        first['isShownBy'],
        'https://nuxeo.cdlib.org/Nuxeo/nxpicsfile/default/'
        '40677ed1-f7c2-476f-886d-bf79c3fec8c4/Medium:content/')
def test_fetching_range(self):
    '''Test Flickr fetching when a page_range of '3,5' is given.

    First pokes the fetcher's counters to check that next() raises
    ValueError / StopIteration, then iterates a fresh fetcher and
    expects 4 objects in total.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    url = 'https://example.edu'
    user_id = 'testuser'
    page_size = 3
    url_first = fetcher.Flickr_Fetcher.url_get_user_photos_template.format(
        api_key='boguskey', user_id=user_id, per_page=page_size, page=1)
    # Ugly but works
    pages = (['/flickr-public-photos-1.xml'] * 3 +
             ['/flickr-photo-info-0.xml'] * 2)
    httpretty.register_uri(
        httpretty.GET,
        url_first,
        responses=[
            httpretty.Response(body=fixture(p), status=200) for p in pages
        ])
    h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size,
                               page_range='3,5')
    h.doc_current = 10
    self.assertRaises(ValueError, h.next)
    h.docs_fetched = 4
    h.doc_current = 4
    self.assertRaises(StopIteration, h.next)
    h.docs_fetched = 2
    h.doc_current = 2
    h.page_current = 5
    self.assertRaises(StopIteration, h.next)
    # Start over with a fresh fetcher for the full iteration.
    h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size,
                               page_range='3,5')
    total = 0
    all_objs = []
    for objs in h:
        total += len(objs)
        all_objs.extend(objs)
    self.assertEqual(total, 4)
    self.assertEqual(len(all_objs), 4)
def test_fetching(self):
    '''Test fetching a mocked YouTube playlist served in two pages.

    Every video lookup returns the same fixture; 6 videos are expected.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    url = 'https://example.edu'
    playlist_id = 'testplaylist'
    page_size = 3
    url_first = fetcher.YouTube_Fetcher.url_playlistitems.format(
        api_key='boguskey',
        page_size=page_size,
        playlist_id=playlist_id,
        page_token='')
    url_vids = fetcher.YouTube_Fetcher.url_video
    # Ugly but works
    httpretty.register_uri(
        httpretty.GET,
        url_first,
        responses=[
            httpretty.Response(
                body=fixture('/youtube_playlist_with_next.json'),
                status=200),
            httpretty.Response(
                body=fixture('/youtube_playlist_no_next.json'),
                status=200),
        ])
    httpretty.register_uri(
        httpretty.GET,
        url_vids,
        body=fixture('/youtube_video.json'),
        status=200)
    h = fetcher.YouTube_Fetcher(url, playlist_id, page_size=page_size)
    vids = []
    for v in h:
        vids.extend(v)
    self.assertEqual(len(vids), 6)
    self.assertEqual(
        vids[0], {
            u'contentDetails': {
                u'definition': u'sd',
                u'projection': u'rectangular',
                u'caption': u'false',
                u'duration': u'PT19M35S',
                u'licensedContent': True,
                u'dimension': u'2d'
            },
            u'kind': u'youtube#video',
            u'etag':
            u'"m2yskBQFythfE4irbTIeOgYYfBU/-3AtVAYcRLEynWZprpf0OGaY8zo"',
            u'id': u'0Yx8zrbsUu8'
        })
def test_get_isShownBy_video(self, mock_deepharvest, mock_boto):
    ''' test getting correct isShownBy value for Nuxeo video object
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    deepharvest_mocker(mock_deepharvest)
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/path/@search?query=SELECT+%2A+FROM+'
        'Document+WHERE+ecm%3AparentId+%3D+'
        '%274c80e254-6def-4230-9f28-bc48878568d4%27+'
        'AND+ecm%3AcurrentLifeCycleState+%21%3D+%27deleted%27+ORDER+BY+'
        'ecm%3Apos&currentPageIndex=0&pageSize=100',
        responses=[
            httpretty.Response(body=fixture('/nuxeo_no_children.json'),
                               status=200),
        ])
    h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                             'path-to-asset/here')
    nuxeo_metadata = json.loads(fixture('/nuxeo_doc_video.json'))
    isShownBy = h._get_isShownBy(nuxeo_metadata)
    self.assertEqual(
        isShownBy,
        'https://s3.amazonaws.com/static.ucldc.cdlib.org/'
        'ucldc-nuxeo-thumb-media/4c80e254-6def-4230-9f28-bc48878568d4')
def test_get_isShownBy_pdf(self, mock_deepharvest, mock_boto):
    ''' test getting correct isShownBy value for Nuxeo doc with no images
    and PDF at parent level
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    deepharvest_mocker(mock_deepharvest)
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/path/@search?query=SELECT+%2A+FROM+'
        'Document+WHERE+ecm%3AparentId+%3D+'
        '%2700d55837-01b6-4211-80d8-b966a15c257e%27+ORDER+BY+'
        'ecm%3Apos&currentPageIndex=0&pageSize=100',
        responses=[
            httpretty.Response(body=fixture('/nuxeo_no_children.json'),
                               status=200),
        ])
    h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                             'path-to-asset/here')
    nuxeo_metadata = json.loads(fixture('/nuxeo_doc_pdf_parent.json'))
    isShownBy = h._get_isShownBy(nuxeo_metadata)
    self.assertEqual(
        isShownBy,
        'https://s3.amazonaws.com/static.ucldc.cdlib.org/'
        'ucldc-nuxeo-thumb-media/00d55837-01b6-4211-80d8-b966a15c257e')
def test_single_fetching(self):
    '''Test a YouTube fetch that is expected to return one video.

    Only the video URL is mocked here; no playlist page is registered.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    url = 'http://single.edu'
    playlist_id = 'PLwtrWl_IBMJtjP5zMk6dVR-BRjzKqCPOM'
    url_vids = fetcher.YouTube_Fetcher.url_video
    httpretty.register_uri(
        httpretty.GET,
        url_vids,
        responses=[
            httpretty.Response(
                body=fixture('/youtube_single_video.json'),
                status=200)
        ])
    h = fetcher.YouTube_Fetcher(url, playlist_id)
    vids = []
    for v in h:
        vids.extend(v)
    self.assertEqual(len(vids), 1)
    self.assertEqual(
        vids[0], {
            u'contentDetails': {
                u'definition': u'sd',
                u'projection': u'rectangular',
                u'caption': u'false',
                u'duration': u'PT19M35S',
                u'licensedContent': True,
                u'dimension': u'2d'
            },
            u'kind': u'youtube#video',
            u'etag':
            u'"m2yskBQFythfE4irbTIeOgYYfBU/-3AtVAYcRLEynWZprpf0OGaY8zo"',
            u'id': u'0Yx8zrbsUu8'
        })
def testIterateOverResults(self):
    '''Test the RequestSolrFetcher iteration over a mock set of data

    Checks that the extra-data string is split into query params and
    headers, then steps through four next() calls while following the
    cursor mark attributes.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    pages = (
        '/ucb-cursor-results-0.json',
        '/ucb-cursor-results-1.json',
        '/ucb-cursor-results-2.json',
        '/ucb-cursor-results-3.json',
    )
    httpretty.register_uri(
        httpretty.GET,
        'http://example.edu/solr',
        responses=[httpretty.Response(body=fixture(p)) for p in pages])
    h = fetcher.RequestsSolrFetcher(
        'http://example.edu/solr',
        'q=extra:data&header=app-name:Value-with:in-it'
        '&header=app_key:111222333')
    h._page_size = 1
    self.assertEqual(h._query_params['q'], ['extra:data'])
    self.assertEqual(h._headers, {
        'app-name': 'Value-with:in-it',
        'app_key': '111222333'
    })
    cursor = h._nextCursorMark
    docs = []
    docs.append(h.next())  # gets the one from init, no get_next_results
    self.assertEqual(cursor, h._cursorMark)
    docs.append(h.next())  # get_next_results
    self.assertNotEqual(cursor, h._nextCursorMark)
    cursor = h._nextCursorMark
    docs.append(h.next())  # get_next_results
    self.assertEqual(cursor, h._cursorMark)
    cursor = h._nextCursorMark
    docs.append(h.next())  # get_next_results
    self.assertEqual(cursor, h._cursorMark)
    self.assertEqual(len(docs), 4)
def testFetch_missing_media_json(self, mock_deepharvest, mock_boto):
    '''Test fetching when the mocked bucket has no media json key.

    get_key is mocked to return None; the three fetched documents are
    then expected to have an empty structmap_text.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    deepharvest_mocker(mock_deepharvest)
    bucket = mock_boto.return_value.get_bucket.return_value
    bucket.get_key.return_value = None
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/path/path-to-asset/here/@children',
        responses=[
            httpretty.Response(body=fixture('/nuxeo_folder.json'),
                               status=200),
            httpretty.Response(body=fixture('/nuxeo_folder-1.json'),
                               status=200),
        ])
    httpretty.register_uri(
        httpretty.GET,
        re.compile('https://example.edu/api/v1/id/.*'),
        body=fixture('/nuxeo_doc.json'))
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/path/asset-library/UCI/Cochems/'
        'MS-R016_1092.tif/@children?currentPageIndex=0',
        responses=[
            httpretty.Response(body=fixture('/nuxeo_no_children.json'),
                               status=200),
        ])
    h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                             'path-to-asset/here')
    mock_deepharvest.assert_called_with(
        'path-to-asset/here',
        '',
        conf_pynux={'api': 'https://example.edu/api/v1'})
    docs = []
    for d in h:
        docs.append(d)
    self.assertEqual(docs[0]['structmap_text'], '')
    self.assertEqual(docs[1]['structmap_text'], '')
    self.assertEqual(docs[2]['structmap_text'], '')
def test_getMetadataPrefix(self):
    '''Test how OAIFetcher picks its metadataPrefix.

    The ListMetadataFormats URL answers three times with the plain
    formats fixture and then once with the qdc one. The prefix is
    expected to be 'oai_dc' at first, the explicit override when one is
    passed, and 'oai_qdc' once the qdc fixture is served.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    fmts = fixture('/oai-fmts.xml')
    fmts_qdc = fixture('/oai-fmts-qdc.xml')
    httpretty.register_uri(
        httpretty.GET,
        'http://xxxx.cdlib.org/oai?verb=ListMetadataFormats',
        responses=[
            httpretty.Response(body=fmts, status=200),
            httpretty.Response(body=fmts, status=200),
            httpretty.Response(body=fmts, status=200),
            httpretty.Response(body=fmts_qdc, status=200),
        ])
    set_fetcher = fetcher.OAIFetcher('http://xxxx.cdlib.org/oai',
                                     'set=bogus')
    self.assertEqual(set_fetcher._metadataPrefix, 'oai_dc')
    prefix = set_fetcher.get_metadataPrefix('')
    self.assertEqual(prefix, 'oai_dc')
    prefix = set_fetcher.get_metadataPrefix('metadataPrefix=override')
    self.assertEqual(prefix, 'override')
    prefix = set_fetcher.get_metadataPrefix('')
    self.assertEqual(prefix, 'oai_qdc')
def test_get_isShownBy_component_image(self, mock_deepharvest, mock_boto):
    ''' test getting correct isShownBy value for Nuxeo doc with no image
    at parent level, but an image at the component level
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    deepharvest_mocker(mock_deepharvest)
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/path/@search?query='
        'SELECT+%2A+FROM+Document+WHERE+ecm%3AparentId+%3D+'
        '%27d400bb29-98d4-429c-a0b8-119acdb92006%27+ORDER+BY+'
        'ecm%3Apos&currentPageIndex=0&pageSize=100',
        responses=[
            httpretty.Response(
                body=fixture('/nuxeo_image_components.json'),
                status=200),
        ])
    httpretty.register_uri(
        httpretty.GET,
        'https://example.edu/api/v1/id/'
        'e8af2d74-0c8b-4d18-b86c-4067b9e16159',
        responses=[
            httpretty.Response(
                body=fixture('/nuxeo_first_image_component.json'),
                status=200),
        ])
    h = fetcher.NuxeoFetcher('https://example.edu/api/v1',
                             'path-to-asset/here')
    nuxeo_metadata = json.loads(fixture('/nuxeo_doc_imageless_parent.json'))
    isShownBy = h._get_isShownBy(nuxeo_metadata)
    self.assertEqual(
        isShownBy,
        'https://nuxeo.cdlib.org/Nuxeo/nxpicsfile/default/'
        'e8af2d74-0c8b-4d18-b86c-4067b9e16159/Medium:content/')
def testPreservicaFetch(self):
    '''Test fetching from a mocked Preservica entity API.

    Registers the two children listing pages plus four
    information-object / metadata URLs, then expects 17 documents.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    def register(uri, name, **kwargs):
        # One GET mock answering a single time with the named fixture.
        httpretty.register_uri(
            httpretty.GET,
            uri,
            responses=[httpretty.Response(body=fixture(name))],
            **kwargs)

    api = 'https://us.preservica.com/api/entity/v6.0'
    children = (api + '/structural-objects/'
                'eb2416ec-ac1e-4e5e-baee-84e3371c03e9/children')
    info_1 = (api + '/information-objects/'
              '8c81f065-b6e4-457e-8b76-d18176f74bee')
    info_2 = (api + '/information-objects/'
              '9501e09f-1ae8-4abc-a9ec-6c705ff8fdbe')

    # Registration order is kept exactly as in the original test.
    register(children, '/preservica-page-1.xml')
    register(children + '/?start=100&max=100', '/preservica-page-2.xml',
             match_querystring=True)
    register(info_1, '/preservica-child-1.xml')
    register(info_1 + '/metadata/37db4583-8e8e-4778-ac90-ad443664c5cb',
             '/preservica-child-2.xml')
    register(info_2, '/preservica-child-3.xml')
    register(info_2 + '/metadata/ec5c46e5-443e-4b6d-81b9-ec2a5252a50c',
             '/preservica-child-4.xml')

    h = fetcher.PreservicaFetcher(
        'https://oakland.access.preservica.com/v6.0/uncategorized/'
        'SO_eb2416ec-ac1e-4e5e-baee-84e3371c03e9/',
        'usr, pwd')
    docs = []
    d = h.next()
    docs.extend(d)
    logger.error(docs[0])
    for d in h:
        docs.extend(d)
    self.assertEqual(len(docs), 17)
def test_photoset_fetching(self):
    '''Test Flickr fetching through the photoset URL template.

    First pokes the fetcher's counters to check that next() raises
    ValueError / StopIteration, then iterates a fresh fetcher, expects
    6 objects and compares the first one key by key with a reference
    dict.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    url = 'https://example.edu'
    user_id = 'testphotoset'
    page_size = 6
    url_first = fetcher.Flickr_Fetcher.url_get_photoset_template.format(
        api_key='boguskey', user_id=user_id, per_page=page_size, page=1)
    # Ugly but works
    pages = (['/flickr-photoset-1.xml'] * 2 +
             ['/flickr-photo-info-0.xml'] * 3 +
             ['/flickr-photoset-2.xml'] +
             ['/flickr-photo-info-0.xml'] * 3)
    httpretty.register_uri(
        httpretty.GET,
        url_first,
        responses=[
            httpretty.Response(body=fixture(p), status=200) for p in pages
        ])
    h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size)
    h.doc_current = 6
    self.assertRaises(ValueError, h.next)
    h.docs_fetched = 6
    self.assertRaises(StopIteration, h.next)
    # Start over with a fresh fetcher for the full iteration.
    h = fetcher.Flickr_Fetcher(url, user_id, page_size=page_size)
    total = 0
    all_objs = []
    for objs in h:
        total += len(objs)
        all_objs.extend(objs)
    self.assertEqual(total, 6)
    self.assertEqual(len(all_objs), 6)
    photo_obj = all_objs[0]
    key_list_values = {
        'description': {
            # NOTE(review): the source text was broken between
            # '1990s. ' and 'Many' by a line-break-like character that
            # could not be recovered -- confirm this literal against
            # flickr-photo-info-0.xml.
            'text':
            'PictionID:56100666 - Catalog:C87-047-040.tif - '
            'Title:Ryan Aeronautical Negative Collection Image - '
            'Filename:C87-047-040.tif - - Image from the Teledyne Ryan '
            'Archives, donated to SDASM in the 1990s. Many of these '
            'images are from Ryan\'s UAV program-----Please Tag these '
            'images so that the information can be permanently stored '
            'with the digital file.---Repository: <a href='
            '"http://www.sandiegoairandspace.org/library/stillimages.'
            'html" rel="nofollow">San Diego Air and Space Museum </a>'
        },
        'isfavorite': '0',
        'views': '499',
        'farm': '5',
        'people': {
            'haspeople': '0',
            'text': None
        },
        'visibility': {
            'text': None,
            'isfamily': '0',
            'isfriend': '0',
            'ispublic': '1'
        },
        'originalformat': 'jpg',
        'owner': {
            'text': None,
            'nsid': "49487266@N07",
            'username': "******",
            'realname': "SDASM Archives",
            'location': "",
            'iconserver': "4070",
            'iconfarm': "5",
            'path_alias': "sdasmarchives",
        },
        'rotation': '0',
        'id': '34394586825',
        'dates': {
            'text': None,
            'lastupdate': '1493683351',
            'posted': '1493683350',
            'taken': '2017-05-01 17:02:30',
            'takengranularity': '0',
            'takenunknown': '1',
        },
        'originalsecret': 'd46e9b19cc',
        'license': '7',
        'title': {
            'text': 'Ryan Aeronautical Image'
        },
        'media': 'photo',
        'notes': [{
            'x': '10',
            'authorname': 'Bees',
            'text': 'foo',
            'w': '50',
            'author': '12037949754@N01',
            'y': '10',
            'h': '50',
            'id': '313'
        }],
        'tags': [{
            'raw': 'woo yay',
            'text': 'wooyay',
            'id': '1234',
            'author': '12037949754@N01'
        }, {
            'raw': 'hoopla',
            'text': 'hoopla',
            'id': '1235',
            'author': '12037949754@N01'
        }],
        'publiceditability': {
            'text': None,
            'cancomment': '1',
            'canaddmeta': '1'
        },
        'comments': {
            'text': '0'
        },
        'server': '4169',
        'dateuploaded': '1493683350',
        'secret': '375e0b1706',
        'safety_level': '0',
        'urls': [{
            'text':
            'https://www.flickr.com/photos/sdasmarchives/34394586825/',
            'type': 'photopage'
        }],
        'usage': {
            'text': None,
            'canblog': '0',
            'candownload': '1',
            'canprint': '0',
            'canshare': '1'
        },
        'editability': {
            'text': None,
            'cancomment': '0',
            'canaddmeta': '0'
        },
    }
    # Same number of keys first, then a per-key comparison so a failure
    # names the offending value.
    self.assertEqual(len(photo_obj.keys()), len(key_list_values.keys()))
    for k, v in key_list_values.items():
        self.assertEqual(photo_obj[k], v)
def test_fetching(self):
    '''Basic tdd start

    Three mocked result pages are queued for the first advanced-search
    URL; 1285 results are expected and the last one is compared with a
    reference dict.
    '''

    def fixture(name):
        # Close each fixture file as soon as its contents are read.
        with open(DIR_FIXTURES + name) as fobj:
            return fobj.read()

    url = 'https://example.edu'
    extra_data = ('collection:environmentaldesignarchive AND '
                  'subject:"edith heath"')
    page_current = 1
    url_first = fetcher.IA_Fetcher.url_advsearch.format(
        page_current=page_current, search_query=extra_data)
    pages = (
        '/ia-results-1.json',
        '/ia-results-2.json',
        '/ia-results-3.json',
    )
    httpretty.register_uri(
        httpretty.GET,
        url_first,
        responses=[httpretty.Response(body=fixture(p)) for p in pages])
    h = fetcher.IA_Fetcher(url, extra_data)
    results = []
    for v in h:
        results.extend(v)
    self.assertEqual(h.url_base, url)
    self.assertEqual(
        h.url_advsearch,
        'https://archive.org/advancedsearch.php?'
        'q={search_query}&rows=500&page={page_current}&output=json')
    self.assertEqual(len(results), 1285)
    self.assertEqual(
        results[1284], {
            u'week': 0,
            u'publicdate': u'2014-02-28T03:17:59Z',
            u'format': [
                u'Archive BitTorrent', u'JPEG', u'JPEG Thumb', u'JSON',
                u'Metadata'
            ],
            u'title': u'Upright Cabinet Piano',
            u'downloads': 68,
            u'indexflag': [u'index', u'nonoindex'],
            u'mediatype': u'image',
            u'collection': [
                u'metropolitanmuseumofart-gallery',
                u'fav-mar_a_luisa_guevara_tirado',
                u'fav-drewblanco'
            ],
            u'month': 1,
            u'btih': u'e16555eb5474d2543c7ad27a1cfd145195ce05bf',
            u'item_size': 353871,
            u'backup_location': u'ia905804_31',
            u'year': u'1835',
            u'date': u'1835-01-01T00:00:00Z',
            u'oai_updatedate': [
                u'2014-02-28T03:17:59Z', u'2014-02-28T03:17:59Z',
                u'2016-08-31T20:56:29Z'
            ],
            u'identifier': u'mma_upright_cabinet_piano_504395',
            u'subject': [
                u'North and Central America', u'Wood, various materials',
                u'Cabinets', u'Case furniture', u'1835', u'Pianos',
                u'New York City', u'Metropolitan Museum of Art',
                u'Zithers', u'United States', u'Brooklyn',
                u'Musical instruments', u'Chordophones', u'New York',
                u'Furniture'
            ]
        })