def test_check_required_fields(self):
    """Drive one document through each has_required_fields check in turn.

    The document starts with only its ids. After each expected failure the
    piece that was reported missing is added, so the following assertion
    exercises the next check. Once the document is accepted, it is switched
    to the image type to exercise the harvested-image check as well.
    """
    record = {'id': 'sid', '_id': 'hid'}
    self.assertRaisesRegexp(
        MissingSourceResource,
        '---- OMITTED: Doc:hid has no sourceResource.',
        has_required_fields, record)

    record['sourceResource'] = {}
    self.assertRaisesRegexp(
        MissingTitle,
        '---- OMITTED: Doc:hid has no title.',
        has_required_fields, record)

    record['sourceResource']['title'] = 'test-title'
    self.assertRaisesRegexp(
        MissingRights,
        '---- OMITTED: Doc:hid has no rights.',
        has_required_fields, record)

    record['sourceResource']['rights'] = 'hasRights'
    self.assertRaisesRegexp(
        MissingIsShownAt,
        '---- OMITTED: Doc:hid has no isShownAt.',
        has_required_fields, record)

    # isShownAt values that must be rejected, paired with the expected
    # message pattern.
    # NOTE(review): the adjacent literals join with no space between
    # "be" and "a"; kept verbatim, presumably mirroring the message text
    # raised by has_required_fields -- confirm against the implementation.
    not_url_cases = (
        ('y',
         '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
         'a URL: y'),
        ('http://',
         '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
         'a URL: http://'),
        ('http://netloc',
         '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
         'a URL: http://netloc'),
    )
    for candidate, pattern in not_url_cases:
        record['isShownAt'] = candidate
        self.assertRaisesRegexp(
            isShownAtNotURL, pattern, has_required_fields, record)

    # isShownAt values that must be accepted.
    for candidate in ('http://netloc/path',
                      'http://netloc/;params',
                      'http://netloc/?query'):
        record['isShownAt'] = candidate
        self.assertEqual(has_required_fields(record), True)

    # An image-type document is rejected until it carries an 'object'.
    record['sourceResource']['type'] = 'image'
    self.assertRaisesRegexp(
        MissingImage,
        '---- OMITTED: Doc:hid is image type with no harvested image.',
        has_required_fields, record)

    record['object'] = 'has object'
    self.assertEqual(has_required_fields(record), True)
def test_check_required_fields(self):
    """Exercise has_required_fields on a document that is filled in stepwise.

    Every rejection is checked for both its exception type and its message
    pattern; every acceptance is checked for a True return value.
    """
    doc = {'id': 'sid', '_id': 'hid'}

    def rejected(exc_type, pattern):
        # The current state of ``doc`` must be refused with this error.
        with self.assertRaisesRegexp(exc_type, pattern):
            has_required_fields(doc)

    def accepted():
        # The current state of ``doc`` must pass validation.
        self.assertEqual(has_required_fields(doc), True)

    rejected(MissingSourceResource,
             '---- OMITTED: Doc:hid has no sourceResource.')

    doc['sourceResource'] = {}
    rejected(MissingTitle, '---- OMITTED: Doc:hid has no title.')

    doc['sourceResource']['title'] = 'test-title'
    rejected(MissingRights, '---- OMITTED: Doc:hid has no rights.')

    doc['sourceResource']['rights'] = 'hasRights'
    rejected(MissingIsShownAt, '---- OMITTED: Doc:hid has no isShownAt.')

    # NOTE(review): in the three patterns below the adjacent literals join
    # with no space between "be" and "a"; kept verbatim, presumably matching
    # the message produced by has_required_fields -- confirm.
    doc['isShownAt'] = 'y'
    rejected(isShownAtNotURL,
             '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
             'a URL: y')

    doc['isShownAt'] = 'http://'
    rejected(isShownAtNotURL,
             '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
             'a URL: http://')

    doc['isShownAt'] = 'http://netloc'
    rejected(isShownAtNotURL,
             '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
             'a URL: http://netloc')

    doc['isShownAt'] = 'http://netloc/path'
    accepted()

    doc['isShownAt'] = 'http://netloc/;params'
    accepted()

    doc['isShownAt'] = 'http://netloc/?query'
    accepted()

    # Switching to the image type adds the requirement for an 'object'.
    doc['sourceResource']['type'] = 'image'
    rejected(MissingImage,
             '---- OMITTED: Doc:hid is image type with no harvested image.')

    doc['object'] = 'has object'
    accepted()
def main(collection_key):
    """Send the documents of one CouchDB collection to Solr.

    Iterates the rows produced by CouchDBCollectionFilter for
    ``collection_key``. Each row's document is passed to fill_in_title and
    then has_required_fields; if either raises KeyError the error message
    is printed and the row is skipped. Otherwise the document is converted
    with map_couch_to_solr_doc, collected in a local list and handed to
    push_doc_to_solr.

    :param collection_key: forwarded to CouchDBCollectionFilter to select
        the collection to process.
    """
    rows = CouchDBCollectionFilter(couchdb_obj=get_couchdb(),
                                   collection_key=collection_key)
    solr_db = Solr(URL_SOLR)
    results = []
    for row in rows:
        # NOTE(review): these timestamps are recorded but never read in the
        # code visible here -- confirm whether timing output is still wanted.
        dt_start = dt_end = datetime.datetime.now()
        try:
            # Return values are not needed: validation below runs on
            # row.doc itself, exactly as before.
            fill_in_title(row.doc)
            has_required_fields(row.doc)
        except KeyError as e:
            # The `as` form parses on Python 2.6+ and Python 3; the comma
            # form is Python 2 only. Print the `message` attribute as the
            # original did, falling back to the exception itself where that
            # attribute does not exist.
            print(getattr(e, 'message', e))
            continue
        solr_doc = map_couch_to_solr_doc(row.doc)
        results.append(solr_doc)
        push_doc_to_solr(solr_doc, solr_db=solr_db)
        dt_end = datetime.datetime.now()
def main(collection_key):
    """Send the documents of one CouchDB collection to Solr.

    Iterates the rows produced by CouchDBCollectionFilter for
    ``collection_key``. Each row's document is passed to fill_in_title and
    then has_required_fields; if either raises KeyError the error message
    is printed and the row is skipped. Otherwise the document is converted
    with map_couch_to_solr_doc, collected in a local list and handed to
    push_doc_to_solr.

    :param collection_key: forwarded to CouchDBCollectionFilter to select
        the collection to process.
    """
    rows = CouchDBCollectionFilter(
        couchdb_obj=get_couchdb(), collection_key=collection_key)
    solr_db = Solr(URL_SOLR)
    results = []
    for row in rows:
        # NOTE(review): these timestamps are recorded but never read in the
        # code visible here -- confirm whether timing output is still wanted.
        dt_start = dt_end = datetime.datetime.now()
        try:
            # Return values are not needed: validation below runs on
            # row.doc itself, exactly as before.
            fill_in_title(row.doc)
            has_required_fields(row.doc)
        except KeyError as e:
            # The `as` form parses on Python 2.6+ and Python 3; the comma
            # form is Python 2 only. Print the `message` attribute as the
            # original did, falling back to the exception itself where that
            # attribute does not exist.
            print(getattr(e, 'message', e))
            continue
        solr_doc = map_couch_to_solr_doc(row.doc)
        results.append(solr_doc)
        push_doc_to_solr(solr_doc, solr_db=solr_db)
        dt_end = datetime.datetime.now()
def test_check_required_fields(self):
    """Check has_required_fields against a fixed series of documents.

    Each case is a freshly built document paired with a flag: True when
    has_required_fields must return True for it, False when the call must
    raise KeyError. Cases run in the listed order.
    """
    cases = (
        # No sourceResource at all.
        ({"id": "test-id"}, False),
        # Empty sourceResource.
        ({"id": "test-id", "sourceResource": {}}, False),
        # NOTE(review): "_id" sits inside sourceResource here rather than
        # at the top level -- possibly unintended; kept as-is.
        ({"id": "test-id",
          "sourceResource": {"title": "test-title", "_id": "x"}}, False),
        # Title, _id and isShownAt all present.
        ({"id": "test-id",
          "sourceResource": {"title": "test-title"},
          "_id": "x",
          "isShownAt": "y"}, True),
        # Image type without "object", "_id" or "isShownAt".
        ({"id": "test-id",
          "sourceResource": {"title": "test-title", "type": "image"}},
         False),
        # Image type with "object", "_id" and "isShownAt".
        ({"id": "test-id",
          "object": "hasobject",
          "_id": "x",
          "isShownAt": "y",
          "sourceResource": {"title": "test-title", "type": "image"}},
         True),
    )
    for candidate, should_pass in cases:
        if should_pass:
            self.assertEqual(has_required_fields(candidate), True)
        else:
            with self.assertRaises(KeyError):
                has_required_fields(candidate)