    def test_value_error_raised_when_metadata_not_present(self):
        '''
        A ValueError is raised when metadata is not present.
        '''
        d = Dataset(self.dataset_error).info()
        with self.assertRaises(ValueError):
            parse_dataset(d)
    def test_metadata_is_complete(self):
        '''
        The metadata property is complete and every field has the expected type.
        '''
        d = Dataset(self.dataset_id).info()
        result = parse_dataset(d)
        for key in self.metadata_types.keys():
            self.assertIs(type(result['metadata'][key]), self.metadata_types[key])
    def test_parser_returns_metadata_and_resource(self):
        '''
        The parser returns both the metadata and the resource objects.
        '''
        d = Dataset(self.dataset_id).info()
        result = parse_dataset(d)
        self.assertIs(type(result), dict)
        for key in ['metadata', 'resources']:
            self.assertIn(key, result.keys())
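
    # A minimal sketch of the fixture these tests rely on: a dataset ID that
    # parses cleanly, one whose metadata is missing, and the expected metadata
    # field types. The identifiers and the type map below are hypothetical
    # placeholders, not values from the actual test suite.
    def setUp(self):
        self.dataset_id = 'doi:PLACEHOLDER/VALID-DATASET'
        self.dataset_error = 'doi:PLACEHOLDER/DATASET-WITHOUT-METADATA'
        self.metadata_types = {
            'id': int,
            'title': str,
            'description': str
        }
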
def main():
    '''
    Program wrapper.
    '''
    print('%s Creating Dataverse instance.' % item('bullet'))
    d = Dataverse('dataverse.harvard.edu', 'IFPRI')

    print('%s Collecting all content from Dataverse.' % item('bullet'))
    contents = d.contents()

    #
    #  Collects data and organizes
    #  it in lists and dictionaries.
    #
    datasets = []
    resources = []
    for dataset in contents:
        print('%s Collecting data for ID %s.' % (item('bullet'), dataset['id']))
        o = Dataset(dataset['id']).info()

        #  Skip responses that errored out or carry no status field.
        if o.get('status') == 'ERROR' or o.get('status') is None:
            continue

        try:
            parsed_data = parse_dataset(o)
        except ValueError:
            print('%s Missing metadata. Not parsing.' % item('warn'))
            continue
        else:
            datasets.append(parsed_data['metadata'])
            resources += parsed_data['resources']

    #
    #  Exports JSON files to disk.
    #
    export_json(datasets, 'data/datasets.json')
    export_json(resources, 'data/resources.json')

    print('%s Total datasets downloaded: %s' % (item('success'), str(len(datasets))))
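
# Assuming main() is meant to be the module's sole entry point when the script
# is run directly: a standard guard so importing the module does not start a crawl.
if __name__ == '__main__':
    main()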