# --- UNAIDS importer: category/subcategory/variable bootstrap (fragment) ---
# NOTE(review): this line is a newline-stripped fragment of a longer script. As
# written it is NOT valid Python: the inline '#' comment after 'UNAIDS Datasets'
# now swallows the remainder of the statement text, and the trailing
# "existing_variables_list = {" is cut off mid-expression (continuation not in
# this file). Restore the original line breaks from version control rather than
# editing in place.
# Intended flow (from the visible tokens): initialise bookkeeping lists/timers,
# then inside transaction.atomic() get-or-create the root DatasetCategory named
# 'UNAIDS Datasets' (fetcher_autocreated=True on create), and snapshot existing
# subcategory names and Variable names for namespace 'unaids' into sets for
# duplicate detection — presumably used later in the import loop; verify once
# the full script is recovered.
unaids_category_name_in_db = 'UNAIDS Datasets' # set the name of the root category of all data that will be imported by this script new_datasets_list = [] existing_datasets_list = [] start_time = time.time() row_number = 0 duplicate_tracker = set() with transaction.atomic(): existing_categories = DatasetCategory.objects.values('name') existing_categories_list = {item['name'] for item in existing_categories} if unaids_category_name_in_db not in existing_categories_list: the_category = DatasetCategory(name=unaids_category_name_in_db, fetcher_autocreated=True) the_category.save() else: the_category = DatasetCategory.objects.get( name=unaids_category_name_in_db) existing_subcategories = DatasetSubcategory.objects.filter( categoryId=the_category.pk).values('name') existing_subcategories_list = { item['name'] for item in existing_subcategories } existing_variables = Variable.objects.filter( datasetId__namespace='unaids').values('name') existing_variables_list = {
# --- WHO WASH importer: category/subcategory/variable bootstrap (fragment) ---
# NOTE(review): newline-stripped fragment; not valid Python as a single line.
# The leading "'type': 'Urban' } } }" is the tail of a nested mapping/config
# structure whose opening is not visible in this file, and the trailing
# "existing_variables_list = {" is cut off mid-expression. The inline '#'
# comment after 'WHO WASH Datasets' swallows the rest of the statement text.
# Recover the original formatting from version control.
# Intended flow (from the visible tokens): same bootstrap pattern as the other
# importers — inside transaction.atomic() get-or-create the root
# DatasetCategory 'WHO WASH Datasets' (fetcher_autocreated=True on create) and
# snapshot existing subcategory names and Variable names for namespace
# 'who_wash' into sets; verify against the recovered full script.
'type': 'Urban' } } } who_wash_category_name_in_db = 'WHO WASH Datasets' # set the name of the root category of all data that will be imported by this script start_time = time.time() with transaction.atomic(): existing_categories = DatasetCategory.objects.values('name') existing_categories_list = {item['name'] for item in existing_categories} if who_wash_category_name_in_db not in existing_categories_list: the_category = DatasetCategory(name=who_wash_category_name_in_db, fetcher_autocreated=True) the_category.save() else: the_category = DatasetCategory.objects.get( name=who_wash_category_name_in_db) existing_subcategories = DatasetSubcategory.objects.filter( categoryId=the_category.pk).values('name') existing_subcategories_list = { item['name'] for item in existing_subcategories } existing_variables = Variable.objects.filter( datasetId__namespace='who_wash').values('name') existing_variables_list = {
# --- Clio-Infra importer: metadata load + category/entity bootstrap (fragment) ---
# NOTE(review): newline-stripped fragment; not valid Python as a single line
# (the if/else and the trailing "existing_entities_list = { item['name'].lower()"
# are cut mid-statement; the comprehension's continuation is not in this file).
# Restore original line breaks from version control before editing.
# Intended flow (from the visible tokens): read metadata.csv (UTF-8) via
# csv.DictReader to build a Dataset -> Category mapping, fetch prior
# ImportHistory rows of import_type 'clioinfra', then inside
# transaction.atomic() get-or-create the root DatasetCategory named by
# clioinfra_category_name_in_db (defined earlier, outside this fragment;
# fetcher_autocreated=True on create) and snapshot existing subcategory names
# and lower-cased Entity names for duplicate/lookup checks — verify once the
# full script is recovered.
with open(os.path.join(metadata_location, 'metadata.csv'), encoding='utf-8') as metadata: metareader = csv.DictReader(metadata) for row in metareader: dataset_to_category[row['Dataset']] = row['Category'] import_history = ImportHistory.objects.filter(import_type='clioinfra') with transaction.atomic(): new_datasets_list = [] old_datasets_list = [] existing_categories = DatasetCategory.objects.values('name') existing_categories_list = {item['name'] for item in existing_categories} if clioinfra_category_name_in_db not in existing_categories_list: the_category = DatasetCategory(name=clioinfra_category_name_in_db, fetcher_autocreated=True) the_category.save() else: the_category = DatasetCategory.objects.get( name=clioinfra_category_name_in_db) existing_subcategories = DatasetSubcategory.objects.filter( categoryId=the_category.pk).values('name') existing_subcategories_list = { item['name'] for item in existing_subcategories } existing_entities = Entity.objects.values('name') existing_entities_list = { item['name'].lower()
# --- Penn World Table importer: category/subcategory/variable bootstrap (fragment) ---
# NOTE(review): newline-stripped fragment; not valid Python as a single line.
# The inline '#' comment after 'Penn World Table Datasets' swallows the rest of
# the statement text, and the trailing "existing_variables_list = {" is cut off
# mid-expression (continuation not in this file). Restore the original line
# breaks from version control rather than editing in place.
# Intended flow (from the visible tokens): initialise bookkeeping lists/timers,
# then inside transaction.atomic() get-or-create the root DatasetCategory named
# 'Penn World Table Datasets' (fetcher_autocreated=True on create) and snapshot
# existing subcategory names and Variable names for namespace 'penn_world' into
# sets for duplicate detection — verify against the recovered full script.
penn_world_category_name_in_db = 'Penn World Table Datasets' # set the name of the root category of all data that will be imported by this script new_datasets_list = [] existing_datasets_list = [] start_time = time.time() row_number = 0 with transaction.atomic(): existing_categories = DatasetCategory.objects.values('name') existing_categories_list = {item['name'] for item in existing_categories} if penn_world_category_name_in_db not in existing_categories_list: the_category = DatasetCategory(name=penn_world_category_name_in_db, fetcher_autocreated=True) the_category.save() else: the_category = DatasetCategory.objects.get( name=penn_world_category_name_in_db) existing_subcategories = DatasetSubcategory.objects.filter( categoryId=the_category.pk).values('name') existing_subcategories_list = { item['name'] for item in existing_subcategories } existing_variables = Variable.objects.filter( datasetId__namespace='penn_world').values('name') existing_variables_list = {