def process_entities(country_names_dictionary):
    """Map each country code to the ``Entity`` object for its country name.

    For every ``code -> name`` pair the entity is resolved in this order:

    1. the unidecoded, lower-cased name is found in the ``CountryName``
       table -> the entity whose name equals the matching record's
       ``owid_country.owid_name``;
    2. the lower-cased name matches an existing entity name -> that entity
       (case-insensitive lookup);
    3. otherwise a new, unvalidated ``Entity`` is created and saved.

    Args:
        country_names_dictionary: Mapping of country code to country name
            (the names come from the Excel file being imported).

    Returns:
        dict: Country code -> ``Entity`` object; used when saving the
        variables and their values.
    """
    # Lower-cased names of every entity currently stored.
    known_entity_names = {
        row['name'].lower() for row in Entity.objects.values('name')
    }

    # Lower-cased country-tool name -> that record's ``owid_country`` value.
    standardized_by_name = {
        record.country_name.lower(): record.owid_country
        for record in CountryName.objects.all()
    }

    c_name_entity_ref = {}
    for c_code, country_name in country_names_dictionary.items():
        lowered = country_name.lower()
        owid_country = standardized_by_name.get(
            unidecode.unidecode(lowered), 0)

        if owid_country:
            entity = Entity.objects.get(name=owid_country.owid_name)
        elif lowered in known_entity_names:
            entity = Entity.objects.get(name__iexact=country_name)
        else:
            # Unknown name: store it as a new entity flagged as not validated.
            entity = Entity(name=country_name, validated=False)
            entity.save()

        c_name_entity_ref[c_code] = entity

    return c_name_entity_ref
if country_col not in c_name_entity_ref: if country_col == 'All countries': newentity = Entity.objects.get(name='World') elif country_col == 'Côte d\'Ivoire': newentity = Entity.objects.get(name='Cote d\'Ivoire') elif country_tool_names_dict.get( unidecode.unidecode(country_col.lower()), 0): newentity = Entity.objects.get( name=country_tool_names_dict[unidecode.unidecode( country_col.lower())].owid_name) elif country_col.lower() in existing_entities_list: newentity = Entity.objects.get( name__iexact=country_col) else: newentity = Entity(name=country_col, validated=False) newentity.save() c_name_entity_ref[country_col] = newentity try: if (int(row['Time Period']), c_name_entity_ref[country_col].pk, variable_name_to_object[variable_name.lower()].pk ) not in duplicate_tracker: data_values_tuple_list.append(( str(float(row['Data Value'])), int(row['Time Period']), c_name_entity_ref[country_col].pk, variable_name_to_object[variable_name.lower()].pk)) duplicate_tracker.add(( int(row['Time Period']), c_name_entity_ref[country_col].pk, variable_name_to_object[variable_name.lower()].pk))
def process_one_row(year, value, countryname, variablecode, variablename,
                    existing_fao_variables_dict, unit, source, dataset,
                    var_desc, data_values_tuple_list):
    """Queue one data point for bulk insertion into ``data_values``.

    The row is ignored when ``year`` or ``value`` is ``False`` or when the
    ``(countryname, variablecode)`` pair is already present in the
    module-level ``unique_data_tracker``. Otherwise the country's ``Entity``
    and the row's ``Variable`` are resolved (and created when missing), a
    ``(value, year, entity_pk, variable_pk)`` tuple is appended to
    ``data_values_tuple_list``, and the list is flushed to the database once
    it holds more than 3000 tuples.

    Relies on module-level state: ``processed_values`` (incremented on every
    call), ``unique_data_tracker``, ``country_name_entity_ref``,
    ``existing_entities_list`` and ``country_tool_names_dict``.

    Args:
        year: Year of the data point, or ``False`` when unavailable.
        value: Value of the data point, or ``False`` when unavailable.
        countryname: Country name as it appears in the source data.
        variablecode: Code stored on a newly created ``Variable``.
        variablename: Variable name; key into ``existing_fao_variables_dict``.
        existing_fao_variables_dict: Variable name -> ``Variable`` cache;
            updated in place when a variable is created.
        unit: Unit string; an empty string is stored when it is falsy.
        source: Passed as ``sourceId`` to a newly created ``Variable``.
        dataset: Passed as ``fk_dst_id`` to a newly created ``Variable``.
        var_desc: Description for a newly created ``Variable``.
        data_values_tuple_list: Pending insert tuples; mutated in place
            (appended to, and emptied after each flush).

    Returns:
        None
    """
    global unique_data_tracker
    global processed_values

    processed_values += 1
    if processed_values % 300 == 0:
        # Pause briefly so the import does not keep the CPU busy all the time.
        time.sleep(0.001)

    # Parameterized statement used for mass inserting into data_values.
    insert_string = 'INSERT into data_values (value, year, fk_ent_id, fk_var_id) VALUES (%s, %s, %s, %s)'

    if year is False or value is False:
        return
    if (countryname, variablecode) in unique_data_tracker:
        return

    # Resolve the entity for this country once and cache it by source name.
    if countryname not in country_name_entity_ref:
        lowered_name = countryname.lower()
        if lowered_name in existing_entities_list:
            # The membership test above is case-insensitive, so the lookup
            # has to be as well; an exact match can raise DoesNotExist on a
            # case-sensitive database collation.
            entity = Entity.objects.get(name__iexact=countryname)
        else:
            owid_country = country_tool_names_dict.get(
                unidecode.unidecode(lowered_name), 0)
            if owid_country:
                entity = Entity.objects.get(name=owid_country.owid_name)
            else:
                # Unknown name: store a new entity flagged as not validated.
                entity = Entity(name=countryname, validated=False)
                entity.save()
        country_name_entity_ref[countryname] = entity

    # Create the variable the first time its name is encountered.
    if variablename not in existing_fao_variables_dict:
        variable_fields = dict(
            name=variablename,
            unit=unit if unit else '',
            short_unit=short_unit_extract(unit),
            description=var_desc,
            timespan='',
            fk_dst_id=dataset,
            # NOTE(review): the meaning of variable type pk=4 is not visible
            # here -- confirm against the VariableType table.
            fk_var_type_id=VariableType.objects.get(pk=4),
            sourceId=source,
        )
        newvariable = Variable(code=variablecode, **variable_fields)
        try:
            # atomic() keeps a failed save from poisoning any outer
            # transaction, so the retry below can still run.
            with transaction.atomic():
                newvariable.save()
        except django.db.utils.IntegrityError:
            # Retry without the code. Presumably the code collided with an
            # existing variable -- confirm against the model's constraints.
            newvariable = Variable(code=None, **variable_fields)
            newvariable.save()
        existing_fao_variables_dict[variablename] = newvariable

    data_values_tuple_list.append((
        str(value),
        int(year),
        country_name_entity_ref[countryname].pk,
        existing_fao_variables_dict[variablename].pk,
    ))

    # Insert when the length of the list goes over 3000, then empty it.
    if len(data_values_tuple_list) > 3000:
        with connection.cursor() as c:
            c.executemany(insert_string, data_values_tuple_list)
        del data_values_tuple_list[:]