# Walk the CSV rows and make sure a Variable exists for every
# "<Indicator> - <Subgroup>" combination found in the file.
reader = csv.DictReader(f)
for row in reader:
    row_number += 1
    variable_name = "{} - {}".format(row['Indicator'].strip(),
                                     row['Subgroup'].strip())
    # Both lookup structures below are keyed by the lowercased name.
    variable_key = variable_name.lower()

    if variable_key not in existing_variables_list:
        # Name not known yet: create the variable under the new dataset
        # and record it in both lookup structures.
        newvariable = Variable(
            name=variable_name,
            unit=row['Unit'],
            code=None,
            datasetId=newdataset,
            variableTypeId=VariableType.objects.get(pk=4),
            sourceId=source_name_to_object[source_name])
        newvariable.save()
        variable_name_to_object[variable_key] = newvariable
        existing_variables_list.add(newvariable.name.lower())
    else:
        if variable_key not in variable_name_to_object:
            # Name is known but the object has not been loaded yet:
            # fetch it and remove the data values stored for it.
            newvariable = Variable.objects.get(
                name=variable_name, datasetId=newdataset)
            # The table name cannot be passed as a query parameter, so it
            # is formatted in (it comes from model metadata); the primary
            # key is bound by the database driver.
            delete_batch_sql = (
                'DELETE FROM {} WHERE variableId = %s LIMIT 10000;'.format(
                    DataValue._meta.db_table))
            # exists() only asks whether any row is left instead of
            # fetching a full DataValue instance on every pass.
            while DataValue.objects.filter(
                    variableId__pk=newvariable.pk).exists():
                # if we don't limit the deleted values, the db might just hang
                with connection.cursor() as c:
                    c.execute(delete_batch_sql, [newvariable.pk])
def _entity_for_country(countryname):
    """Return the Entity to use for *countryname*, creating it if needed.

    Lookup order: an existing entity with this exact name (when the
    lowercased name is in ``existing_entities_list``), then the entity named
    by the ``owid_name`` of the matching ``country_tool_names_dict`` entry
    (keyed by the unidecoded, lowercased name), and finally a newly saved,
    unvalidated Entity.
    """
    lowered = countryname.lower()
    if lowered in existing_entities_list:
        return Entity.objects.get(name=countryname)

    tool_match = country_tool_names_dict.get(unidecode.unidecode(lowered), 0)
    if tool_match:
        return Entity.objects.get(name=tool_match.owid_name)

    entity = Entity(name=countryname, validated=False)
    entity.save()
    return entity


def _create_fao_variable(variablename, variablecode, unit, var_desc, dataset,
                         source):
    """Create, save and return a new Variable for *variablename*.

    The variable is first saved with ``code=variablecode``; if that save
    raises ``IntegrityError`` a fresh instance with ``code=None`` is saved
    instead.
    """
    short_unit = short_unit_extract(unit)

    def build(code):
        # Both attempts share every field except the code.
        return Variable(
            name=variablename,
            unit=unit if unit else '',
            short_unit=short_unit,
            description=var_desc,
            code=code,
            timespan='',
            fk_dst_id=dataset,
            fk_var_type_id=VariableType.objects.get(pk=4),
            sourceId=source)

    variable = build(variablecode)
    try:
        # atomic() keeps a failed save from poisoning the outer transaction
        with transaction.atomic():
            variable.save()
    except django.db.utils.IntegrityError:
        # presumably the code clashes with an existing row -- retry without it
        variable = build(None)
        variable.save()
    return variable


def process_one_row(year, value, countryname, variablecode, variablename,
                    existing_fao_variables_dict, unit, source, dataset,
                    var_desc, data_values_tuple_list):
    """Queue one data point for bulk insertion into ``data_values``.

    Rows whose ``year`` or ``value`` is ``False``, and rows whose
    ``(countryname, variablecode)`` pair is in ``unique_data_tracker``, are
    ignored. Otherwise the entity and variable are resolved (and created on
    first use, being cached in ``country_name_entity_ref`` and
    ``existing_fao_variables_dict``), and a
    ``(str(value), int(year), entity pk, variable pk)`` tuple is appended to
    ``data_values_tuple_list``. Once that list holds more than 3000 tuples it
    is written with a single ``executemany`` and emptied in place.

    Every call increments the module-level ``processed_values`` counter.
    Returns ``None``.
    """
    global unique_data_tracker
    global processed_values

    processed_values += 1
    if processed_values % 300 == 0:
        # this is done in order to not keep the CPU busy all the time
        time.sleep(0.001)

    if year is False or value is False:
        return
    if (countryname, variablecode) in unique_data_tracker:
        return

    if countryname not in country_name_entity_ref:
        country_name_entity_ref[countryname] = _entity_for_country(countryname)

    if variablename not in existing_fao_variables_dict:
        existing_fao_variables_dict[variablename] = _create_fao_variable(
            variablename, variablecode, unit, var_desc, dataset, source)

    data_values_tuple_list.append((
        str(value),
        int(year),
        country_name_entity_ref[countryname].pk,
        existing_fao_variables_dict[variablename].pk,
    ))

    # insert when the length of the list goes over 3000
    if len(data_values_tuple_list) > 3000:
        # query used for mass inserting to the data_values table
        insert_sql = 'INSERT into data_values (value, year, fk_ent_id, fk_var_id) VALUES (%s, %s, %s, %s)'
        with connection.cursor() as cursor:
            cursor.executemany(insert_sql, data_values_tuple_list)
        del data_values_tuple_list[:]