Пример #1
0
            reader = csv.DictReader(f)

            for row in reader:
                row_number += 1

                variable_name = "{} - {}".format(row['Indicator'].strip(),
                                                 row['Subgroup'].strip())
                if variable_name.lower() not in existing_variables_list:
                    newvariable = Variable(
                        name=variable_name,
                        unit=row['Unit'],
                        code=None,
                        datasetId=newdataset,
                        variableTypeId=VariableType.objects.get(pk=4),
                        sourceId=source_name_to_object[source_name])
                    newvariable.save()
                    variable_name_to_object[
                        variable_name.lower()] = newvariable
                    existing_variables_list.add(newvariable.name.lower())
                else:
                    if variable_name.lower() not in variable_name_to_object:
                        newvariable = Variable.objects.get(
                            name=variable_name, datasetId=newdataset)
                        while DataValue.objects.filter(
                                variableId__pk=newvariable.pk).first():
                            with connection.cursor(
                            ) as c:  # if we don't limit the deleted values, the db might just hang
                                c.execute(
                                    'DELETE FROM %s WHERE variableId = %s LIMIT 10000;'
                                    %
                                    (DataValue._meta.db_table, newvariable.pk))
Пример #2
0
def process_one_row(year, value, countryname, variablecode, variablename,
                    existing_fao_variables_dict, unit, source, dataset,
                    var_desc, data_values_tuple_list):
    """Queue one (value, year, entity, variable) row for bulk insertion.

    Every call bumps the module-level ``processed_values`` counter and
    sleeps for a millisecond on every 300th call. The row is then ignored
    when ``year`` or ``value`` is the ``False`` object, or when the
    ``(countryname, variablecode)`` pair is already present in the
    module-level ``unique_data_tracker`` (which is only read here).

    Otherwise the function resolves, and creates when missing:

    * the ``Entity`` for ``countryname``, cached in the module-level
      ``country_name_entity_ref`` mapping;
    * the ``Variable`` for ``variablename``, cached in
      ``existing_fao_variables_dict``.

    and appends ``(str(value), int(year), entity pk, variable pk)`` to
    ``data_values_tuple_list``. Once that list holds more than 3000 tuples
    it is written with a single ``executemany`` and emptied in place.

    Args:
        year: Year of the observation; converted with ``int()``.
        value: Observation value; stored as ``str(value)``.
        countryname: Entity name as it appears in the input.
        variablecode: Code stored on a newly created ``Variable``.
        variablename: Name of the variable; key of
            ``existing_fao_variables_dict``.
        existing_fao_variables_dict: Mutable cache mapping variable names
            to ``Variable`` objects; updated in place.
        unit: Unit text; a falsy unit is stored as ``''``.
        source: Passed as ``sourceId`` of a newly created ``Variable``.
        dataset: Passed as ``fk_dst_id`` of a newly created ``Variable``.
        var_desc: Description for a newly created ``Variable``.
        data_values_tuple_list: Mutable buffer of pending rows; appended
            to and, when flushed, cleared in place.

    Returns:
        None.
    """
    global processed_values, unique_data_tracker

    processed_values += 1
    if processed_values % 300 == 0:
        # this is done in order to not keep the CPU busy all the time
        time.sleep(0.001)

    # Guard clauses: nothing to store for this row.
    if year is False or value is False:
        return
    if (countryname, variablecode) in unique_data_tracker:
        return

    # Resolve the entity, consulting the per-run cache first.
    if countryname not in country_name_entity_ref:
        lowered_name = countryname.lower()
        if lowered_name in existing_entities_list:
            entity = Entity.objects.get(name=countryname)
        else:
            # Fall back to the country-tool mapping, keyed by the
            # transliterated lower-case name.
            tool_entry = country_tool_names_dict.get(
                unidecode.unidecode(lowered_name), 0)
            if tool_entry:
                entity = Entity.objects.get(name=tool_entry.owid_name)
            else:
                entity = Entity(name=countryname, validated=False)
                entity.save()
        country_name_entity_ref[countryname] = entity

    # Resolve the variable, creating it on first sight.
    if variablename not in existing_fao_variables_dict:
        s_unit = short_unit_extract(unit)

        def build_variable(code):
            # Fresh, unsaved Variable differing only in its ``code``.
            return Variable(
                name=variablename,
                unit=unit or '',
                short_unit=s_unit,
                description=var_desc,
                code=code,
                timespan='',
                fk_dst_id=dataset,
                fk_var_type_id=VariableType.objects.get(pk=4),
                sourceId=source)

        variable = build_variable(variablecode)
        try:
            # atomic() keeps a failed INSERT from poisoning any outer
            # transaction, so the retry below can still run.
            with transaction.atomic():
                variable.save()
        except django.db.utils.IntegrityError:
            # Retry without the code (presumably the code collided with
            # an existing row -- TODO confirm against the schema).
            variable = build_variable(None)
            variable.save()
        existing_fao_variables_dict[variablename] = variable

    entity_pk = country_name_entity_ref[countryname].pk
    variable_pk = existing_fao_variables_dict[variablename].pk
    data_values_tuple_list.append(
        (str(value), int(year), entity_pk, variable_pk))

    # insert when the length of the list goes over 3000
    if len(data_values_tuple_list) > 3000:
        # this is used for constructing the query for mass inserting to
        # the data_values table
        insert_string = 'INSERT into data_values (value, year, fk_ent_id, fk_var_id) VALUES (%s, %s, %s, %s)'
        with connection.cursor() as cursor:
            cursor.executemany(insert_string, data_values_tuple_list)
        data_values_tuple_list.clear()