예제 #1
0
def load_codes(items):
    """
    Create or update a CulturalCodeDescription for every code row.

    Each row in ``items`` is a mapping providing the keys 'Dataset', 'Code',
    'VarID', 'CodeDescription' and 'ShortName'. The row's variable label is
    derived from 'VarID' according to 'Dataset' ('EA' or 'LRB'); rows with
    any other dataset are skipped with a warning, as are rows whose
    variable is not found in the database.

    Returns the number of code descriptions that were saved.
    """
    count = 0
    for row in items:
        dataset = row['Dataset']
        code = row['Code']
        var_id = row['VarID']  # named var_id to avoid shadowing the builtin id()
        description = row['CodeDescription']
        short_description = row['ShortName']

        if dataset == 'EA':
            label = eavar_number_to_label(var_id)
        elif dataset == 'LRB':
            label = bfvar_number_to_label(var_id)
        else:
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning("Unknown dataset, skipping row %s", row)
            continue

        variable = CulturalVariable.objects.filter(label=label).first()
        if not variable:
            logging.warning("Missing variable in database: %s", label)
            continue

        code_description, created = CulturalCodeDescription.objects.get_or_create(
            variable=variable, code=code)
        code_description.description = description
        code_description.short_description = short_description
        code_description.save()
        # Report what actually happened: get_or_create may have returned an
        # existing row, in which case this is an update rather than a create.
        logging.info(
            "%s CulturalCodeDescription: %s",
            "Created" if created else "Updated",
            code_description)
        count += 1
    return count
예제 #2
0
def load_var(var_dict, categories):
    """
    Load one variable row from VariableList.csv.

    ``var_dict`` is a mapping with the keys 'Dataset', 'VarID', 'VarTitle',
    'VarDefinition', 'VarType' and 'IndexCategory' (a comma-separated list
    of category names). ``categories`` is a dict of already known
    CulturalCategory objects keyed by cleaned category name; categories
    created here are added to it so later calls can reuse them.

    Returns True when the variable was saved, False when the row's dataset
    is neither 'EA' nor 'LRB' and the row was skipped.
    """
    dataset = var_dict['Dataset']
    if dataset == 'EA':
        label = eavar_number_to_label(var_dict['VarID'])
        source = get_source("EA")
    elif dataset == 'LRB':
        label = bfvar_number_to_label(var_dict['VarID'])
        source = get_source("Binford")
    else:
        # logging.warn is a deprecated alias (removed in Python 3.13).
        logging.warning("Dataset %s not in database, skipping row", dataset)
        return False

    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=source)
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']

    for c in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(c)
        if not index_category:
            # Unknown category: create it and remember it in the shared cache.
            index_category = categories[c] = CulturalCategory.objects.create(name=c)
            logging.info("Created CulturalCategory: %s", c)

        if index_category not in variable.index_categories.all():
            variable.index_categories.add(index_category)

    variable.save()
    # Only claim creation when get_or_create actually inserted a new row.
    if created:
        logging.info("Created CulturalVariable: %s", label)
    logging.info("Saved variable %s - %s", label, variable.name)
    return True
예제 #3
0
def _load_data(val_row, societies=None, sources=None, variables=None, descriptions=None):
    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        logging.warn(
            "Attempting to load values for %s but no Society object exists, skipping"
            % ext_id)
        return

    society = societies[ext_id]
    variable_id = val_row['VarID']

    if val_row['Dataset'] == 'EA':
        source = get_source("EA")
        label = eavar_number_to_label(variable_id)
    elif val_row['Dataset'] == 'Binford':
        source = get_source("Binford")
        label = bfvar_number_to_label(variable_id)
    else:
        logging.warn("Could not determine dataset source for row %s, skipping" % str(val_row))
        return

    variable = variables.get(label)
    if variable is None:
        logging.warn("Could not find variable %s for society %s" % (variable_id, society.name))
        return

    v = dict(
        variable=variable,
        society=society,
        source=source,
        coded_value=val_row['Code'],
        code=descriptions.get((variable.id, val_row['Code'].strip())),
        focal_year=val_row['Year'],
        comment=val_row['Comment'],
        subcase=val_row['SubCase'])

    refs = set()
    for r in val_row['EthnoReferences'].split(";"):
        r = r.strip()
        author, year = None, None
        m = BINFORD_REF_PATTERN.match(r)
        if m:
            author, year = m.group('author').strip(), m.group('year')
            if author.endswith(','):
                author = author[:-1].strip()
        else:
            ref_short = r.split(",")
            if len(ref_short) == 2:
                author = ref_short[0].strip()
                year = ref_short[1].strip().split(':')[0]
        if author and year:
            ref = sources.get((author, year))
            if ref:
                refs.add(ref.id)
            else:  # pragma: no cover
                logging.warn(
                    "Could not find reference %s, %s in database, skipping reference"
                    % (author, year))
    return v, refs