def load_codes(items):
    """
    Create or update a CulturalCodeDescription for every row in *items*.

    Each row is a mapping from which the keys ``Dataset``, ``Code``,
    ``VarID``, ``CodeDescription`` and ``ShortName`` are read.  The variable
    label is derived from ``VarID`` according to ``Dataset`` (``'EA'`` or
    ``'LRB'``); rows with any other dataset, or whose variable is not found
    in the database, are skipped with a warning.

    :param items: iterable of row mappings.
    :return: number of rows for which a code description was saved.
    """
    count = 0
    for row in items:
        dataset = row['Dataset']
        code = row['Code']
        var_id = row['VarID']  # named var_id so the builtin ``id`` is not shadowed
        description = row['CodeDescription']
        short_description = row['ShortName']

        if dataset == 'EA':
            label = eavar_number_to_label(var_id)
        elif dataset == 'LRB':
            label = bfvar_number_to_label(var_id)
        else:
            logging.warning("Unknown dataset, skipping row %s", row)
            continue

        variable = CulturalVariable.objects.filter(label=label).first()
        if not variable:
            logging.warning("Missing variable in database: %s", label)
            continue

        code_description, created = CulturalCodeDescription.objects.get_or_create(
            variable=variable, code=code)
        # The descriptions are (re)written even for an existing record, so a
        # re-run of the loader refreshes them.
        code_description.description = description
        code_description.short_description = short_description
        code_description.save()
        logging.info(
            "%s CulturalCodeDescription: %s",
            "Created" if created else "Updated", code_description)
        count += 1
    return count
def load_var(var_dict, categories):
    """
    Load variables from VariableList.csv

    Creates or updates the CulturalVariable described by one row and links
    it to the index categories named in the row.

    :param var_dict: row mapping; the keys ``Dataset``, ``VarID``,
        ``VarTitle``, ``VarDefinition``, ``VarType`` and ``IndexCategory``
        (a comma-separated list) are read.
    :param categories: dict used as a cache from cleaned category name to
        category object.  It is mutated: every CulturalCategory created
        here is stored in it under its name.
    :return: True when the variable was saved, False when the row was
        skipped because its dataset is neither ``'EA'`` nor ``'LRB'``.
    """
    dataset = var_dict['Dataset']
    if dataset == 'EA':
        label = eavar_number_to_label(var_dict['VarID'])
        source = get_source("EA")
    elif dataset == 'LRB':
        label = bfvar_number_to_label(var_dict['VarID'])
        source = get_source("Binford")
    else:
        logging.warning("Dataset %s not in database, skipping row", dataset)
        return False

    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=source)
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']

    # Fetch the already-linked categories once instead of once per category
    # in the row; the set is kept in sync as new links are added.
    linked = set(variable.index_categories.all())
    for name in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(name)
        if not index_category:
            index_category = categories[name] = CulturalCategory.objects.create(name=name)
            logging.info("Created CulturalCategory: %s", name)

        if index_category not in linked:
            variable.index_categories.add(index_category)
            linked.add(index_category)

    variable.save()
    if created:
        # Only report creation when get_or_create actually inserted a row.
        logging.info("Created CulturalVariable: %s", label)
    logging.info("Saved variable %s - %s", label, variable.name)
    return True
def _load_data(val_row, societies=None, sources=None, variables=None, descriptions=None):
    """
    Convert one row of coded data into a dict of field values plus the ids
    of the references cited by the row.

    :param val_row: row mapping; the keys ``soc_id``, ``VarID``,
        ``Dataset``, ``Code``, ``Year``, ``Comment``, ``SubCase`` and
        ``EthnoReferences`` (a ``;``-separated list) are read.
    :param societies: mapping from ``soc_id`` to society object.
    :param sources: mapping from ``(author, year)`` to an object with an
        ``id`` attribute.
    :param variables: mapping from variable label to variable object.
    :param descriptions: mapping from ``(variable.id, stripped code)`` to
        the value stored under ``code`` in the result.
    :return: ``None`` when the row is skipped (unknown society, unknown
        dataset or unknown variable); otherwise a tuple ``(v, refs)`` where
        ``v`` is a dict with the keys ``variable``, ``society``, ``source``,
        ``coded_value``, ``code``, ``focal_year``, ``comment`` and
        ``subcase``, and ``refs`` is a set of reference ids.
    """
    # The lookup tables default to None in the signature; treat a missing
    # table as empty so the row is skipped with a warning instead of failing
    # with TypeError/AttributeError on the first lookup.
    societies = {} if societies is None else societies
    sources = {} if sources is None else sources
    variables = {} if variables is None else variables
    descriptions = {} if descriptions is None else descriptions

    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        logging.warning(
            "Attempting to load values for %s but no Society object exists, skipping",
            ext_id)
        return None

    society = societies[ext_id]
    variable_id = val_row['VarID']

    # NOTE(review): this function matches 'Binford' while load_codes and
    # load_var match 'LRB' for the same label helper -- presumably the input
    # files differ; confirm against the data.
    dataset = val_row['Dataset']
    if dataset == 'EA':
        source = get_source("EA")
        label = eavar_number_to_label(variable_id)
    elif dataset == 'Binford':
        source = get_source("Binford")
        label = bfvar_number_to_label(variable_id)
    else:
        logging.warning(
            "Could not determine dataset source for row %s, skipping", str(val_row))
        return None

    variable = variables.get(label)
    if variable is None:
        logging.warning(
            "Could not find variable %s for society %s", variable_id, society.name)
        return None

    code = val_row['Code']
    v = {
        'variable': variable,
        'society': society,
        'source': source,
        'coded_value': code,
        'code': descriptions.get((variable.id, code.strip())),
        'focal_year': val_row['Year'],
        'comment': val_row['Comment'],
        'subcase': val_row['SubCase'],
    }

    refs = set()
    for raw_ref in val_row['EthnoReferences'].split(";"):
        ref_text = raw_ref.strip()
        author, year = None, None
        m = BINFORD_REF_PATTERN.match(ref_text)
        if m:
            author, year = m.group('author').strip(), m.group('year')
            if author.endswith(','):
                # Drop the comma left between author and year.
                author = author[:-1].strip()
        else:
            # Fallback form "author, year[:suffix]"; anything after ':' in
            # the year part is discarded.
            ref_short = ref_text.split(",")
            if len(ref_short) == 2:
                author = ref_short[0].strip()
                year = ref_short[1].strip().split(':')[0]

        if not (author and year):
            # Reference could not be parsed; it is silently ignored.
            continue

        ref = sources.get((author, year))
        if ref:
            refs.add(ref.id)
        else:  # pragma: no cover
            logging.warning(
                "Could not find reference %s, %s in database, skipping reference",
                author, year)
    return v, refs