def handle(self, *args, **options):
    """
    Scan every topic (except "common" and "energy") for relationship
    properties whose underlying relation no longer exists, and report
    them. With options["fix"] set, orphan properties are deleted.
    """
    total_orphans_count = 0
    for topic in Topic.objects.all():
        # escape common & energy as usual
        if topic.slug in ["common", "energy"]:
            continue
        self.stdout.write("Topic: %s" % (topic))
        orphans_count = 0
        try:
            for Model in topic.get_models():
                try:
                    fields = utils.get_model_fields(Model)
                    for field in fields:
                        # Only outgoing relationships that carry properties
                        # (a "through" model) can leave orphan properties.
                        if field["related_model"] and field["direction"] == "out" and "through" in field["rules"]:
                            # Collect the ids of every relationship that still
                            # exists for instances of this model.
                            # FIX: the original inner loop reused the name
                            # `Model` for instances, clobbering the class and
                            # making later `Model.__class__.__name__` wrong.
                            ids = []
                            for instance in Model.objects.all():
                                ids.extend([rel.id for rel in instance.node.relationships.all()])
                            Properties = field["rules"]["through"]
                            for info in Properties.objects.all():
                                if info._relationship not in ids:
                                    self.stdout.write("\t%s is an orphan property of the model %s. The relation doesn't exist anymore." % (info._NodeModel__node, Model.__name__))
                                    orphans_count += 1
                                    total_orphans_count += 1
                                    if options["fix"]:
                                        self.stdout.write("\tremoving %s" % (info))
                                        info.delete()
                except Exception as e:
                    # `Model` is the model class here, so report its name directly.
                    self.stderr.write("\tError with model %s (%s)" % (Model.__name__, e))
            self.stdout.write("\tfound %d orphans" % (orphans_count))
        except Exception as e:
            # FIX: the original referenced `Model` here, which is unbound when
            # topic.get_models() itself raises; report the topic instead.
            self.stderr.write("\tError with topic %s (%s)" % (topic, e))
    self.stdout.write("TOTAL: found %d orphans" % (total_orphans_count))
def get_literal_search_output(self):
    """
    Collect the literal (non-relationship) search entries for this topic.

    Combines, for every model, the fields whose rules declare
    "search_terms" with the explicitly registered literal search terms,
    each as a {'name', 'label', 'subject'} dict.
    """
    def as_entry(term):
        return {'name': term.name, 'label': term.label, 'subject': term.subject}

    registered_terms = self.get_literal_search()
    results = []
    for model in self.get_models():
        for field in utils.get_model_fields(model):
            # Relationship fields never produce literal search entries.
            if field['type'].lower() == 'relationship':
                continue
            if "search_terms" not in field["rules"]:
                continue
            subject_name = model._meta.object_name
            for search_term in field["rules"]["search_terms"]:
                results.append({
                    'name': field['name'],
                    'label': search_term,
                    'subject': subject_name
                })
    return results + [as_entry(term) for term in registered_terms]
def get_columns(model):
    """
    Split a model's fields into plain columns and relationship edges.

    Returns a (columns, edges) tuple where `columns` lists the scalar
    field names (excluding 'id') and `edges` maps each relation type to
    [model, field name, related model].
    """
    columns, edges = [], {}
    for field in utils.get_model_fields(model):
        if field['type'] == 'Relationship':
            edges[field['rel_type']] = [
                field['model'],
                field['name'],
                field['related_model'],
            ]
        elif field['name'] != 'id':
            columns.append(field['name'])
    return (columns, edges)
def field(self):
    """
    Resolve the field description matching self.name across the topic's
    models, memoized in the topic cache.

    Returns the field dict, or None when self.name is empty or unknown.
    """
    cache_key = "%s__field" % (self.name)
    resolved = utils.topic_cache.get(self.topic, cache_key)
    if resolved is None and self.name:
        # Cache miss: scan every relationship field of every model.
        for model in self.topic.get_models():
            for candidate in utils.get_model_fields(model):
                if candidate["name"] != self.name:
                    continue
                resolved = candidate
                # The "through" entry is not picklable, so it is dropped
                # before the field is stored in the cache.
                resolved["rules"]["through"] = None
                utils.topic_cache.set(self.topic, cache_key, resolved)
    return resolved
def rdf_search(self, subject, predicate, obj): obj = obj["name"] if "name" in obj else obj # retrieve all models in current topic all_models = dict((model.__name__, model) for model in self.topic.get_models()) # If the received obj describe a literal value if self.is_registered_literal(predicate["name"]): # Get the field name into the database field_name = predicate["name"] # Build the request query = """ START root=node(*) MATCH (root)<-[:`<<INSTANCE>>`]-(type) WHERE HAS(root.name) AND HAS(root.{field}) AND root.{field} = {value} AND type.model_name = {model} AND type.app_label = {app} RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model """.format( field=field_name, value=adapt(obj), model=adapt(subject["name"]), app=adapt(self.topic.app_label()) ) # If the received obj describe a literal value elif self.is_registered_relationship(predicate["name"]): fields = utils.get_model_fields( all_models[predicate["subject"]] ) # Get the field name into the database relationships = [ field for field in fields if field["name"] == predicate["name"] ] # We didn't find the predicate if not len(relationships): return {'errors': 'Unkown predicate type'} relationship = relationships[0]["rel_type"] # Query to get every result query = """ START st=node(*) MATCH (st)<-[:`{relationship}`]-(root)<-[:`<<INSTANCE>>`]-(type) WHERE HAS(root.name) AND HAS(st.name) AND st.name = {name} AND type.app_label = {app} RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model """.format( relationship=relationship, name=adapt(obj), app=adapt(self.topic.app_label()) ) else: return {'errors': 'Unkown predicate type'} return connection.cypher(query).to_dicts()
def summary_forms(self, bundle, request): available_resources = {} # Get the model's rules manager rulesManager = request.current_topic.get_rules() # Fetch every registered model # to print out its rules for model in self.topic.get_models(): name = model.__name__.lower() rules = rulesManager.model(model).all() fields = utils.get_model_fields(model) verbose_name = getattr(model._meta, "verbose_name", name) verbose_name_plural = getattr(model._meta, "verbose_name_plural", verbose_name + "s") for key in rules: # Filter rules to keep only Neomatch if isinstance(rules[key], Neomatch): fields.append({ "name" : key, "type" : "ExtendedRelationship", "verbose_name" : rules[key].title, "rules" : {}, "related_model": rules[key].target_model.__name__ }) for field in fields: # Create a copy of the rule to avoid compromize the rules singleton field["rules"] = field["rules"].copy() for key, rule in field["rules"].items(): # Convert class to model name if inspect.isclass(rule): field["rules"][key] = getattr(rule, "__name__", rule) try: idx = model.__idx__ except AttributeError: idx = 0 available_resources[name] = { 'description' : getattr(model, "_description", None), 'topic' : getattr(model, "_topic", self.topic.slug) or self.topic.slug, 'model' : getattr(model, "__name__", ""), 'verbose_name' : verbose_name, 'verbose_name_plural' : verbose_name_plural, 'name' : name, 'fields' : fields, 'rules' : rules, 'index' : idx } return available_resources
def field(self):
    """
    Resolve the field description matching self.name within the topic's
    models, memoized in a short-lived cache keyed on the topic module.

    Returns the field dict, or None when self.name is empty or unknown.
    """
    found = None
    if self.name:
        # Build a cache key with the topic token
        cache_key = "%s__%s__field" % (
            self.topic.module,
            self.name
        )
        cached = cache.get(cache_key)
        if cached is not None:
            # Try to use the cache value
            found = cached
        else:
            # Retreive every relationship field for this model
            for model in self.topic.get_models():
                for candidate in utils.get_model_fields(model):
                    if candidate["name"] == self.name:
                        found = candidate
            # Very small cache to optimize recording
            cache.set(cache_key, found, 10)
    return found
def summary_forms(self, bundle):
    """
    Describe every registered model that belongs to the detective apps.

    For each model: its fields (plus synthetic "ExtendedRelationship"
    entries derived from Neomatch rules), display names, scope and
    rules. Returns a dict keyed by lowercased model name.
    """
    available_resources = {}
    # Get the model's rules manager
    rulesManager = register_model_rules()
    # Fetch every registered model
    # to print out its rules
    for model in get_registered_models():
        # Only keep resources that have a model and belong to the apps
        if model is not None and model.__module__.startswith("app.detective.apps"):
            name = model.__name__.lower()
            rules = rulesManager.model(model).all()
            fields = get_model_fields(model)
            verbose_name = getattr(model._meta, "verbose_name", name).title()
            verbose_name_plural = getattr(model._meta, "verbose_name_plural", verbose_name + "s").title()
            # Extract the model parent to find its scope
            scope = model.__module__.split(".")[-2]
            for key in rules:
                # Filter rules to keep only Neomatch
                if isinstance(rules[key], Neomatch):
                    fields.append({
                        "name"         : key,
                        "type"         : "ExtendedRelationship",
                        "verbose_name" : rules[key].title,
                        "rules"        : {},
                        "related_model": rules[key].target_model.__name__
                    })
            available_resources[name] = {
                'description'         : getattr(model, "_description", None),
                'scope'               : getattr(model, "_scope", scope),
                # FIX: was getattr(model, "__name_", "") — a typo that always
                # returned "" since classes define __name__, not __name_.
                'model'               : getattr(model, "__name__", ""),
                'verbose_name'        : verbose_name,
                'verbose_name_plural' : verbose_name_plural,
                'name'                : name,
                'fields'              : fields,
                'rules'               : rules
            }
    return available_resources
def formfield_for_choice_field(self, db_field, request, **kwargs):
    """
    Populate the 'name' field choices with every field of every model
    belonging to the topic referenced by the parent form.
    """
    if db_field.name == 'name' and hasattr(request, "topic_id"):
        # We add choices for this field using the current topic's models
        kwargs["choices"] = []
        # Get the current topic with the ID set into the parent form
        topic = Topic.objects.get(id=request.topic_id)
        for model in topic.get_models():
            group_label = getattr(model._meta, "verbose_name").title()
            group_choices = []
            # Retreive every field for this model, skipping auto ids
            for field in utils.get_model_fields(model):
                if field["type"] == 'AutoField':
                    continue
                label = field["verbose_name"].title()
                # Suffix relationship fields with an ellipsis
                if field["type"] == 'Relationship':
                    label += "..."
                group_choices.append([field["name"], label])
            # Add the choice subset only if it contains elements
            if group_choices:
                kwargs["choices"].append((group_label, group_choices,))
    return super(SearchTermInline, self).formfield_for_choice_field(db_field, request, **kwargs)
def summary_forms(self, bundle, request):
    """
    Describe every model of the current topic for form generation.

    For each model: its fields (plus synthetic "ExtendedRelationship"
    entries derived from Neomatch rules), display names and rules.
    Returns a dict keyed by lowercased model name.
    """
    available_resources = {}
    # Get the model's rules manager
    rulesManager = topics_rules()
    # Fetch every registered model
    # to print out its rules
    for model in self.topic.get_models():
        name = model.__name__.lower()
        rules = rulesManager.model(model).all()
        fields = utils.get_model_fields(model)
        verbose_name = getattr(model._meta, "verbose_name", name).title()
        verbose_name_plural = getattr(model._meta, "verbose_name_plural", verbose_name + "s").title()
        for key in rules:
            # Filter rules to keep only Neomatch
            if isinstance(rules[key], Neomatch):
                fields.append({
                    "name"         : key,
                    "type"         : "ExtendedRelationship",
                    "verbose_name" : rules[key].title,
                    "rules"        : {},
                    "related_model": rules[key].target_model.__name__
                })
        available_resources[name] = {
            'description'         : getattr(model, "_description", None),
            'topic'               : getattr(model, "_topic", self.topic.slug) or self.topic.slug,
            # FIX: was getattr(model, "__name_", "") — a typo that always
            # returned "" since classes define __name__, not __name_.
            'model'               : getattr(model, "__name__", ""),
            'verbose_name'        : verbose_name,
            'verbose_name_plural' : verbose_name_plural,
            'name'                : name,
            'fields'              : fields,
            'rules'               : rules
        }
    return available_resources
def process_bulk_parsing_and_save_as_model(topic, files, start_time=None):
    """
    Job which parses uploaded content, validates and saves them as model.

    Parameters:
        topic      -- the Topic whose models the CSV rows instantiate
        files      -- list/tuple of (file_name, content) pairs; a CSV is a
                      relation file when its 1st and 3rd header columns end
                      in "_id", otherwise an entity file whose first column
                      is "<model_name>_id"
        start_time -- optional epoch used to compute the reported duration

    Returns a dict with 'duration', 'inserted' counts and the collected
    'errors'; on fatal failure returns {"errors": [...]}. Progress is
    reported through the current rq job's meta, when one exists.
    """
    start_time = start_time != None and start_time or time.time()
    entities                 = {}
    relations                = []
    errors                   = []
    id_mapping               = {}
    nb_lines                 = 0
    file_reading_progression = 0
    job = get_current_job()
    # Define Exceptions
    class Error (Exception):
        """
        Generic Custom Exception for this endpoint.
        Include the topic.
        """
        def __init__(self, **kwargs):
            """ set the topic and add all the parameters as attributes """
            self.topic = topic.title
            for key, value in kwargs.items():
                setattr(self, key, value)
        def __str__(self):
            # NOTE(review): returns a dict, not a string — str(e) would raise
            # a TypeError; errors are reported via e.__dict__ below, so this
            # is latent. Confirm before relying on str() of these exceptions.
            return self.__dict__
    class WarningCastingValueFail     (Error): pass
    class WarningValidationError      (Error): pass
    class WarningKeyUnknown           (Error): pass
    class WarningInformationIsMissing (Error): pass
    class AttributeDoesntExist        (Error): pass
    class WrongCSVSyntax              (Error): pass
    class ColumnUnknow                (Error): pass
    class ModelDoesntExist            (Error): pass
    class RelationDoesntExist         (Error): pass
    try:
        assert type(files) in (tuple, list), type(files)
        assert len(files) > 0, "You need to upload at least one file."
        assert type(files[0]) in (tuple, list)
        assert len(files[0]) == 2
        # retrieve all models in current topic
        all_models = dict((model.__name__, model) for model in topic.get_models())
        # iterate over all files and dissociate entities .csv from relations .csv
        for file in files:
            if type(file) is tuple:
                file_name = file[0]
                file      = file[1]
            else:
                raise Exception()
            csv_reader = utils.open_csv(file)
            header     = csv_reader.next()
            assert len(header) > 1, "{file_name} header should have at least 2 columns"
            assert header[0].endswith("_id"), "{file_name} : First column should begin with a header like <model_name>_id. Actually {first_col}".format(file_name=file_name, first_col=header[0])
            if len(header) >=3 and header[0].endswith("_id") and header[2].endswith("_id"):
                # this is a relationship file
                relations.append((file_name, file))
            else:
                # this is an entities file
                model_name = utils.to_class_name(header[0].replace("_id", ""))
                if model_name in all_models.keys():
                    entities[model_name] = (file_name, file)
                else:
                    raise ModelDoesntExist(model=model_name, file=file_name, models_availables=all_models.keys())
            nb_lines += len(file) - 1 # -1 removes headers
        # first iterate over entities
        logger.debug("BulkUpload: creating entities")
        for entity, (file_name, file) in entities.items():
            csv_reader = utils.open_csv(file)
            header     = csv_reader.next()
            # must check that all columns map to an existing model field
            fields       = utils.get_model_fields(all_models[entity])
            fields_types = {}
            for field in fields:
                fields_types[field['name']] = field['type']
            field_names = [field['name'] for field in fields]
            columns     = []
            for column in header[1:]:
                column = utils.to_underscores(column)
                if not column in field_names and not column.endswith("__sources__"):
                    raise ColumnUnknow(file=file_name, column=column, model=entity, attributes_available=field_names)
                    # NOTE(review): unreachable — the raise above exits first.
                    break
                if column.endswith("__sources__"):
                    # "<field>__sources__" columns hold source references,
                    # not model data.
                    column_type = "__sources__"
                    column = column[:-len("__sources__")]
                    if not column in field_names:
                        raise ColumnUnknow(file=file_name, column=column, model=entity, attributes_available=field_names)
                        # NOTE(review): unreachable — the raise above exits first.
                        break
                else:
                    column_type = fields_types.get(column, None)
                columns.append((column, column_type))
            else:
                # here, we know that all columns are valid
                for row in csv_reader:
                    data      = {}
                    sources   = {}
                    entity_id = row[0]
                    for i, (column, column_type) in enumerate(columns):
                        value = str(row[i+1]).decode('utf-8')
                        # cast value if needed
                        if value:
                            try:
                                if "Integer" in column_type:
                                    value = int(value)
                                # TODO: cast float
                                if "Date" in column_type:
                                    value = datetime.datetime(*map(int, re.split('[^\d]', value)[:3])).replace(tzinfo=utc)
                            except Exception as e:
                                e = WarningCastingValueFail(
                                    column_name = column,
                                    value       = value,
                                    type        = column_type,
                                    data        = data,
                                    model       = entity,
                                    file        = file_name,
                                    line        = csv_reader.line_num,
                                    error       = str(e)
                                )
                                errors.append(e)
                                # skip the whole row on a cast failure
                                break
                        if column_type == "__sources__":
                            sources[column] = value
                        else:
                            data[column] = value
                    else:
                        # instanciate a model (only when no column broke out)
                        try:
                            item = all_models[entity].objects.create(**data)
                            # map the object with the ID defined in the .csv
                            id_mapping[(entity, entity_id)] = item
                            # create sources ("||" separates multiple references)
                            for sourced_field, reference in sources.items():
                                for ref in reference.split("||"):
                                    FieldSource.objects.create(individual=item.id, field=sourced_field, reference=ref)
                            # FIXME: job can be accessed somewhere else (i.e detective/topics/common/jobs.py:JobResource)
                            # Concurrent access are not secure here.
                            # For now we refresh the job just before saving it.
                            file_reading_progression += 1
                            if job:
                                job.refresh()
                                job.meta["file_reading_progression"] = (float(file_reading_progression) / float(nb_lines)) * 100
                                job.meta["file_reading"] = file_name
                                job.save()
                        except Exception as e:
                            errors.append(
                                WarningValidationError(
                                    data  = data,
                                    model = entity,
                                    file  = file_name,
                                    line  = csv_reader.line_num,
                                    error = str(e)
                                )
                            )
        inserted_relations = 0
        # then iterate over relations
        logger.debug("BulkUpload: creating relations")
        for file_name, file in relations:
            # create a csv reader
            csv_reader      = utils.open_csv(file)
            csv_header      = csv_reader.next()
            relation_name   = utils.to_underscores(csv_header[1])
            model_from      = utils.to_class_name(csv_header[0].replace("_id", ""))
            model_to        = utils.to_class_name(csv_header[2].replace("_id", ""))
            # extra columns (4th onwards) are relationship properties
            properties_name = csv_header[3:]
            # retrieve ModelProperties from related model
            ModelProperties = topic.get_rules().model(all_models[model_from]).field(relation_name).get("through")
            # check that the relation actually exists between the two objects
            try:
                getattr(all_models[model_from], relation_name)
            except Exception as e:
                raise RelationDoesntExist(
                    file             = file_name,
                    model_from       = model_from,
                    model_to         = model_to,
                    relation_name    = relation_name,
                    fields_available = [field['name'] for field in utils.iterate_model_fields(all_models[model_from])],
                    error            = str(e))
            for row in csv_reader:
                id_from    = row[0]
                id_to      = row[2]
                properties = [p.decode('utf-8') for p in row[3:]]
                if id_to and id_from:
                    try:
                        instance_from = id_mapping[(model_from, id_from)]
                        instance_to   = id_mapping[(model_to, id_to)]
                        getattr(instance_from, relation_name).add(instance_to)
                        # add properties if needed
                        if ModelProperties and properties_name and properties:
                            # save the relationship to create an id
                            instance_from.save()
                            # retrieve this id
                            relation_id = next(rel.id for rel in instance_from.node.relationships.outgoing() if rel.end.id == instance_to.id)
                            # properties of the relationship
                            relation_args = {
                                "_endnodes"     : [id_mapping[(model_from, id_from)].id, instance_to.id],
                                "_relationship" : relation_id,
                            }
                            # Pairwise the properties with their names
                            relation_args.update(zip(properties_name, properties))
                            try:
                                ModelProperties.objects.create(**relation_args)
                            except TypeError as e:
                                # a property column doesn't match a model attribute
                                errors.append(
                                    AttributeDoesntExist(
                                        file          = file_name,
                                        line          = csv_reader.line_num,
                                        model_from    = model_from,
                                        id_from       = id_from,
                                        model_to      = model_to,
                                        id_to         = id_to,
                                        relation_args = relation_args,
                                        error         = str(e)
                                    )
                                )
                        # update the job
                        inserted_relations       += 1
                        file_reading_progression += 1
                        if job:
                            job.refresh()
                            job.meta["file_reading_progression"] = (float(file_reading_progression) / float(nb_lines)) * 100
                            job.meta["file_reading"] = file_name
                            job.save()
                    except KeyError as e:
                        # one of the ids doesn't map to a created entity
                        errors.append(
                            WarningKeyUnknown(
                                file          = file_name,
                                line          = csv_reader.line_num,
                                model_from    = model_from,
                                id_from       = id_from,
                                model_to      = model_to,
                                id_to         = id_to,
                                relation_name = relation_name,
                                error         = str(e)
                            )
                        )
                    except Exception as e:
                        # Error unknown, we break the process to alert the user
                        raise Error(
                            file          = file_name,
                            line          = csv_reader.line_num,
                            model_from    = model_from,
                            id_from       = id_from,
                            model_to      = model_to,
                            id_to         = id_to,
                            relation_name = relation_name,
                            error         = str(e))
                else:
                    # A key is missing (id_from or id_to) but we don't want to stop the parsing.
                    # Then we store the wrong line to return it to the user.
                    errors.append(
                        WarningInformationIsMissing(
                            file=file_name, row=row, line=csv_reader.line_num, id_to=id_to, id_from=id_from
                        )
                    )
        # Save everything
        saved = 0
        logger.debug("BulkUpload: saving %d objects" % (len(id_mapping)))
        if job:
            job.refresh()
            job.meta["objects_to_save"] = len(id_mapping)
            job.save()
        for item in id_mapping.values():
            item.save()
            saved += 1
            if job:
                job.refresh()
                job.meta["saving_progression"] = saved
                job.save()
        if job:
            job.refresh()
        # "track" in meta means the user asked to be notified by email
        if job and "track" in job.meta:
            from django.core.mail import send_mail
            user = User.objects.get(pk=job.meta["user"])
            send_mail("upload finished", "your upload just finished", settings.DEFAULT_FROM_EMAIL, (user.email,))
        return {
            'duration' : (time.time() - start_time),
            'inserted' : {
                'objects' : saved,
                'links'   : inserted_relations
            },
            "errors" : sorted([dict([(e.__class__.__name__, str(e.__dict__))]) for e in errors])
        }
    except Exception as e:
        import traceback
        logger.error(traceback.format_exc())
        if e.__dict__:
            message = str(e.__dict__)
        else:
            message = e.message
        return {
            "errors" : [{e.__class__.__name__ : message}]
        }
def process_parsing(topic, files):
    """
    Job which reads the uploaded files, validate and saves them as model.

    Parameters:
        topic -- the Topic whose models the CSV rows instantiate
        files -- list/tuple of (file_name, content) pairs (or file-like
                 objects); a CSV is a relation file when its 1st and 3rd
                 header columns end in "_id", otherwise an entity file
                 whose first column is "<model_name>_id"

    Returns a dict with 'inserted' counts and collected 'errors'; on
    fatal failure returns {"errors": [...]}.
    """
    entities   = {}
    relations  = []
    errors     = []
    id_mapping = {}
    assert type(files) in (tuple, list)
    assert len(files) > 0
    assert type(files[0]) in (tuple, list)
    assert len(files[0]) == 2
    # Define Exceptions
    class Error(Exception):
        """
        Generic Custom Exception for this endpoint.
        Include the topic.
        """
        def __init__(self, **kwargs):
            """ set the topic and add all the parameters as attributes """
            self.topic = topic.title
            for key, value in kwargs.items():
                setattr(self, key, value)
        def __str__(self):
            # FIX: __str__ must return a string; the original returned the
            # __dict__ itself, which made str(e) raise a TypeError.
            return str(self.__dict__)
    class WarningCastingValueFail     (Error): pass
    class WarningValidationError      (Error): pass
    class WarningKeyUnknown           (Error): pass
    class WarningInformationIsMissing (Error): pass
    class AttributeDoesntExist        (Error): pass
    class WrongCSVSyntax              (Error): pass
    class ColumnUnknow                (Error): pass
    class ModelDoesntExist            (Error): pass
    class RelationDoesntExist         (Error): pass
    try:
        # retrieve all models in current topic
        all_models = dict((model.__name__, model) for model in topic.get_models())
        # iterate over all files and dissociate entities .csv from relations .csv
        for file in files:
            if type(file) is tuple:
                file_name = file[0]
                file      = file[1]
            elif hasattr(file, "read"):
                file_name = file.name
            else:
                raise Exception("ERROR")
            csv_reader = utils.open_csv(file)
            header     = csv_reader.next()
            assert len(header) > 1, "header should have at least 2 columns"
            assert header[0].endswith("_id"), "First column should begin with a header like <model_name>_id"
            if len(header) >= 3 and header[0].endswith("_id") and header[2].endswith("_id"):
                # this is a relationship file
                relations.append((file_name, file))
            else:
                # this is an entities file
                model_name = utils.to_class_name(header[0].replace("_id", ""))
                if model_name in all_models.keys():
                    entities[model_name] = (file_name, file)
                else:
                    raise ModelDoesntExist(model=model_name, file=file_name, models_availables=all_models.keys())
        # first iterate over entities
        logger.debug("BulkUpload: creating entities")
        for entity, (file_name, file) in entities.items():
            csv_reader = utils.open_csv(file)
            header     = csv_reader.next()
            # must check that all columns map to an existing model field
            fields       = utils.get_model_fields(all_models[entity])
            fields_types = {}
            for field in fields:
                fields_types[field['name']] = field['type']
            field_names = [field['name'] for field in fields]
            columns     = []
            for column in header[1:]:
                column = utils.to_underscores(column)
                # FIX: the original used `column is not ''`, an identity
                # comparison with a string literal — implementation-dependent.
                if column != '':
                    if not column in field_names:
                        raise ColumnUnknow(file=file_name, column=column, model=entity, attributes_available=field_names)
                    column_type = fields_types[column]
                    columns.append((column, column_type))
            # here, we know that all columns are valid
            # (an unknown column raised ColumnUnknow above)
            for row in csv_reader:
                data = {}
                id   = row[0]
                for i, (column, column_type) in enumerate(columns):
                    value = str(row[i+1]).decode('utf-8')
                    # cast value if needed
                    if value:
                        try:
                            if "Integer" in column_type:
                                value = int(value)
                            # TODO: cast float
                            if "Date" in column_type:
                                value = datetime.datetime(*map(int, re.split('[^\d]', value)[:-1])).replace(tzinfo=utc)
                        except Exception as e:
                            errors.append(
                                WarningCastingValueFail(
                                    column_name = column,
                                    value       = value,
                                    type        = column_type,
                                    data        = data,
                                    model       = entity,
                                    file        = file_name,
                                    line        = csv_reader.line_num,
                                    error       = str(e)
                                )
                            )
                            # skip the whole row on a cast failure
                            break
                    data[column] = value
                else:
                    # instanciate a model (only when no column broke out)
                    try:
                        item = all_models[entity].objects.create(**data)
                        # map the object with the ID defined in the .csv
                        id_mapping[(entity, id)] = item
                    except Exception as e:
                        errors.append(
                            WarningValidationError(
                                data  = data,
                                model = entity,
                                file  = file_name,
                                line  = csv_reader.line_num,
                                error = str(e)
                            )
                        )
        inserted_relations = 0
        # then iterate over relations
        logger.debug("BulkUpload: creating relations")
        for file_name, file in relations:
            # create a csv reader
            csv_reader    = utils.open_csv(file)
            csv_header    = csv_reader.next()
            relation_name = utils.to_underscores(csv_header[1])
            model_from    = utils.to_class_name(csv_header[0].replace("_id", ""))
            model_to      = utils.to_class_name(csv_header[2].replace("_id", ""))
            # check that the relation actually exists between the two objects
            try:
                getattr(all_models[model_from], relation_name)
            except Exception as e:
                raise RelationDoesntExist(
                    file             = file_name,
                    model_from       = model_from,
                    model_to         = model_to,
                    relation_name    = relation_name,
                    fields_available = [field['name'] for field in utils.get_model_fields(all_models[model_from])],
                    error            = str(e))
            for row in csv_reader:
                id_from = row[0]
                id_to   = row[2]
                if id_to and id_from:
                    try:
                        getattr(id_mapping[(model_from, id_from)], relation_name).add(id_mapping[(model_to, id_to)])
                        inserted_relations += 1
                    except KeyError as e:
                        # one of the ids doesn't map to a created entity
                        errors.append(
                            WarningKeyUnknown(
                                file          = file_name,
                                line          = csv_reader.line_num,
                                model_from    = model_from,
                                id_from       = id_from,
                                model_to      = model_to,
                                id_to         = id_to,
                                relation_name = relation_name,
                                error         = str(e)
                            )
                        )
                    except Exception as e:
                        # Error unknown, we break the process to alert the user
                        raise Error(
                            file          = file_name,
                            line          = csv_reader.line_num,
                            model_from    = model_from,
                            id_from       = id_from,
                            model_to      = model_to,
                            id_to         = id_to,
                            relation_name = relation_name,
                            error         = str(e))
                else:
                    # A key is missing (id_from or id_to) but we don't want to stop the parsing.
                    # Then we store the wrong line to return it to the user.
                    errors.append(
                        WarningInformationIsMissing(
                            file=file_name, row=row, line=csv_reader.line_num, id_to=id_to, id_from=id_from
                        )
                    )
        # Save everything
        saved = 0
        logger.debug("BulkUpload: saving %d objects" % (len(id_mapping)))
        for item in id_mapping.values():
            item.save()
            saved += 1
        return {
            'inserted' : {
                'objects' : saved,
                'links'   : inserted_relations
            },
            "errors" : sorted([dict([(e.__class__.__name__, str(e.__dict__))]) for e in errors])
        }
    except Exception as e:
        import traceback
        logger.error(traceback.format_exc())
        return {
            "errors" : [{e.__class__.__name__ : str(e.__dict__)}]
        }
def process_bulk_parsing_and_save_as_model(topic, files, start_time=None):
    """
    Job which parses uploaded content, validates and saves them as model.

    NOTE(review): near-duplicate of the other
    process_bulk_parsing_and_save_as_model in this file (different
    auto-formatting only) — consider consolidating into one definition.

    Parameters:
        topic      -- the Topic whose models the CSV rows instantiate
        files      -- list/tuple of (file_name, content) pairs; a CSV is a
                      relation file when its 1st and 3rd header columns end
                      in "_id", otherwise an entity file whose first column
                      is "<model_name>_id"
        start_time -- optional epoch used to compute the reported duration

    Returns a dict with 'duration', 'inserted' counts and the collected
    'errors'; on fatal failure returns {"errors": [...]}. Progress is
    reported through the current rq job's meta, when one exists.
    """
    start_time = start_time != None and start_time or time.time()
    entities = {}
    relations = []
    errors = []
    id_mapping = {}
    nb_lines = 0
    file_reading_progression = 0
    job = get_current_job()

    # Define Exceptions
    class Error(Exception):
        """
        Generic Custom Exception for this endpoint.
        Include the topic.
        """
        def __init__(self, **kwargs):
            """ set the topic and add all the parameters as attributes """
            self.topic = topic.title
            for key, value in kwargs.items():
                setattr(self, key, value)

        def __str__(self):
            # NOTE(review): returns a dict, not a string — str(e) would raise
            # a TypeError; errors are reported via e.__dict__ below, so this
            # is latent. Confirm before relying on str() of these exceptions.
            return self.__dict__

    class WarningCastingValueFail(Error): pass
    class WarningValidationError(Error): pass
    class WarningKeyUnknown(Error): pass
    class WarningInformationIsMissing(Error): pass
    class AttributeDoesntExist(Error): pass
    class WrongCSVSyntax(Error): pass
    class ColumnUnknow(Error): pass
    class ModelDoesntExist(Error): pass
    class RelationDoesntExist(Error): pass

    try:
        assert type(files) in (tuple, list), type(files)
        assert len(files) > 0, "You need to upload at least one file."
        assert type(files[0]) in (tuple, list)
        assert len(files[0]) == 2
        # retrieve all models in current topic
        all_models = dict(
            (model.__name__, model) for model in topic.get_models())
        # iterate over all files and dissociate entities .csv from relations .csv
        for file in files:
            if type(file) is tuple:
                file_name = file[0]
                file = file[1]
            else:
                raise Exception()
            csv_reader = utils.open_csv(file)
            header = csv_reader.next()
            assert len(
                header
            ) > 1, "{file_name} header should have at least 2 columns"
            assert header[0].endswith(
                "_id"
            ), "{file_name} : First column should begin with a header like <model_name>_id. Actually {first_col}".format(
                file_name=file_name, first_col=header[0])
            if len(header) >= 3 and header[0].endswith(
                    "_id") and header[2].endswith("_id"):
                # this is a relationship file
                relations.append((file_name, file))
            else:
                # this is an entities file
                model_name = utils.to_class_name(header[0].replace("_id", ""))
                if model_name in all_models.keys():
                    entities[model_name] = (file_name, file)
                else:
                    raise ModelDoesntExist(model=model_name,
                                           file=file_name,
                                           models_availables=all_models.keys())
            nb_lines += len(file) - 1  # -1 removes headers
        # first iterate over entities
        logger.debug("BulkUpload: creating entities")
        for entity, (file_name, file) in entities.items():
            csv_reader = utils.open_csv(file)
            header = csv_reader.next()
            # must check that all columns map to an existing model field
            fields = utils.get_model_fields(all_models[entity])
            fields_types = {}
            for field in fields:
                fields_types[field['name']] = field['type']
            field_names = [field['name'] for field in fields]
            columns = []
            for column in header[1:]:
                column = utils.to_underscores(column)
                if not column in field_names and not column.endswith(
                        "__sources__"):
                    raise ColumnUnknow(file=file_name,
                                       column=column,
                                       model=entity,
                                       attributes_available=field_names)
                    # NOTE(review): unreachable — the raise above exits first.
                    break
                if column.endswith("__sources__"):
                    # "<field>__sources__" columns hold source references,
                    # not model data.
                    column_type = "__sources__"
                    column = column[:-len("__sources__")]
                    if not column in field_names:
                        raise ColumnUnknow(file=file_name,
                                           column=column,
                                           model=entity,
                                           attributes_available=field_names)
                        # NOTE(review): unreachable — the raise above exits first.
                        break
                else:
                    column_type = fields_types.get(column, None)
                columns.append((column, column_type))
            else:
                # here, we know that all columns are valid
                for row in csv_reader:
                    data = {}
                    sources = {}
                    entity_id = row[0]
                    for i, (column, column_type) in enumerate(columns):
                        value = str(row[i + 1]).decode('utf-8')
                        # cast value if needed
                        if value:
                            try:
                                if "Integer" in column_type:
                                    value = int(value)
                                # TODO: cast float
                                if "Date" in column_type:
                                    value = datetime.datetime(*map(
                                        int,
                                        re.split('[^\d]', value)[:3])).replace(
                                            tzinfo=utc)
                            except Exception as e:
                                e = WarningCastingValueFail(
                                    column_name=column,
                                    value=value,
                                    type=column_type,
                                    data=data,
                                    model=entity,
                                    file=file_name,
                                    line=csv_reader.line_num,
                                    error=str(e))
                                errors.append(e)
                                # skip the whole row on a cast failure
                                break
                        if column_type == "__sources__":
                            sources[column] = value
                        else:
                            data[column] = value
                    else:
                        # instanciate a model (only when no column broke out)
                        try:
                            item = all_models[entity].objects.create(**data)
                            # map the object with the ID defined in the .csv
                            id_mapping[(entity, entity_id)] = item
                            # create sources ("||" separates multiple references)
                            for sourced_field, reference in sources.items():
                                for ref in reference.split("||"):
                                    FieldSource.objects.create(
                                        individual=item.id,
                                        field=sourced_field,
                                        reference=ref)
                            # FIXME: job can be accessed somewhere else (i.e detective/topics/common/jobs.py:JobResource)
                            # Concurrent access are not secure here.
                            # For now we refresh the job just before saving it.
                            file_reading_progression += 1
                            if job:
                                job.refresh()
                                job.meta["file_reading_progression"] = (
                                    float(file_reading_progression) /
                                    float(nb_lines)) * 100
                                job.meta["file_reading"] = file_name
                                job.save()
                        except Exception as e:
                            errors.append(
                                WarningValidationError(
                                    data=data,
                                    model=entity,
                                    file=file_name,
                                    line=csv_reader.line_num,
                                    error=str(e)))
        inserted_relations = 0
        # then iterate over relations
        logger.debug("BulkUpload: creating relations")
        for file_name, file in relations:
            # create a csv reader
            csv_reader = utils.open_csv(file)
            csv_header = csv_reader.next()
            relation_name = utils.to_underscores(csv_header[1])
            model_from = utils.to_class_name(csv_header[0].replace("_id", ""))
            model_to = utils.to_class_name(csv_header[2].replace("_id", ""))
            # extra columns (4th onwards) are relationship properties
            properties_name = csv_header[3:]
            # retrieve ModelProperties from related model
            ModelProperties = topic.get_rules().model(
                all_models[model_from]).field(relation_name).get("through")
            # check that the relation actually exists between the two objects
            try:
                getattr(all_models[model_from], relation_name)
            except Exception as e:
                raise RelationDoesntExist(
                    file=file_name,
                    model_from=model_from,
                    model_to=model_to,
                    relation_name=relation_name,
                    fields_available=[
                        field['name']
                        for field in utils.iterate_model_fields(
                            all_models[model_from])
                    ],
                    error=str(e))
            for row in csv_reader:
                id_from = row[0]
                id_to = row[2]
                properties = [p.decode('utf-8') for p in row[3:]]
                if id_to and id_from:
                    try:
                        instance_from = id_mapping[(model_from, id_from)]
                        instance_to = id_mapping[(model_to, id_to)]
                        getattr(instance_from, relation_name).add(instance_to)
                        # add properties if needed
                        if ModelProperties and properties_name and properties:
                            # save the relationship to create an id
                            instance_from.save()
                            # retrieve this id
                            relation_id = next(
                                rel.id
                                for rel in instance_from.node.relationships.outgoing()
                                if rel.end.id == instance_to.id)
                            # properties of the relationship
                            relation_args = {
                                "_endnodes": [
                                    id_mapping[(model_from, id_from)].id,
                                    instance_to.id
                                ],
                                "_relationship": relation_id,
                            }
                            # Pairwise the properties with their names
                            relation_args.update(
                                zip(properties_name, properties))
                            try:
                                ModelProperties.objects.create(**relation_args)
                            except TypeError as e:
                                # a property column doesn't match a model attribute
                                errors.append(
                                    AttributeDoesntExist(
                                        file=file_name,
                                        line=csv_reader.line_num,
                                        model_from=model_from,
                                        id_from=id_from,
                                        model_to=model_to,
                                        id_to=id_to,
                                        relation_args=relation_args,
                                        error=str(e)))
                        # update the job
                        inserted_relations += 1
                        file_reading_progression += 1
                        if job:
                            job.refresh()
                            job.meta["file_reading_progression"] = (
                                float(file_reading_progression) /
                                float(nb_lines)) * 100
                            job.meta["file_reading"] = file_name
                            job.save()
                    except KeyError as e:
                        # one of the ids doesn't map to a created entity
                        errors.append(
                            WarningKeyUnknown(file=file_name,
                                              line=csv_reader.line_num,
                                              model_from=model_from,
                                              id_from=id_from,
                                              model_to=model_to,
                                              id_to=id_to,
                                              relation_name=relation_name,
                                              error=str(e)))
                    except Exception as e:
                        # Error unknown, we break the process to alert the user
                        raise Error(file=file_name,
                                    line=csv_reader.line_num,
                                    model_from=model_from,
                                    id_from=id_from,
                                    model_to=model_to,
                                    id_to=id_to,
                                    relation_name=relation_name,
                                    error=str(e))
                else:
                    # A key is missing (id_from or id_to) but we don't want to stop the parsing.
                    # Then we store the wrong line to return it to the user.
                    errors.append(
                        WarningInformationIsMissing(file=file_name,
                                                    row=row,
                                                    line=csv_reader.line_num,
                                                    id_to=id_to,
                                                    id_from=id_from))
        # Save everything
        saved = 0
        logger.debug("BulkUpload: saving %d objects" % (len(id_mapping)))
        if job:
            job.refresh()
            job.meta["objects_to_save"] = len(id_mapping)
            job.save()
        for item in id_mapping.values():
            item.save()
            saved += 1
            if job:
                job.refresh()
                job.meta["saving_progression"] = saved
                job.save()
        if job:
            job.refresh()
        # "track" in meta means the user asked to be notified by email
        if job and "track" in job.meta:
            from django.core.mail import send_mail
            user = User.objects.get(pk=job.meta["user"])
            send_mail("upload finished", "your upload just finished",
                      settings.DEFAULT_FROM_EMAIL, (user.email, ))
        return {
            'duration': (time.time() - start_time),
            'inserted': {
                'objects': saved,
                'links': inserted_relations
            },
            "errors": sorted([
                dict([(e.__class__.__name__, str(e.__dict__))])
                for e in errors
            ])
        }
    except Exception as e:
        import traceback
        logger.error(traceback.format_exc())
        if e.__dict__:
            message = str(e.__dict__)
        else:
            message = e.message
        return {"errors": [{e.__class__.__name__: message}]}