def __init__(self):
    """One-shot initializer: warm up the system field, then opt out.

    Raising ``MiddlewareNotUsed`` tells Django to remove this middleware
    from the stack, so the body below runs exactly once per process.
    """
    # initialize system field, so later it does not get created multiple
    # times in a race condition
    get_system_field()
    # Only need to run once
    raise MiddlewareNotUsed
def testKeepSystemFieldValues(self):
    """An update-mode import must not clobber values on the system field."""
    dc_identifier = Field.objects.get(name='identifier', standard__prefix='dc')
    dc_title = Field.objects.get(name='title', standard__prefix='dc')
    system_field = get_system_field()

    # Seed one record carrying an identifier, a title and a system value.
    record = Record.objects.create(name='s001')
    CollectionItem.objects.create(record=record, collection=self.collection)
    record.fieldvalue_set.create(field=dc_identifier, value='S001')
    record.fieldvalue_set.create(field=dc_title, value='Title')
    record.fieldvalue_set.create(field=system_field, value='Keep this')

    sheet = StringIO("Identifier,Title\nS002,NewTitle2\nS001,NewTitle1")
    importer = SpreadsheetImport(sheet, [self.collection])
    importer.name_field = 'Identifier'
    importer.run(update=True)

    # One brand new record, one updated record, nothing skipped.
    self.assertEqual(1, importer.added)
    self.assertEqual(1, importer.updated)
    self.assertEqual(0, importer.added_skipped)
    self.assertEqual(0, importer.updated_skipped)

    title_s001 = self.collection.records.get(
        name='s001').fieldvalue_set.filter(field=dc_title)
    title_s002 = self.collection.records.get(
        name='s002').fieldvalue_set.filter(field=dc_title)
    system_s001 = self.collection.records.get(
        name='s001').fieldvalue_set.filter(field=system_field)

    self.assertEqual('NewTitle1', title_s001[0].value)
    self.assertEqual('NewTitle2', title_s002[0].value)
    # The pre-existing system field value must survive the import.
    self.assertEqual('Keep this', system_s001[0].value)
def testKeepSystemFieldValues(self):
    """An update-mode import must not clobber values on the system field."""
    no_signals()
    dc_identifier = Field.objects.get(name='identifier', standard__prefix='dc')
    dc_title = Field.objects.get(name='title', standard__prefix='dc')
    system_field = get_system_field()

    # Seed one record carrying an identifier, a title and a system value.
    record = Record.objects.create(name='s001')
    CollectionItem.objects.create(record=record, collection=self.collection)
    record.fieldvalue_set.create(field=dc_identifier, value='S001')
    record.fieldvalue_set.create(field=dc_title, value='Title')
    record.fieldvalue_set.create(field=system_field, value='Keep this')

    sheet = StringIO("Identifier,Title\nS002,NewTitle2\nS001,NewTitle1")
    importer = SpreadsheetImport(sheet, [self.collection])
    importer.name_field = 'Identifier'
    importer.run(update=True)

    # One brand new record, one updated record, nothing skipped.
    self.assertEqual(1, importer.added)
    self.assertEqual(1, importer.updated)
    self.assertEqual(0, importer.added_skipped)
    self.assertEqual(0, importer.updated_skipped)

    title_s001 = self.collection.records.get(
        name='s001').fieldvalue_set.filter(field=dc_title)
    title_s002 = self.collection.records.get(
        name='s002').fieldvalue_set.filter(field=dc_title)
    system_s001 = self.collection.records.get(
        name='s001').fieldvalue_set.filter(field=system_field)

    self.assertEqual('NewTitle1', title_s001[0].value)
    self.assertEqual('NewTitle2', title_s002[0].value)
    # The pre-existing system field value must survive the import.
    self.assertEqual('Keep this', system_s001[0].value)
def run(self, update=True, add=True, test=False, update_names=False,
        target_collections=None, skip_rows=0):
    """Import spreadsheet rows into records.

    Each row is matched against existing records through the column
    mapped to an identifier field.  Consecutive rows with the same (or a
    blank) identifier are merged into one logical row before processing.

    :param update: update existing records owned by ``self.owner``
    :param add: create new records for unknown identifiers
    :param test: dry run -- update counters but write nothing
    :param update_names: also rewrite the record name on update
    :param target_collections: collections to attach records to; falls
        back to ``self.collections`` when empty.  Default changed from a
        shared mutable ``[]`` to ``None`` (same behavior, no aliasing).
    :param skip_rows: number of leading rows to discard from the reader
    :raises SpreadsheetImport.NoIdentifierException: if no column is
        mapped to an identifier field
    """
    if target_collections is None:
        target_collections = []
    if not self.analyzed:
        self.analyze(preview_rows=1)
    identifier_field = self.get_identifier_field()
    if not identifier_field:
        raise SpreadsheetImport.NoIdentifierException(
            "No column is mapped to an identifier field")
    system_field = get_system_field()

    def apply_values(record, row, is_new=False):
        # Replace all unowned, non-system values; system field values
        # are deliberately preserved.
        if not is_new:
            record.fieldvalue_set.filter(
                ~Q(field=system_field), owner=None).delete()
        for field, values in row.iteritems():
            target = self.mapping.get(field)
            if target and values:
                for order, value in enumerate(values):
                    record.fieldvalue_set.create(
                        field=target,
                        value=value,
                        label=self.labels.get(field),
                        order=self.order.get(field, order),
                        hidden=self.hidden.get(field, False),
                    )

    reader = self._get_reader()
    self.added = self.added_skipped = self.updated = \
        self.updated_skipped = self.duplicate_in_file_skipped = \
        self.no_id_skipped = self.owner_skipped = \
        self.duplicate_in_collection_skipped = 0
    self.processed_ids = dict()

    def process_row(row):
        ids = row[identifier_field]
        # Hoisted: join once instead of twice; `in` replaces the
        # long-deprecated dict.has_key().
        id_key = "\n".join(ids)
        if id_key in self.processed_ids:
            self.duplicate_in_file_skipped += 1
            for func in self.on_duplicate_in_file_skipped:
                func(ids)
            return
        self.processed_ids[id_key] = None
        fvs = FieldValue.objects.select_related("record").filter(
            record__collection__in=self.collections,
            owner=None,
            field__in=self._identifier_ids,
            value__in=ids)
        if not fvs:
            if add:
                # create new record
                if not test:
                    record = Record.objects.create(
                        owner=self.owner,
                        name=row.get(self.name_field, [None])[0]
                        if self.name_field else None)
                    apply_values(record, row, is_new=True)
                    for collection in target_collections or self.collections:
                        CollectionItem.objects.get_or_create(
                            record=record, collection=collection)
                self.added += 1
                for func in self.on_added:
                    func(ids)
            else:
                # adding new records is disabled
                self.added_skipped += 1
                for func in self.on_added_skipped:
                    func(ids)
        elif len(fvs) == 1:
            if fvs[0].record.owner == self.owner:
                if update:
                    # update existing record (including records just
                    # created in a previous row)
                    if not test:
                        record = fvs[0].record
                        apply_values(record, row)
                        if update_names:
                            record.name = row.get(
                                self.name_field, [None])[0] \
                                if self.name_field else None
                            record.save(force_update_name=True)
                        for collection in (target_collections or
                                           self.collections):
                            CollectionItem.objects.get_or_create(
                                record=record, collection=collection)
                    self.updated += 1
                    for func in self.on_updated:
                        func(ids)
                else:
                    # updating records is disabled
                    self.updated_skipped += 1
                    for func in self.on_updated_skipped:
                        func(ids)
            else:
                # record exists but belongs to a different owner
                self.owner_skipped += 1
                for func in self.on_owner_skipped:
                    func(ids)
        else:
            # duplicate id found
            self.duplicate_in_collection_skipped += 1
            for func in self.on_duplicate_in_collection_skipped:
                func(ids)

    for skip in range(skip_rows):
        reader.next()

    last_row = None
    for i, row in enumerate(reader):
        row = self._split_values(row)
        if not last_row:
            last_row = row
            continue
        # compare IDs of current and last rows
        last_id = last_row.get(identifier_field)
        if not last_id:
            last_row = row
            self.no_id_skipped += 1
            for func in self.on_no_id_skipped:
                func(None)
            continue
        current_id = row.get(identifier_field)
        if not current_id or (last_id == current_id):
            # continuation row: merge current values into the last row
            for key, values in row.iteritems():
                v = last_row.setdefault(key, [])
                for value in values or []:
                    if value not in v:
                        v.append(value)
                last_row[key] = v
            for func in self.on_continuation:
                func(last_id)
        else:
            process_row(last_row)
            last_row = row
    if last_row:
        process_row(last_row)
def run(self, update=True, add=True, test=False, update_names=False,
        target_collections=None, skip_rows=0):
    """Import spreadsheet rows into records.

    Rows are matched to existing records via the identifier column;
    consecutive rows sharing an identifier (or with a blank identifier)
    are merged into one logical row before processing.

    :param update: update existing records owned by ``self.owner``
    :param add: create new records for unknown identifiers
    :param test: dry run -- update counters but write nothing
    :param update_names: also rewrite the record name on update
    :param target_collections: collections to attach records to; falls
        back to ``self.collections`` when empty.  Default changed from a
        shared mutable ``[]`` to ``None`` (same behavior, no aliasing).
    :param skip_rows: number of leading rows to discard from the reader
    :raises SpreadsheetImport.NoIdentifierException: if no column is
        mapped to an identifier field
    """
    if target_collections is None:
        target_collections = []
    if not self.analyzed:
        self.analyze(preview_rows=1)
    identifier_field = self.get_identifier_field()
    if not identifier_field:
        raise SpreadsheetImport.NoIdentifierException(
            'No column is mapped to an identifier field')
    system_field = get_system_field()

    def apply_values(record, row, is_new=False):
        # Replace all unowned, non-system values; system field values
        # are deliberately preserved.
        if not is_new:
            record.fieldvalue_set.filter(
                ~Q(field=system_field), owner=None).delete()
        for field, values in row.iteritems():
            target = self.mapping.get(field)
            if target and values:
                for order, value in enumerate(values):
                    record.fieldvalue_set.create(
                        field=target,
                        value=value,
                        label=self.labels.get(field),
                        order=self.order.get(field, order),
                        hidden=self.hidden.get(field, False))

    reader = self._get_reader()
    self.added = self.added_skipped = self.updated = self.updated_skipped = \
        self.duplicate_in_file_skipped = self.no_id_skipped = \
        self.owner_skipped = self.duplicate_in_collection_skipped = 0
    self.processed_ids = dict()

    def process_row(row):
        ids = row[identifier_field]
        # Hoisted: join once instead of twice; `in` replaces the
        # long-deprecated dict.has_key().
        id_key = '\n'.join(ids)
        if id_key in self.processed_ids:
            self.duplicate_in_file_skipped += 1
            for func in self.on_duplicate_in_file_skipped:
                func(ids)
            return
        self.processed_ids[id_key] = None
        fvs = FieldValue.objects.select_related('record').filter(
            record__collection__in=self.collections,
            owner=None,
            field__in=self._identifier_ids,
            value__in=ids)
        if not fvs:
            if add:
                # create new record
                if not test:
                    record = Record.objects.create(
                        owner=self.owner,
                        name=row.get(self.name_field, [None])[0]
                        if self.name_field else None)
                    apply_values(record, row, is_new=True)
                    for collection in target_collections or self.collections:
                        CollectionItem.objects.get_or_create(
                            record=record, collection=collection)
                self.added += 1
                for func in self.on_added:
                    func(ids)
            else:
                # adding new records is disabled
                self.added_skipped += 1
                for func in self.on_added_skipped:
                    func(ids)
        elif len(fvs) == 1:
            if fvs[0].record.owner == self.owner:
                if update:
                    # update existing record (including records just
                    # created in a previous row)
                    if not test:
                        record = fvs[0].record
                        apply_values(record, row)
                        if update_names:
                            record.name = row.get(
                                self.name_field,
                                [None])[0] if self.name_field else None
                            record.save(force_update_name=True)
                        for collection in (target_collections or
                                           self.collections):
                            CollectionItem.objects.get_or_create(
                                record=record, collection=collection)
                    self.updated += 1
                    for func in self.on_updated:
                        func(ids)
                else:
                    # updating records is disabled
                    self.updated_skipped += 1
                    for func in self.on_updated_skipped:
                        func(ids)
            else:
                # record exists but belongs to a different owner
                self.owner_skipped += 1
                for func in self.on_owner_skipped:
                    func(ids)
        else:
            # duplicate id found
            self.duplicate_in_collection_skipped += 1
            for func in self.on_duplicate_in_collection_skipped:
                func(ids)

    for skip in range(skip_rows):
        reader.next()

    last_row = None
    for i, row in enumerate(reader):
        row = self._split_values(row)
        if not last_row:
            last_row = row
            continue
        # compare IDs of current and last rows
        last_id = last_row.get(identifier_field)
        if not last_id:
            last_row = row
            self.no_id_skipped += 1
            for func in self.on_no_id_skipped:
                func(None)
            continue
        current_id = row.get(identifier_field)
        if not current_id or (last_id == current_id):
            # continuation row: merge current values into the last row
            for key, values in row.iteritems():
                v = last_row.setdefault(key, [])
                for value in (values or []):
                    if value not in v:
                        v.append(value)
                last_row[key] = v
            for func in self.on_continuation:
                func(last_id)
        else:
            process_row(last_row)
            last_row = row
    if last_row:
        process_row(last_row)
def run(self, update=True, add=True, test=False,
        target_collections=None, skip_rows=0):
    """Import spreadsheet rows into records.

    Rows are matched to existing records via the identifier column;
    consecutive rows sharing an identifier (or with a blank identifier)
    are merged into one logical row before processing.  Record indexing
    is delayed while the import runs and resumed afterwards, even on
    error.

    :param update: update existing records owned by ``self.owner``
    :param add: create new records for unknown identifiers
    :param test: dry run -- update counters but write nothing
    :param target_collections: collections to attach records to; falls
        back to ``self.collections`` when empty.  Default changed from a
        shared mutable ``[]`` to ``None`` (same behavior, no aliasing).
    :param skip_rows: number of leading rows to discard from the reader
    :raises SpreadsheetImport.NoIdentifierException: if no column is
        mapped to an identifier field
    """
    if target_collections is None:
        target_collections = []
    if not self.analyzed:
        self.analyze(preview_rows=1)
    identifier_field = self.get_identifier_field()
    if not identifier_field:
        raise SpreadsheetImport.NoIdentifierException(
            'No column is mapped to an identifier field')
    system_field = get_system_field()

    def apply_values(record, row, is_new=False):
        # Replace all unowned, non-system values; system field values
        # are deliberately preserved.
        if not is_new:
            record.fieldvalue_set.filter(
                ~Q(field=system_field), owner=None).delete()
        for field, values in row.iteritems():
            target = self.mapping.get(field)
            if target and values:
                for order, value in enumerate(values):
                    record.fieldvalue_set.create(
                        field=target,
                        value=value,
                        label=self.labels.get(field),
                        order=self.order.get(field, order),
                        hidden=self.hidden.get(field, False),
                        refinement=self.refinements.get(field))

    reader = self._get_reader()
    self.added = self.added_skipped = self.updated = \
        self.updated_skipped = self.duplicate_in_file_skipped = \
        self.no_id_skipped = self.owner_skipped = \
        self.duplicate_in_collection_skipped = 0
    self.processed_ids = dict()

    def process_row(row):
        ids = row[identifier_field]
        # Hoisted: join once instead of twice.
        id_key = '\n'.join(ids)
        if id_key in self.processed_ids:
            self.duplicate_in_file_skipped += 1
            for func in self.on_duplicate_in_file_skipped:
                func(ids)
            return
        self.processed_ids[id_key] = None
        # index_value holds the first 32 chars of the value, so narrow
        # by it before the exact value comparison.
        fvs = FieldValue.objects.select_related('record').filter(
            record__collection__in=self.collections,
            owner=None,
            field__in=self._identifier_ids,
            index_value__in=(x[:32] for x in ids),
            value__in=ids)
        if not fvs:
            if add:
                # create new record
                if not test:
                    record = Record.objects.create(owner=self.owner)
                    apply_values(record, row, is_new=True)
                    for collection in (
                            target_collections or self.collections):
                        CollectionItem.objects.get_or_create(
                            record=record,
                            collection=collection
                        )
                self.added += 1
                for func in self.on_added:
                    func(ids)
            else:
                # adding new records is disabled
                self.added_skipped += 1
                for func in self.on_added_skipped:
                    func(ids)
        elif len(fvs) == 1:
            if fvs[0].record.owner == self.owner:
                if update:
                    # update existing record
                    # (including records just created in previous row)
                    if not test:
                        record = fvs[0].record
                        apply_values(record, row)
                        for collection in (
                                target_collections or self.collections):
                            CollectionItem.objects.get_or_create(
                                record=record, collection=collection)
                    self.updated += 1
                    for func in self.on_updated:
                        func(ids)
                else:
                    # updating records is disabled
                    self.updated_skipped += 1
                    for func in self.on_updated_skipped:
                        func(ids)
            else:
                # record exists but belongs to a different owner
                self.owner_skipped += 1
                for func in self.on_owner_skipped:
                    func(ids)
        else:
            # duplicate id found
            self.duplicate_in_collection_skipped += 1
            for func in self.on_duplicate_in_collection_skipped:
                func(ids)

    for skip in range(skip_rows):
        reader.next()

    last_row = None
    try:
        for i, row in enumerate(reader):
            # On every row, delay record indexing for a little longer
            delay_record_indexing()
            reset_queries()
            row = self._split_values(row)
            if not last_row:
                last_row = row
                continue
            # compare IDs of current and last rows
            last_id = last_row.get(identifier_field)
            if not last_id:
                last_row = row
                self.no_id_skipped += 1
                for func in self.on_no_id_skipped:
                    func(None)
                continue
            current_id = row.get(identifier_field)
            if not current_id or (last_id == current_id):
                # continuation row: merge current values into the
                # last row
                for key, values in row.iteritems():
                    v = last_row.get(key) or []
                    for value in (values or []):
                        if value not in v:
                            v.append(value)
                    last_row[key] = v
                for func in self.on_continuation:
                    func(last_id)
            else:
                process_row(last_row)
                last_row = row
        if last_row:
            process_row(last_row)
    finally:
        # Always resume indexing, even if the import raised.
        resume_record_indexing()