def sniff(self,
          file_upload: FileUpload,
          encoding: str = settings.DEFAULT_CHARSET,
          limit: int = 5) -> SniffResult:
    """Inspect an uploaded CSV file and guess its structure.

    Detects whether the file has a header row and which dialect
    (delimiter etc.) it uses, reads up to ``limit`` preview rows, and
    fuzzy-matches header names against the writable fields of the
    contact serializer to propose a column mapping.

    :param file_upload: uploaded file wrapper; opened read-only here.
    :param encoding: charset used to decode the file's bytes.
    :param limit: maximum number of preview rows to return (``<= 0``
        yields no rows).
    :raises ParsingException: if the file cannot be decoded with
        ``encoding`` or is not parseable as CSV.
    """
    try:
        with file_upload.open() as csv_file:
            # Sniff on the first 1 KiB only; rewind before each pass.
            has_header = unicodecsv.Sniffer().has_header(
                csv_file.read(1024).decode(encoding))
            csv_file.seek(0)
            dialect = unicodecsv.Sniffer().sniff(
                csv_file.read(1024).decode(encoding))
            # Fix: pass the caller-supplied encoding to the reader, as
            # parse_and_import already does — otherwise non-default
            # encodings are previewed with the library's default codec.
            csv_format_opts = dict(dialect=dialect, encoding=encoding)
            csv_file.seek(0)
            reader = unicodecsv.reader(csv_file, **csv_format_opts)
            # Consume the header row so preview rows start at the data.
            header = next(reader) if has_header else None
            rows = list(islice(reader, max(0, limit))) if limit > 0 else []
    except (UnicodeDecodeError, unicodecsv.Error) as e:
        raise ParsingException(str(e)) from e

    contact_serializer = self.get_contact_serializer(data={})
    # Only writable serializer fields are valid import targets.
    fields = {
        name: field
        for name, field in contact_serializer.get_fields().items()
        if not field.read_only
    }
    headers_mapping = {}
    if header:
        for num, name in enumerate(header):
            # Fuzzy-match each CSV column name to the closest field name;
            # a later column wins if two columns match the same field.
            field_names = difflib.get_close_matches(name, fields.keys(), n=1)
            if field_names:
                headers_mapping[field_names[0]] = num
    return SniffResult(
        dict(
            has_header=has_header,
            delimiter=dialect.delimiter,
        ),
        list(fields.keys()),
        rows,
        headers_mapping,
    )
def parse_and_import(
        self,
        file_upload: FileUpload,
        headers: Dict[str, int],
        has_headers: Optional[bool] = None,
        # TODO: consider accepting a full csv dialect to give callers more
        # configuration options than just the delimiter.
        delimiter: Optional[str] = None,
        encoding: str = settings.DEFAULT_CHARSET,
        allow_update: bool = True,
        atomic: bool = False,
        create_failed_rows_file: bool = False,
        detailed_errors_limit: int = 20,
        campaign: Optional[Campaign] = None,
        contact_list: Optional[ContactList] = None) -> ImportResult:
    """Parse an uploaded CSV file and import its rows as contacts.

    Header presence and dialect are auto-sniffed from the first 1 KiB
    unless ``has_headers`` / ``delimiter`` are supplied. Rows are
    processed by ``self._process_rows`` (contract not visible here —
    presumably returns created/updated/skipped contact ids plus an error
    list; verify against its definition). Optionally the rows that
    failed are written to a fresh ``FileUpload`` so the user can retry,
    and imported contacts are attached to ``campaign`` and/or
    ``contact_list``.

    :raises ParsingException: if the file cannot be decoded with
        ``encoding`` or is not parseable as CSV.
    """
    # Invert the field-name -> column-index mapping into
    # column-index -> field-name for per-row lookup.
    indexes = {index: header for header, index in headers.items()}
    with file_upload.open() as csv_file:
        csv_format_opts = dict(
            dialect=unicodecsv.excel,
            encoding=encoding,
        )
        try:
            if has_headers is None:
                # Sniff header presence from a 1 KiB sample, then rewind.
                has_headers = unicodecsv.Sniffer().has_header(
                    csv_file.read(1024).decode(encoding))
                csv_file.seek(0)
            if delimiter is None:
                # Sniff the full dialect (overrides the excel default).
                dialect = unicodecsv.Sniffer().sniff(
                    csv_file.read(1024).decode(encoding))
                csv_format_opts['dialect'] = dialect
                csv_file.seek(0)
            else:
                csv_format_opts['delimiter'] = delimiter
            csv_reader = unicodecsv.reader(csv_file, **csv_format_opts)
            # Consume the header row so the processor only sees data rows.
            header = next(csv_reader) if has_headers else None
            # Bind everything except the failed-row sink; called below with
            # either None or a writerow callable.
            process_rows = partial(self._process_rows, csv_reader, indexes,
                                   allow_update, atomic,
                                   detailed_errors_limit)
        except (UnicodeDecodeError, unicodecsv.Error) as e:
            raise ParsingException(str(e)) from e
        failed_rows_file_upload = None
        # savepoint=False: join the caller's transaction rather than
        # creating a nested savepoint.
        with transaction.atomic(savepoint=False):
            if not create_failed_rows_file:
                created_contacts, updated_contacts, skipped_contacts, errors = process_rows(
                    None)
            else:
                # The temp file must stay open until FileUpload.objects.create
                # has consumed it, hence the combined with-block.
                with tempfile.TemporaryFile() as fp, transaction.atomic(
                        savepoint=False):
                    # Echo failed rows into a CSV with the same dialect as
                    # the input, preserving the header row if there was one.
                    csv_writer = unicodecsv.writer(fp, **csv_format_opts)
                    if header:
                        csv_writer.writerow(header)
                    created_contacts, updated_contacts, skipped_contacts, errors = process_rows(
                        csv_writer.writerow)
                    if errors:
                        fp.seek(0)
                        # Short-lived (2 day) system upload the user can
                        # download to fix and re-import the failed rows.
                        failed_rows_file_upload = FileUpload.objects.create(
                            owner=file_upload.owner,
                            uploader=FileUploader.SYSTEM,
                            ttl=datetime.timedelta(days=2),
                            file=File(
                                fp,
                                "failed-rows-from-%s" % file_upload.name))
            if campaign:
                # Attach all created contacts, plus only those updated
                # contacts not already participating, to the campaign.
                participating = set(
                    campaign.contacts.values_list('id', flat=True))
                Participation.objects.bulk_create((Participation(
                    contact_id=contact_id,
                    campaign=campaign,
                ) for contact_id in chain(
                    created_contacts,
                    filter(
                        lambda contact_id: contact_id not in participating,
                        updated_contacts))))
            if contact_list:
                # M2M add() is idempotent, so no de-duplication needed here.
                contact_list.contacts.add(*created_contacts)
                contact_list.contacts.add(*updated_contacts)
    return ImportResult(len(created_contacts), len(updated_contacts),
                        len(skipped_contacts), errors,
                        failed_rows_file_upload)