def upload_metadata_csv(request):
    """Upload a tab-separated metadata file and show an import preview.

    GET renders the upload form. On POST the uploaded file is validated,
    parsed as tab-separated text (encoding hard-coded to iso-8859-1),
    de-duplicated by ISBN and by title/author combination, filtered
    against documents already in the database, and finally rendered as a
    document formset plus a product formset for user confirmation.

    The template context is the view's ``locals()``, following the
    file's existing convention.
    """
    if request.method != 'POST':
        form = CSVUploadForm()
        return render(request, 'documents/manage_upload_metadata_csv.html', locals())
    form = CSVUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        return render(request, 'documents/manage_upload_metadata_csv.html', locals())
    csv_file = request.FILES['csv']
    initial = []
    products = {}
    seen_sources = set()
    seen_title_author = set()
    # FIXME: It's pretty annoying to hard code the expected encoding of the csv
    # Use a context manager so the temporary upload file is closed
    # deterministically (the original never closed it).
    with open(csv_file.temporary_file_path()) as csv_fp:
        reader = UnicodeReader(csv_fp, encoding="iso-8859-1", delimiter='\t')
        for row in reader:
            fields = {'title': row[0],
                      'author': row[1],
                      'identifier': row[2],
                      'source': row[3],
                      'source_edition': row[4],
                      'source_publisher': row[5],
                      'language': Document.language_choices[0][0]}
            # collect all products for a given isbn
            if fields['source'] != '' and VALID_ISBN_RE.match(fields['source']):
                products.setdefault(fields['source'], set()).add(fields['identifier'])
            # filter entries in the csv that deal with the same isbn or
            # with the same title/author combo
            title_author = fields['title'] + fields['author']
            if fields['source'] in seen_sources or title_author in seen_title_author:
                continue
            if fields['source']:
                seen_sources.add(fields['source'])
            if title_author:
                seen_title_author.add(title_author)
            # FIXME: Obviously the following is highly SBS specific
            if row[7] != '0':
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[1][0]
                fields['production_series_number'] = row[7]
            elif row[6].upper().find('SJW') != -1:
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[0][0]
                # extract the series number; raw string fixes the
                # invalid escape sequence in '\d+'
                m = re.search(r'\d+', row[6])
                if m is not None:
                    fields['production_series_number'] = m.group(0)
            if row[8] == 'D':
                fields['production_source'] = Document.PRODUCTION_SOURCE_CHOICES[0][0]
            initial.append(fields)
    # filter out existing document entries
    new_identifiers = [row['identifier'] for row in initial]
    new_sources = [row['source'] for row in initial if row['source']]
    duplicate_identifiers = [document.identifier for document in
                             Document.objects.filter(identifier__in=new_identifiers)]
    duplicate_sources = [document.source for document in
                         Document.objects.filter(source__in=new_sources)]
    unique_initial = [row for row in initial
                      if row['identifier'] not in duplicate_identifiers
                      and row['source'] not in duplicate_sources]
    ProductFormset = formset_factory(PartialProductForm, extra=0)
    product_formset = ProductFormset(
        initial=[{'isbn': isbn, 'productNumber': number}
                 for (isbn, v) in products.items() for number in v],
        prefix='products')
    DocumentFormSet = modelformset_factory(
        Document,
        fields=('author', 'title', 'identifier', 'source', 'source_edition',
                'source_publisher', 'language', 'production_series',
                'production_series_number', 'production_source'),
        extra=len(unique_initial), can_delete=True)
    document_formset = DocumentFormSet(queryset=Document.objects.none(),
                                       initial=unique_initial, prefix='documents')
    return render(request, 'documents/manage_import_metadata_csv.html', locals())
# NOTE(review): this is a second definition of upload_metadata_csv in the
# same module; being defined later, it silently shadows the more featureful
# version above (product collection, in-file de-duplication, source
# filtering). It looks like a leftover older revision — confirm which one
# is intended and remove the other.
def upload_metadata_csv(request):
    """Upload a tab-separated metadata file and show an import preview.

    GET renders the upload form. On POST the uploaded file is validated,
    parsed as tab-separated text (encoding hard-coded to iso-8859-1),
    filtered against documents already in the database, and rendered as
    a document formset for user confirmation.

    The template context is the view's ``locals()``, following the
    file's existing convention.
    """
    if request.method != 'POST':
        form = CSVUploadForm()
        return render_to_response('documents/manage_upload_metadata_csv.html',
                                  locals(), context_instance=RequestContext(request))
    form = CSVUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        return render_to_response('documents/manage_upload_metadata_csv.html',
                                  locals(), context_instance=RequestContext(request))
    csv_file = request.FILES['csv']
    initial = []
    # FIXME: It's pretty annoying to hard code the expected encoding of the csv
    # Use a context manager so the temporary upload file is closed
    # deterministically (the original never closed it).
    with open(csv_file.temporary_file_path()) as csv_fp:
        reader = UnicodeReader(csv_fp, encoding="iso-8859-1", delimiter='\t')
        for row in reader:
            fields = {'title': row[0],
                      'author': row[1],
                      'identifier': row[2],
                      'source': row[3],
                      'source_edition': row[4],
                      'source_publisher': row[5],
                      'language': Document.language_choices[0][0]}
            # FIXME: Obviously the following is highly SBS specific
            if row[7] != '0':
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[1][0]
                fields['production_series_number'] = row[7]
            elif row[6].upper().find('SJW') != -1:
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[0][0]
                # extract the series number; raw string fixes the
                # invalid escape sequence in '\d+'
                m = re.search(r'\d+', row[6])
                if m is not None:
                    fields['production_series_number'] = m.group(0)
            initial.append(fields)
    # filter out existing document entries
    new_identifiers = [row['identifier'] for row in initial]
    duplicate_identifiers = [document.identifier for document in
                             Document.objects.filter(identifier__in=new_identifiers)]
    unique_initial = [row for row in initial
                      if row['identifier'] not in duplicate_identifiers]
    DocumentFormSet = modelformset_factory(
        Document,
        fields=('author', 'title', 'identifier', 'source', 'source_edition',
                'source_publisher', 'language', 'production_series',
                'production_series_number'),
        extra=len(unique_initial), can_delete=True)
    formset = DocumentFormSet(queryset=Document.objects.none(),
                              initial=unique_initial)
    return render_to_response('documents/manage_import_metadata_csv.html',
                              locals(), context_instance=RequestContext(request))