def data_document_detail(request, pk):
    """Render the detail page for one DataDocument.

    Supplementary-document ("SD") groups have no detail page; those requests
    are redirected back to the data group's detail view with an info message.
    For every other group type the view builds the parent edit form, the
    chemical queryset (with tag prefetching for list-presence children), and
    the optional list-presence tag form for the template context.
    """
    doc = get_object_or_404(DataDocument, pk=pk)
    group_code = doc.data_group.group_type.code

    # Guard clause: SD documents have no detail page at all.
    if group_code == "SD":
        messages.info(
            request,
            f'"{doc}" has no detail page. GroupType is "{doc.data_group.group_type}"',
        )
        return redirect(reverse("data_group_detail", args=[doc.data_group_id]))

    ParentForm, _ = create_detail_formset(doc)
    Parent, Child = get_extracted_models(group_code)

    # Parent shares the document's pk (OneToOne); may be absent.
    ext = Parent.objects.filter(pk=doc.pk).first()
    chemicals = Child.objects.filter(
        extracted_text__data_document=doc
    ).prefetch_related("dsstox")
    if Child == ExtractedListPresence:
        chemicals = chemicals.prefetch_related("tags")

    lp = ExtractedListPresence.objects.filter(
        extracted_text=ext if ext else None
    ).first()

    context = {
        "doc": doc,
        "extracted_text": ext,
        "chemicals": chemicals,
        "edit_text_form": ParentForm(instance=ext),  # empty form if ext is None
        "list_presence_tag_form": ExtractedListPresenceTagForm() if lp else None,
    }

    # Composition groups additionally surface the cleaning script, if any.
    if group_code == "CO":
        script_chem = chemicals.filter(script__isnull=False).first()
        context["cleaning_script"] = script_chem.script if script_chem else None

    return render(request, "data_document/data_document_detail.html", context)
def create_detail_formset(group_type, extra=0, can_delete=False):
    """Return the (parent form, child formset) pair for ``group_type``.

    Handled group types: 'CO', 'UN', 'CP', 'FU', 'HP'.

    Parameters
    ----------
    group_type : str
        The data group's type code, used to select the extracted models.
    extra : int
        How many blank child forms to include in the formset.
    can_delete : bool
        Whether the child formset renders a delete checkbox.

    Returns
    -------
    tuple or None
        (ParentFormClass, ChildFormSetClass), or None for an unknown type.
    """
    parent, child = get_extracted_models(group_type)

    def make_formset(parent_model, model, fields):
        # BUGFIX: honor the caller's can_delete flag; it was previously
        # hard-coded to False, silently ignoring the parameter.
        return forms.inlineformset_factory(
            parent_model=parent_model,
            model=model,
            fields=fields,
            extra=extra,
            can_delete=can_delete,
        )

    def make_custom_formset(parent_model, model, fields, formset, form):
        return forms.inlineformset_factory(
            parent_model=parent_model,
            model=model,
            fields=fields,
            formset=formset,  # this specifies a custom formset
            form=form,
            extra=extra,
            can_delete=can_delete,  # BUGFIX: was hard-coded to False
        )

    def one():  # for chemicals or unknown
        ChemicalFormSet = make_custom_formset(
            parent_model=parent,
            model=child,
            fields=child.detail_fields(),
            formset=ExtractedChemicalFormSet,
            form=ExtractedChemicalForm,
        )
        return (ExtractedTextForm, ChemicalFormSet)

    def two():  # for functional_use
        FunctionalUseFormSet = make_formset(parent, child, child.detail_fields())
        return (ExtractedTextForm, FunctionalUseFormSet)

    def three():  # for habits_and_practices
        HnPFormSet = make_formset(parent, child, child.detail_fields())
        return (ExtractedTextForm, HnPFormSet)

    def four():  # for extracted_list_presence
        ListPresenceFormSet = make_formset(parent, child, child.detail_fields())
        return (ExtractedCPCatForm, ListPresenceFormSet)

    # Dispatch table keyed on group type code.
    dg_types = {
        'CO': one,
        'UN': one,
        'FU': two,
        'HP': three,
        'CP': four,
    }
    func = dg_types.get(group_type, lambda: None)
    return func()
def test_every_extractedtext(self):
    """Walk every ExtractedText object and confirm that create_detail_formset
    builds the child formset from the model implied by the parent document's
    group type.
    """
    for extracted in ExtractedText.objects.all():
        document = extracted.data_document
        _, ChildFormSet = create_detail_formset(document, EXTRA)
        formset = ChildFormSet(instance=extracted)
        # Model expected from the document's group type code.
        expected_model = get_extracted_models(document.data_group.group_type.code)[1]
        # Model actually backing the formset's queryset.
        actual_model = formset.__dict__.get("queryset").__dict__.get("model")
        self.assertEqual(expected_model, actual_model)
def create_detail_formset(document, extra=1, can_delete=False, exclude=None, hidden=None):
    """Return the (parent form, child formset) pair needed for the document's
    data group type.

    Handled group types: 'CO', 'UN', 'CP', 'FU', 'HP', 'HH'.

    Parameters
    ----------
    document : DataDocument
        The parent DataDocument
    extra : int
        How many empty forms should be created for new records
    can_delete : bool
        Whether a delete checkbox is included
    exclude : list, optional
        Which fields to leave out of the form
    hidden : list, optional
        Which fields to make hidden on the form

    Returns
    -------
    tuple or None
        (ParentFormClass, ChildFormSetClass), or None for an unknown type.
    """
    # BUGFIX: avoid mutable default arguments ([]); normalize None instead.
    exclude = [] if exclude is None else exclude
    hidden = [] if hidden is None else hidden

    group_type = document.data_group.type
    parent, child = get_extracted_models(group_type)
    # Whether a parent ExtractedText already exists determines which parent
    # form class the 'CP'/'HH' branches return (view vs. edit form).
    extracted = hasattr(document, "extractedtext")

    def make_formset(
        parent_model,
        model,
        formset=BaseInlineFormSet,
        form=forms.ModelForm,
        exclude=exclude,
        hidden=hidden,
    ):
        formset_fields = model.detail_fields()
        if exclude:
            formset_fields = [
                in_field for in_field in formset_fields if in_field not in exclude
            ]
        # Set fields to hidden if so specified.
        widgets = {
            in_field: forms.HiddenInput()
            for in_field in formset_fields
            if in_field in hidden
        }
        return forms.inlineformset_factory(
            parent_model=parent_model,
            model=model,
            fields=formset_fields,
            formset=formset,  # this specifies a custom formset
            form=form,
            extra=extra,
            can_delete=can_delete,
            widgets=widgets,
        )

    def one():  # for chemicals or unknown
        ChemicalFormSet = make_formset(
            parent_model=parent,
            model=child,
            formset=ExtractedChemicalFormSet,
            form=ExtractedChemicalForm,
            hidden=["component"],
        )
        return (ExtractedTextForm, ChemicalFormSet)

    def two():  # for functional_use
        FunctionalUseFormSet = make_formset(parent, child)
        return (ExtractedTextForm, FunctionalUseFormSet)

    def three():  # for habits_and_practices
        HnPFormSet = make_formset(parent, child)
        return (ExtractedTextForm, HnPFormSet)

    def four():  # for extracted_list_presence
        ListPresenceFormSet = make_formset(parent, child)
        ParentForm = ExtractedCPCatForm if extracted else ExtractedCPCatEditForm
        return (ParentForm, ListPresenceFormSet)

    def five():  # for extracted_hh_rec
        HHFormSet = make_formset(parent, child)
        ParentForm = ExtractedHHDocForm if extracted else ExtractedHHDocEditForm
        return (ParentForm, HHFormSet)

    # Dispatch table keyed on group type code.
    dg_types = {"CO": one, "UN": one, "FU": two, "HP": three, "CP": four, "HH": five}
    func = dg_types.get(group_type, lambda: None)
    return func()
def chemical_delete(request, doc_pk, chem_pk):
    """Delete one extracted chemical record, then redirect to its document.

    The chemical model class depends on the document's group type, so the
    document is resolved first to pick the right child model.
    """
    # BUGFIX: use get_object_or_404 (consistent with data_document_detail)
    # so a bad pk yields an HTTP 404 instead of an uncaught DoesNotExist.
    doc = get_object_or_404(DataDocument, pk=doc_pk)
    _, Chemical = get_extracted_models(doc.data_group.group_type.code)
    chem = get_object_or_404(Chemical, pk=chem_pk)
    chem.delete()
    return redirect(doc)
def clean(self):
    """Validate the whole upload formset and stage model instances.

    Phase 1 collects ValidationErrors for bad foreign keys
    (extraction_script, unit_type, weight_fraction_type, data_document_id)
    and for 1:1 conflicts on the parent's identifying field, raising them
    all at once. Phase 2 builds/updates DataDocument, Parent, and Child
    instances per form, recording created/updated field audit info on each
    instance's ``_meta`` and stashing the objects back into
    ``form.cleaned_data`` for a later save step.
    """
    validation_errors = []
    # We're now CPU bound on this call, not SQL bound. Make for a more fun problem.
    Parent, Child = get_extracted_models(self.dg.type)
    unique_parent_ids = set(f.cleaned_data["data_document_id"] for f in self.forms)
    # Check that extraction_script is valid
    # NOTE: only the first form's script id is checked — the whole upload
    # is assumed to share one extraction script.
    extraction_script_id = self.forms[0].cleaned_data["extraction_script_id"]
    if not Script.objects.filter(
        script_type="EX", pk=extraction_script_id
    ).exists():
        err = forms.ValidationError("Invalid extraction script selection.")
        validation_errors.append(err)
    # Check that unit_type is valid
    unit_type_ids = (
        f.cleaned_data["unit_type_id"]
        for f in self.forms
        if f.cleaned_data.get("unit_type_id") is not None
    )
    bad_ids = get_missing_ids(UnitType, unit_type_ids)
    if bad_ids:
        err_str = 'The following "unit_type"s were not found: '
        err_str += ", ".join("%d" % i for i in bad_ids)
        err = forms.ValidationError(err_str)
        validation_errors.append(err)
    # Check that weight_fraction_type is valid
    weight_fraction_type_ids = (
        f.cleaned_data["weight_fraction_type_id"]
        for f in self.forms
        if f.cleaned_data.get("weight_fraction_type_id") is not None
    )
    bad_ids = get_missing_ids(WeightFractionType, weight_fraction_type_ids)
    if bad_ids:
        err_str = 'The following "weight_fraction_type"s were not found: '
        err_str += ", ".join("%d" % i for i in bad_ids)
        err = forms.ValidationError(err_str)
        validation_errors.append(err)
    # Check that the data_document_id are all valid
    # in_bulk returns {pk: DataDocument}; any missing key is a bad id.
    datadocument_dict = DataDocument.objects.filter(data_group=self.dg).in_bulk(
        unique_parent_ids
    )
    if len(datadocument_dict) != len(unique_parent_ids):
        bad_ids = unique_parent_ids - datadocument_dict.keys()
        err_str = (
            'The following "data_document_id"s were not found for this data group: '
        )
        err_str += ", ".join("%d" % i for i in bad_ids)
        err = forms.ValidationError(err_str)
        validation_errors.append(err)
    # Check that parent fields do not conflict (OneToOne check)
    # The identifying field differs by parent model: CP-style parents have
    # cat_code, composition-style parents have prod_name.
    if hasattr(Parent, "cat_code"):
        oto_field = "cat_code"
    elif hasattr(Parent, "prod_name"):
        oto_field = "prod_name"
    else:
        oto_field = None
    if oto_field:
        unique_parent_oto_fields = set(
            (f.cleaned_data["data_document_id"], f.cleaned_data[oto_field])
            for f in self.forms
        )
        # More (doc id, oto value) pairs than doc ids means some document
        # was given two different oto values — a 1:1 violation.
        if len(unique_parent_ids) != len(unique_parent_oto_fields):
            unseen_parents = set(unique_parent_ids)
            bad_ids = []
            for i, _ in unique_parent_oto_fields:
                if i in unseen_parents:
                    unseen_parents.remove(i)
                else:
                    bad_ids.append(i)
            err_str = (
                'The following "data_document_id"s got unexpected "%s"s (must be 1:1): '
                % oto_field
            )
            err_str += ", ".join("%d" % i for i in bad_ids)
            err = forms.ValidationError(err_str)
            validation_errors.append(err)
    if validation_errors:
        raise forms.ValidationError(validation_errors)
    # Make the DataDocument, Parent, and Child objects and validate them
    parent_dict = Parent.objects.in_bulk(unique_parent_ids)
    unseen_parents = set(unique_parent_ids)
    for form in self.forms:
        data = form.cleaned_data
        pk = data["data_document_id"]
        # Parent and DataDocument
        # Only the FIRST form for each document id processes the parent and
        # document; later forms for the same document get parent=None.
        if pk in unseen_parents:
            # DataDocument updates
            datadocument = datadocument_dict[pk]
            new_raw_category = data["raw_category"]
            old_raw_category = datadocument.raw_category
            if new_raw_category != old_raw_category:
                datadocument.raw_category = new_raw_category
                datadocument.clean(skip_type_check=True)
                # Audit bookkeeping is piggybacked onto _meta; consumed by
                # the save step elsewhere. TODO confirm downstream reader.
                datadocument._meta.created_fields = {}
                datadocument._meta.updated_fields = {
                    "raw_category": {
                        "old": old_raw_category,
                        "new": new_raw_category,
                    }
                }
            else:
                datadocument._meta.created_fields = {}
                datadocument._meta.updated_fields = {}
            # Parent creates
            parent_params = clean_dict(data, Parent)
            if pk not in parent_dict:
                parent = Parent(**parent_params)
                parent._meta.created_fields = parent_params
                parent._meta.updated_fields = {}
            # Parent updates
            else:
                parent = parent_dict[pk]
                parent._meta.created_fields = {}
                parent._meta.updated_fields = {}
                for field, new_value in parent_params.items():
                    old_value = getattr(parent, field)
                    if new_value != old_value:
                        setattr(parent, field, new_value)
                        parent._meta.updated_fields[field] = {
                            "old_value": old_value,
                            "new_value": new_value,
                        }
            # Mark this parent as seen
            unseen_parents.remove(pk)
        else:
            parent = None
            datadocument = None
        # Child creates
        child_params = clean_dict(data, Child)
        # Only include children if relevant data is attached
        if child_params.keys() - {"extracted_text_id", "weight_fraction_type_id"}:
            child = Child(**child_params)
            child._meta.created_fields = child_params
            child._meta.updated_fields = {}
        else:
            child = None
        # Store in dictionary
        data["datadocument"] = datadocument
        data["parent"] = parent
        data["child"] = child
def data_group_detail(request, pk, template_name='data_group/datagroup_detail.html'):
    """Render a DataGroup's detail page and handle its four POST actions.

    POST branches (selected by the submit button name in request.POST):
      * 'upload'                  — match uploaded PDFs to documents and zip them
      * 'extract_button'          — bulk-load extracted records from a CSV
      * 'bulk'                    — create stub Products for unlinked documents
      * 'clean_comp_data_button'  — load cleaned composition data (Ingredients)

    GET simply renders the paginated document list with the upload forms.
    """
    dg = get_object_or_404(DataGroup, pk=pk, )
    dg_type = str(dg.type)
    dg.doc_types = DocumentType.objects.filter(group_type=dg.group_type)
    docs = dg.datadocument_set.get_queryset()  # this needs to be updated after matching...
    prod_link = ProductDocument.objects.filter(document__in=docs)
    page = request.GET.get('page')
    paginator = Paginator(docs, 50)  # TODO: make this dynamic someday in its own ticket
    # Media path for this group's file store, keyed by its filesystem id.
    store = settings.MEDIA_URL + str(dg.fs_id)
    ext = ExtractedText.objects.filter(data_document_id__in=docs).first()
    if ext:
        ext = ext.pull_out_cp()
    context = {
        'datagroup': dg,
        # NOTE(review): an out-of-range ?page= value will make Paginator.page
        # raise here — confirm upstream sanitization.
        'documents': paginator.page(1 if page is None else page),
        'all_documents': docs,  # this used for template download
        'extract_fields': dg.get_extracted_template_fieldnames(),
        'ext_err': {},
        'clean_comp_err': {},
        'extract_form': include_extract_form(dg),
        'clean_comp_data_form': include_clean_comp_data_form(dg),
        'bulk': len(docs) - len(prod_link),
        'msg': '',
    }
    if request.method == 'POST' and 'upload' in request.POST:
        # match filename to pdf name
        matched_files = [f for d in docs
                         for f in request.FILES.getlist('multifiles')
                         if f.name == d.filename]
        if not matched_files:
            context['msg'] = ('There are no matching records in the '
                              'selected directory.')
            return render(request, template_name, context)
        # Append each newly matched PDF to the group's zip archive.
        zf = zipfile.ZipFile(dg.zip_file, 'a', zipfile.ZIP_DEFLATED)
        while matched_files:
            f = matched_files.pop(0)
            doc = DataDocument.objects.get(filename=f.name, data_group=dg.pk)
            if doc.matched:
                continue  # already matched on a previous upload; skip
            doc.matched = True
            doc.save()
            fs = FileSystemStorage(store + '/pdf')
            afn = doc.get_abstract_filename()
            fs.save(afn, f)
            zf.write(store + '/pdf/' + afn, afn)
        zf.close()
        form = include_extract_form(dg)
        # update docs so it appears in the template table w/ "matched" docs
        context['all_documents'] = dg.datadocument_set.get_queryset()
        context['extract_form'] = form
        context['msg'] = 'Matching records uploaded successfully.'
    if request.method == 'POST' and 'extract_button' in request.POST:
        extract_form = ExtractionScriptForm(request.POST, request.FILES, dg_type=dg.type)
        if extract_form.is_valid():
            csv_file = request.FILES.get('extract_file')
            script_pk = int(request.POST['script_selection'])
            script = Script.objects.get(pk=script_pk)
            info = [x.decode('ascii', 'ignore') for x in csv_file.readlines()]
            table = csv.DictReader(info)
            missing = list(set(dg.get_extracted_template_fieldnames()) -
                           set(table.fieldnames))
            if missing:  # column names are NOT a match, send back to user
                context['msg'] = ('The following columns need to be added or '
                                  f'renamed in the csv: {missing}')
                return render(request, template_name, context)
            good_records = []
            ext_parent, ext_child = get_extracted_models(dg_type)
            for i, row in enumerate(csv.DictReader(info)):
                d = docs.get(pk=int(row['data_document_id']))
                d.raw_category = row.pop('raw_category')
                wft = request.POST.get('weight_fraction_type', None)
                if wft:  # this signifies 'Composition' type
                    w = 'weight_fraction_type'
                    row[w] = WeightFractionType.objects.get(pk=int(wft))
                    unit_type_id = int(row['unit_type'])
                    row['unit_type'] = UnitType.objects.get(pk=unit_type_id)
                    rank = row['ingredient_rank']
                    row['ingredient_rank'] = None if rank == '' else rank
                ext, created = ext_parent.objects.get_or_create(
                    data_document=d, extraction_script=script)
                if not created and ext.prod_name != row['prod_name']:
                    # check that there is a 1:1 relation w/ prod_name
                    err_msg = ['must be 1:1 with "data_document_id".']
                    context['ext_err'][i + 1] = {'prod_name': err_msg}
                if created:
                    update_fields(row, ext)
                row['extracted_text'] = ext
                if (ext_child == ExtractedListPresence):
                    row['extracted_cpcat'] = ext
                row = clean_dict(row, ext_child)
                try:
                    ext.full_clean()
                    ext.save()
                    record = ext_child(**row)
                    record.full_clean()
                except ValidationError as e:
                    context['ext_err'][i + 1] = e.message_dict
                # NOTE(review): if full_clean raised above, `record` here is
                # stale from a prior row (or unbound on the first row) —
                # confirm intended; errors do abort the save below.
                good_records.append((d, ext, record))
            if context['ext_err']:  # if errors, send back with errors
                return render(request, template_name, context)
            if not context['ext_err']:  # no saving until all errors are removed
                for doc, text, record in good_records:
                    doc.extracted = True
                    doc.save()
                    text.save()
                    record.save()
                # Keep a copy of the uploaded CSV in the group's media store.
                fs = FileSystemStorage(store)
                fs.save(str(dg) + '_extracted.csv', csv_file)
                context['msg'] = (f'{len(good_records)} extracted records '
                                  'uploaded successfully.')
            context['extract_form'] = include_extract_form(dg)
    if request.method == 'POST' and 'bulk' in request.POST:
        # get the set of documents that have not been matched
        a = set(docs.values_list('pk', flat=True))
        b = set(prod_link.values_list('document_id', flat=True))
        # DataDocs to make products for...
        docs_needing_products = DataDocument.objects.filter(pk__in=list(a - b))
        stub = Product.objects.all().count() + 1
        for doc in docs_needing_products:
            # Try to name the new product from the ExtractedText record's prod_name
            try:
                ext = ExtractedText.objects.get(data_document_id=doc.id)
                if ext:
                    ext = ext.pull_out_cp()
                    if ext.prod_name:
                        new_prod_title = ext.prod_name
                    else:
                        new_prod_title = None
            except ExtractedText.DoesNotExist:
                new_prod_title = None
            # If the ExtractedText record can't provide a title, use the DataDocument's title
            if not new_prod_title:
                if doc.title:
                    new_prod_title = '%s stub' % doc.title
                else:
                    new_prod_title = 'unknown'
            product = Product.objects.create(
                title=new_prod_title,
                upc=f'stub_{stub}',  # stub UPCs are numbered sequentially
                data_source_id=doc.data_group.data_source_id
            )
            ProductDocument.objects.create(product=product, document=doc)
            stub += 1
        context['bulk'] = 0
    if request.method == 'POST' and 'clean_comp_data_button' in request.POST:
        clean_comp_data_form = CleanCompDataForm(request.POST, request.FILES)
        if clean_comp_data_form.is_valid():
            script_pk = int(request.POST['script_selection'])
            script = Script.objects.get(pk=script_pk)
            csv_file = request.FILES.get('clean_comp_data_file')
            info = [x.decode('ascii', 'ignore') for x in csv_file.readlines()]
            table = csv.DictReader(info)
            missing = list(set(dg.get_clean_comp_data_fieldnames()) -
                           set(table.fieldnames))
            if missing:  # column names are NOT a match, send back to user
                context['clean_comp_data_form'].collapsed = False
                context['msg'] = ('The following columns need to be added or '
                                  f'renamed in the csv: {missing}')
                return render(request, template_name, context)
            good_records = []
            for i, row in enumerate(csv.DictReader(info)):
                try:
                    extracted_chemical = ExtractedChemical.objects.get(pk=int(row['id']))
                except ExtractedChemical.DoesNotExist as e:
                    extracted_chemical = None
                    context['clean_comp_err'][i + 1] = {
                        'id': ['No ExtractedChemical matches id ' + row['id'], ]}
                # Reuse an existing Ingredient for this chemical, else stage a new one.
                try:
                    ingredient = Ingredient.objects.get(extracted_chemical=extracted_chemical)
                except Ingredient.DoesNotExist as e:
                    ingredient = Ingredient(extracted_chemical=extracted_chemical)
                ingredient.lower_wf_analysis = row['lower_wf_analysis']
                ingredient.central_wf_analysis = row['central_wf_analysis']
                ingredient.upper_wf_analysis = row['upper_wf_analysis']
                ingredient.script = script
                try:
                    ingredient.full_clean()
                except ValidationError as e:
                    context['clean_comp_err'][i + 1] = e.message_dict
                good_records.append(ingredient)
            if context['clean_comp_err']:  # if errors, send back with errors
                context['clean_comp_data_form'].collapsed = False
                return render(request, template_name, context)
            if not context['clean_comp_err']:  # no saving until all errors are removed
                for ingredient in good_records:
                    ingredient.save()
                context['msg'] = (f'{len(good_records)} clean composition data records '
                                  'uploaded successfully.')
            context['clean_comp_data_form'] = include_clean_comp_data_form(dg)
        else:
            # Invalid form: keep the clean-comp panel expanded so errors show.
            context['clean_comp_data_form'].collapsed = False
    return render(request, template_name, context)