def post(self, request, *args, **kwargs):
    form = ImportISATabFileForm(request.POST, request.FILES)
    if form.is_valid() or request.is_ajax():
        try:
            f = form.cleaned_data['isa_tab_file']
        except KeyError:
            f = None
        try:
            url = form.cleaned_data['isa_tab_url']
        except KeyError:
            url = None
        if url:
            response = self.import_by_url(url)
        else:
            try:
                response = self.import_by_file(f)
            except Exception as e:
                # format_exc() takes no exception argument
                logger.error(traceback.format_exc())
                return HttpResponseBadRequest("{} {}".format(
                    PARSER_UNEXPECTED_ERROR_MESSAGE, e))
        # get AWS Cognito identity ID
        if settings.REFINERY_DEPLOYMENT_PLATFORM == 'aws':
            # dict.get() never raises KeyError, so check for a missing or
            # empty value explicitly
            identity_id = request.POST.get('identity_id')
            if not identity_id:
                error_msg = 'identity_id is missing'
                if request.is_ajax():
                    return HttpResponseBadRequest(
                        json.dumps({'error': error_msg}),
                        'application/json')
                return render(request, self.template_name,
                              {'error_message': error_msg})
        else:
            identity_id = None
        if not response['success']:
            if request.is_ajax():
                return HttpResponseBadRequest(
                    json.dumps({'error': response['message']}),
                    content_type='application/json')
            return render_to_response(
                self.template_name,
                context_instance=RequestContext(request, {
                    'form': form,
                    'error': response['message']
                }))
        logger.debug("Temp file name: '%s'",
                     response['data']['temp_file_path'])
        try:
            parse_isatab_invocation = parse_isatab(
                request.user.username, False,
                response['data']['temp_file_path'],
                identity_id=identity_id)
        except ParserException as e:
            error_message = "{} {}".format(PARSER_ERROR_MESSAGE, e.message)
            logger.error(error_message)
            return HttpResponseBadRequest(error_message)
        except Exception as e:
            error_message = "{} {}".format(PARSER_UNEXPECTED_ERROR_MESSAGE,
                                           traceback.format_exc())
            logger.error(error_message)
            return HttpResponseBadRequest("{} {}".format(
                PARSER_UNEXPECTED_ERROR_MESSAGE, e))
        else:
            dataset_uuid = parse_isatab_invocation
        try:
            os.unlink(response['data']['temp_file_path'])
        except OSError as e:
            logger.error("Couldn't unlink temporary file: %s %s",
                         response['data']['temp_file_path'], e)
        # import data files
        if dataset_uuid:
            try:
                dataset = DataSet.objects.get(uuid=dataset_uuid)
            except (DataSet.DoesNotExist,
                    DataSet.MultipleObjectsReturned):
                logger.error(
                    "Cannot import data files for data set UUID '%s'",
                    dataset_uuid)
            else:
                # start importing uploaded data files
                for file_store_item in dataset.get_file_store_items():
                    if file_store_item.source.startswith(
                            (settings.REFINERY_DATA_IMPORT_DIR, 's3://')):
                        import_file.delay(file_store_item.uuid)
            if request.is_ajax():
                return JsonResponse({
                    'success': 'Data set imported',
                    'data': {'new_data_set_uuid': dataset_uuid}
                })
            return HttpResponseRedirect(
                reverse(self.success_view_name, args=[dataset_uuid]))
        else:
            error = 'Problem parsing ISA-Tab file'
            if request.is_ajax():
                return JsonResponse({'error': error})
            context = RequestContext(request, {
                'form': form,
                'error': error
            })
            return render_to_response(self.template_name,
                                      context_instance=context)
    else:
        # submitted form is not valid
        context = RequestContext(request, {'form': form})
        return render_to_response(self.template_name,
                                  context_instance=context)
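
# The view above expects a form exposing `isa_tab_file` and `isa_tab_url`
# keys in its cleaned_data. A minimal sketch of such a form, assuming
# standard Django form fields; the class name below is a hypothetical
# stand-in, and the real ImportISATabFileForm may validate differently:
from django import forms


class ISATabImportFormSketch(forms.Form):
    """Hypothetical stand-in for ImportISATabFileForm."""
    isa_tab_file = forms.FileField(required=False)
    isa_tab_url = forms.URLField(required=False)

    def clean(self):
        cleaned_data = super(ISATabImportFormSketch, self).clean()
        # exactly one of the two inputs should be provided
        if not cleaned_data.get('isa_tab_file') and \
                not cleaned_data.get('isa_tab_url'):
            raise forms.ValidationError(
                'Provide either an ISA-Tab file or a URL.')
        return cleaned_data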
def run(self):
    # create investigation, study and assay objects
    investigation = self._create_investigation()
    # FIXME: self.metadata_file.name may not be informative, especially in
    # case of temp files that don't exist on disk
    study = self._create_study(investigation=investigation,
                               file_name=self.metadata_file.name)
    assay = self._create_assay(study=study,
                               file_name=self.metadata_file.name)
    # import input file as "pre-isa" file
    logger.info("trying to add pre-isa archive file %s",
                self.metadata_file.name)
    # FIXME: this will not create a FileStoreItem if self.metadata_file
    # does not exist on disk (e.g., a file object like TemporaryFile)
    investigation.pre_isarchive_file = create(self.metadata_file.name)
    import_file(investigation.pre_isarchive_file, refresh=True)
    investigation.save()

    # TODO: test if there are fewer columns than required
    logger.debug("Parsing with file column %s and "
                 "auxiliary file column %s",
                 self.file_column_index, self.auxiliary_file_column_index)
    # UUIDs of data files to postpone importing until parsing is finished
    data_files = []
    # iterate over non-header rows in file
    for row in self.metadata_reader:
        # TODO: resolve relative indices
        internal_source_column_index = self.source_column_index
        internal_sample_column_index = self.sample_column_index
        internal_assay_column_index = self.assay_column_index
        # add data file to file store
        data_file_path = self.file_source_translator(
            row[self.file_column_index])
        data_file_uuid = create(source=data_file_path)
        data_files.append(data_file_uuid)
        # add auxiliary file to file store
        if self.auxiliary_file_column_index:
            auxiliary_file_path = self.file_source_translator(
                row[self.auxiliary_file_column_index])
            auxiliary_file_uuid = create(source=auxiliary_file_path)
            data_files.append(auxiliary_file_uuid)
        else:
            auxiliary_file_uuid = None
        # add files to file server
        # TODO: add error handling in case of None values for UUIDs
        file_server.models.add(data_file_uuid, auxiliary_file_uuid)
        # create nodes if file was successfully created
        # source node
        source_name = self._create_name(
            row, internal_source_column_index, self.file_column_index)
        source_node, is_source_new = Node.objects.get_or_create(
            study=study, name=source_name, type=Node.SOURCE)
        # sample node
        sample_name = self._create_name(
            row, internal_sample_column_index, self.file_column_index)
        sample_node, is_sample_new = Node.objects.get_or_create(
            study=study, name=sample_name, type=Node.SAMPLE)
        source_node.add_child(sample_node)
        # assay node
        assay_name = self._create_name(
            row, internal_assay_column_index, self.file_column_index)
        assay_node, is_assay_new = Node.objects.get_or_create(
            study=study, assay=assay, name=assay_name, type=Node.ASSAY)
        sample_node.add_child(assay_node)
        file_node = Node.objects.create(
            study=study, assay=assay,
            name=row[self.file_column_index].strip(),
            file_uuid=data_file_uuid, type=Node.RAW_DATA_FILE,
            species=self._get_species(row),
            genome_build=self._get_genome_build(row),
            is_annotation=self._is_annotation(row))
        assay_node.add_child(file_node)
        # iterate over columns to create attributes to attach to the
        # sample node
        for column_index in range(0, len(row)):
            # skip data file column
            if (self.file_column_index == column_index or
                    self.auxiliary_file_column_index == column_index or
                    self.annotation_column_index == column_index):
                continue
            # create attribute as characteristic and attach to sample node
            # if the sample node was newly created
            if is_sample_new:
                Attribute.objects.create(
                    node=sample_node,
                    type=Attribute.CHARACTERISTICS,
                    subtype=self.headers[column_index].strip().lower(),
                    value=row[column_index].strip())

    # Start remote file import tasks if the `Make Import Permanent:` flag
    # was set by the user. Likewise, try to import these files if their
    # source begins with the REFINERY_DATA_IMPORT_DIR setting (this will be
    # the case if users upload data files associated with their metadata)
    for uuid in data_files:
        try:
            file_store_item = FileStoreItem.objects.get(uuid=uuid)
        except (FileStoreItem.DoesNotExist,
                FileStoreItem.MultipleObjectsReturned) as e:
            logger.error("Couldn't properly fetch FileStoreItem %s", e)
        else:
            if (self.file_permanent or file_store_item.source.startswith(
                    (settings.REFINERY_DATA_IMPORT_DIR, 's3://'))):
                import_file.delay(uuid)
    return investigation
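
# `file_source_translator` above converts the raw file reference found in
# the metadata column into a resolvable source. A self-contained sketch of
# what such a callable could do, assuming bare file names should be anchored
# to a user's import directory; the constant and rules below are
# illustrative, not this code base's actual translator:
import os

ASSUMED_DATA_IMPORT_DIR = '/data/import'  # stand-in for a deployment setting


def translate_file_source_sketch(source, username=None, base_path=None):
    source = source.strip()
    # absolute paths and URLs are already resolvable
    if source.startswith(('/', 'http://', 'https://', 'ftp://', 's3://')):
        return source
    # otherwise anchor the bare file name to a known directory
    if base_path:
        return os.path.join(base_path, source)
    if username:
        return os.path.join(ASSUMED_DATA_IMPORT_DIR, username, source)
    return source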
def run(self):
    # create investigation, study and assay objects
    investigation = self._create_investigation()
    # FIXME: self.metadata_file.name may not be informative, especially in
    # case of temp files that don't exist on disk
    study = self._create_study(investigation=investigation,
                               file_name=self.metadata_file.name)
    assay = self._create_assay(study=study,
                               file_name=self.metadata_file.name)
    # import input file as "pre-isa" file
    logger.info("trying to add pre-isa archive file %s",
                self.metadata_file.name)
    # FIXME: this will not create a FileStoreItem if self.metadata_file
    # does not exist on disk (e.g., a file object like TemporaryFile)
    investigation.pre_isarchive_file = create(
        self.metadata_file.name, permanent=True)
    import_file(investigation.pre_isarchive_file, refresh=True)
    investigation.save()

    # TODO: test if there are fewer columns than required
    logger.debug("Parsing with file column %s and "
                 "auxiliary file column %s",
                 self.file_column_index, self.auxiliary_file_column_index)
    # UUIDs of data files to postpone importing until parsing is finished
    data_files = []
    # iterate over non-header rows in file
    for row in self.metadata_reader:
        # TODO: resolve relative indices
        internal_source_column_index = self.source_column_index
        internal_sample_column_index = self.sample_column_index
        internal_assay_column_index = self.assay_column_index
        # add data file to file store
        data_file_path = self.file_source_translator(
            row[self.file_column_index])
        data_file_uuid = create(
            source=data_file_path, permanent=self.file_permanent)
        data_files.append(data_file_uuid)
        # add auxiliary file to file store
        if self.auxiliary_file_column_index:
            auxiliary_file_path = self.file_source_translator(
                row[self.auxiliary_file_column_index])
            auxiliary_file_uuid = create(
                source=auxiliary_file_path, permanent=self.file_permanent)
            data_files.append(auxiliary_file_uuid)
        else:
            auxiliary_file_uuid = None
        # add files to file server
        # TODO: add error handling in case of None values for UUIDs
        file_server.models.add(data_file_uuid, auxiliary_file_uuid)
        # create nodes if file was successfully created
        # source node
        source_name = self._create_name(
            row, internal_source_column_index, self.file_column_index)
        source_node, is_source_new = Node.objects.get_or_create(
            study=study, name=source_name, type=Node.SOURCE)
        # sample node
        sample_name = self._create_name(
            row, internal_sample_column_index, self.file_column_index)
        sample_node, is_sample_new = Node.objects.get_or_create(
            study=study, name=sample_name, type=Node.SAMPLE)
        source_node.add_child(sample_node)
        # assay node
        assay_name = self._create_name(
            row, internal_assay_column_index, self.file_column_index)
        assay_node, is_assay_new = Node.objects.get_or_create(
            study=study, assay=assay, name=assay_name, type=Node.ASSAY)
        sample_node.add_child(assay_node)
        file_node = Node.objects.create(
            study=study, assay=assay,
            name=row[self.file_column_index].strip(),
            file_uuid=data_file_uuid, type=Node.RAW_DATA_FILE,
            species=self._get_species(row),
            genome_build=self._get_genome_build(row),
            is_annotation=self._is_annotation(row))
        assay_node.add_child(file_node)
        # iterate over columns to create attributes to attach to the
        # sample node
        for column_index in range(0, len(row)):
            # skip data file column
            if (self.file_column_index == column_index or
                    self.auxiliary_file_column_index == column_index or
                    self.annotation_column_index == column_index):
                continue
            # create attribute as characteristic and attach to sample node
            # if the sample node was newly created
            if is_sample_new:
                Attribute.objects.create(
                    node=sample_node,
                    type=Attribute.CHARACTERISTICS,
                    subtype=self.headers[column_index].strip().lower(),
                    value=row[column_index].strip())

    # kick off data file importing tasks
    for uuid in data_files:
        import_file.delay(uuid)
    return investigation
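
# Both run() variants collect file store UUIDs while parsing and only
# enqueue import tasks once the whole file has been read, so a parsing error
# part way through does not leave a half-imported data set behind. The
# pattern in isolation (a sketch; `parse_row` and `enqueue_import` are
# hypothetical stand-ins, the latter for import_file.delay):


def parse_then_import(rows, parse_row, enqueue_import):
    uuids = []
    for row in rows:
        # may raise on a malformed row; nothing has been enqueued yet
        uuids.append(parse_row(row))
    for uuid in uuids:
        enqueue_import(uuid)  # e.g. import_file.delay(uuid) in this code base
    return uuids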