# Imports used below; in Galaxy these live at the top of the containing
# module (Python 2 era code: note iteritems() and StringIO).
import StringIO

from galaxy.datatypes import sniff
from galaxy.util.bunch import Bunch


def get_uploaded_datasets( self, trans, context, override_name = None, override_info = None ):
    def get_data_file_filename( data_file, override_name = None, override_info = None ):
        dataset_name = override_name
        dataset_info = override_info
        def get_file_name( file_name ):
            # Strip any directory components the browser may have sent
            # (Windows or POSIX separators); keep only the base name.
            file_name = file_name.split( '\\' )[-1]
            file_name = file_name.split( '/' )[-1]
            return file_name
        try:
            # Use the existing file
            if not dataset_name and 'filename' in data_file:
                dataset_name = get_file_name( data_file['filename'] )
            if not dataset_info:
                dataset_info = 'uploaded file'
            # The returned 'name' is used as the precreated dataset's name
            return Bunch( type='file', path=data_file['local_filename'], name=get_file_name( data_file['filename'] ) )
        except Exception:
            # The uploaded file should've been persisted by the upload tool action
            return Bunch( type=None, path=None, name=None )
    def get_url_paste_urls_or_filename( group_incoming, override_name = None, override_info = None ):
        url_paste_file = group_incoming.get( 'url_paste', None )
        if url_paste_file is not None:
            # Sniff only the first 1K of the persisted paste to decide whether
            # the box contained URLs or literal file content.
            url_paste = open( url_paste_file, 'r' ).read( 1024 )
            if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ):
                # The 1K read above is only a sniff; re-read the full paste
                # so URLs beyond the first 1K are not silently dropped.
                url_paste = open( url_paste_file, 'r' ).read().replace( '\r', '' ).split( '\n' )
                for line in url_paste:
                    line = line.strip()
                    if line:
                        if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ):
                            continue # non-url line, ignore
                        precreated_name = line
                        dataset_name = override_name
                        if not dataset_name:
                            dataset_name = line
                        dataset_info = override_info
                        if not dataset_info:
                            dataset_info = 'uploaded url'
                        yield Bunch( type='url', path=line, name=precreated_name )
            else:
                # Literal content: the persisted paste file itself becomes the upload
                dataset_name = dataset_info = precreated_name = 'Pasted Entry' # we need to differentiate between various url pastes here
                if override_name:
                    dataset_name = override_name
                if override_info:
                    dataset_info = override_info
                yield Bunch( type='file', path=url_paste_file, name=precreated_name )
    def get_one_filename( context ):
        data_file = context['file_data']
        url_paste = context['url_paste']
        name = context.get( 'NAME', None )
        info = context.get( 'INFO', None )
        warnings = []
        space_to_tab = False
        if context.get( 'space_to_tab', None ) not in ["None", None]:
            space_to_tab = True
        file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
        if file_bunch.path and url_paste:
            if url_paste.strip():
                warnings.append( "All file contents specified in the paste box were ignored." )
        else: # we need to use url_paste
            for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
                if file_bunch.path:
                    break
        # Attach the conversion flag to the bunch so callers can read it
        file_bunch.space_to_tab = space_to_tab
        return file_bunch, warnings
    def get_filenames( context ):
        rval = []
        data_file = context['file_data']
        url_paste = context['url_paste']
        name = context.get( 'NAME', None )
        info = context.get( 'INFO', None )
        space_to_tab = False
        if context.get( 'space_to_tab', None ) not in ["None", None]:
            space_to_tab = True
        warnings = []
        file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
        if file_bunch.path:
            file_bunch.space_to_tab = space_to_tab
            rval.append( file_bunch )
        for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
            if file_bunch.path:
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
        return rval
    file_type = self.get_file_type( context )
    d_type = self.get_datatype( trans, context )
    dbkey = context.get( 'dbkey', None )
    writable_files = d_type.writable_files
    writable_files_offset = 0
    # One slot per writable file, filled positionally from the repeat groups
    groups_incoming = [ None for filename in writable_files ]
    for group_incoming in context.get( self.name, [] ):
        i = int( group_incoming['__index__'] )
        groups_incoming[ i ] = group_incoming
    if d_type.composite_type is not None:
        # Handle uploading of composite datatypes: only one Dataset can be created
        dataset = Bunch()
        dataset.type = 'composite'
        dataset.file_type = file_type
        dataset.dbkey = dbkey
        dataset.datatype = d_type
        dataset.warnings = []
        dataset.metadata = {}
        dataset.composite_files = {}
        # Load metadata that the user is allowed to set at upload time
        files_metadata = context.get( self.metadata_ref, {} )
        for meta_name, meta_spec in d_type.metadata_spec.iteritems():
            if meta_spec.set_in_upload:
                if meta_name in files_metadata:
                    dataset.metadata[ meta_name ] = files_metadata[ meta_name ]
        if dataset.datatype.composite_type == 'auto_primary_file':
            # The datatype generates its own primary file; stream it to disk
            # rather than sniffing an upload.
            temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file() ), prefix='upload_auto_primary_file' )
            dataset.primary_file = temp_name
            dataset.space_to_tab = False
            dataset.precreated_name = dataset.name = 'Uploaded Composite Dataset (%s)' % ( file_type )
        else:
            # The first repeat group supplies the primary file; the remaining
            # groups map onto the datatype's writable files.
            file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
            writable_files_offset = 1
            dataset.primary_file = file_bunch.path
            dataset.space_to_tab = file_bunch.space_to_tab
            dataset.precreated_name = file_bunch.name
            dataset.name = file_bunch.name
            dataset.warnings.extend( warnings )
        if dataset.primary_file is None: # remove this before finish, this should create an empty dataset
            raise Exception( 'No primary dataset file was available for composite upload' )
        # keys parallels the ordering of writable_files
        keys = [ value.name for value in writable_files.values() ]
        for i, group_incoming in enumerate( groups_incoming[ writable_files_offset: ] ):
            key = keys[ i + writable_files_offset ]
            if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                dataset.composite_files[ key ] = None
            else:
                file_bunch, warnings = get_one_filename( group_incoming )
                if file_bunch.path:
                    dataset.composite_files[ key ] = file_bunch.__dict__
                else:
                    dataset.composite_files[ key ] = None
                    if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                        dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
        return [ dataset ]
    else:
        # Regular (non-composite) upload: only the first repeat group is consumed
        datasets = get_filenames( context[ self.name ][0] )
        rval = []
        for dataset in datasets:
            dataset.file_type = file_type
            dataset.datatype = d_type
            dataset.ext = self.get_datatype_ext( trans, context )
            dataset.dbkey = dbkey
            rval.append( dataset )
        return rval
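
# --- Usage sketch: regular (non-composite) upload ---
# A hypothetical illustration of the incoming form state, not part of the
# original source. `upload_param` and `trans` stand in for the live tool
# parameter and transaction objects, and the repeat group is assumed to be
# posted under the name 'files' (self.name). The field names mirror those
# read above ('file_data', 'url_paste', 'NAME', 'INFO', 'space_to_tab',
# '__index__'). Note that for non-composite types only the first repeat
# group, context[self.name][0], is consumed.
#
#   context = {
#       'file_type': 'tabular',
#       'dbkey': 'hg18',
#       'files': [ {
#           '__index__': 0,
#           'file_data': { 'filename': 'C:\\docs\\table.txt', 'local_filename': '/tmp/upload_tmp_1' },
#           'url_paste': None,
#           'NAME': None,
#           'INFO': None,
#           'space_to_tab': 'None',
#       } ],
#   }
#   datasets = upload_param.get_uploaded_datasets( trans, context )
#   # -> [ Bunch( type='file', path='/tmp/upload_tmp_1', name='table.txt',
#   #             space_to_tab=False, file_type='tabular', ext=..., dbkey='hg18' ) ]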
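
# --- Usage sketch: composite upload ---
# Hypothetical, under the same assumptions as above. When
# d_type.composite_type is not None, exactly one Bunch is returned: group 0
# supplies the primary file (unless composite_type == 'auto_primary_file',
# in which case the datatype generates it), and each subsequent group is
# matched positionally, via '__index__', to one of the datatype's
# writable_files components.
#
#   context['files'] = [
#       { '__index__': 0, 'file_data': {...}, 'url_paste': None,
#         'NAME': None, 'INFO': None, 'space_to_tab': 'None' },   # primary file
#       { '__index__': 1, 'file_data': {...}, 'url_paste': None,
#         'NAME': None, 'INFO': None, 'space_to_tab': 'None' },   # first component
#   ]
#   dataset, = upload_param.get_uploaded_datasets( trans, context )
#   # dataset.type == 'composite'; missing required components are recorded in
#   # dataset.warnings, and dataset.composite_files maps each component name
#   # to the selected file's attributes (or None).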