Example #1
 def filenames_from_url_paste( url_paste, group_incoming, override_name = None, override_info = None ):
     filenames = []
     if url_paste not in [ None, "" ]:
         if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ):
             url_paste = url_paste.replace( '\r', '' ).split( '\n' )
             for line in url_paste:
                 line = line.strip()
                 if line:
                     if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ):
                         continue # non-url line, ignore
                     precreated_name = line
                     dataset_name = override_name
                     if not dataset_name:
                         dataset_name = line
                     dataset_info = override_info
                     if not dataset_info:
                         dataset_info = 'uploaded url'
                     try:
                         temp_name, is_multi_byte = sniff.stream_to_file( urllib.urlopen( line ), prefix='url_paste' )
                     except Exception, e:
                         temp_name = None
                         is_multi_byte = False  # must be defined for the yield below
                         precreated_name = str( e )
                         log.exception( 'exception in sniff.stream_to_file using url_paste %s: %s' % ( url_paste, str( e ) ) )
                         try:
                             self.remove_temp_file( temp_name )
                         except:
                             pass
                     yield ( temp_name, precreated_name, is_multi_byte, dataset_name, dataset_info )
                     #yield ( None, str( e ), False, dataset_name, dataset_info )
         else:
             dataset_name = dataset_info = precreated_name = 'Pasted Entry' #we need to differentiate between various url pastes here
             if override_name:
                 dataset_name = override_name
             if override_info:
                 dataset_info = override_info
             is_valid = False
             for line in url_paste: # Trim off empty lines from the beginning
                 line = line.rstrip( '\r\n' )
                 if line:
                     is_valid = True
                     break
             if is_valid:
                 try:
                     temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( url_paste ), prefix='strio_url_paste' )
                 except Exception, e:
                     log.exception( 'exception in sniff.stream_to_file using StringIO.StringIO( url_paste ) %s: %s' % ( url_paste, str( e ) ) )
                     temp_name = None
                     precreated_name = str( e )
                     try:
                         self.remove_temp_file( temp_name )
                     except:
                         pass
                 yield ( temp_name, precreated_name, is_multi_byte, dataset_name, dataset_info )
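
All of these snippets lean on sniff.stream_to_file() to spool a readable stream into a named temporary file; the older ones also get back an is_multi_byte flag, while the newer ones receive just the path. As a rough, self-contained sketch of that idea (standard library only, not Galaxy's implementation), the core behaviour looks something like this:

import shutil
import tempfile


def stream_to_tempfile(stream, prefix='url_paste', chunk_size=1 << 20):
    """Copy a readable binary stream into a named temp file and return its path."""
    with tempfile.NamedTemporaryFile(prefix=prefix, delete=False) as out:
        shutil.copyfileobj(stream, out, chunk_size)
    return out.name


# Hypothetical usage:
#     with open('example.bin', 'rb') as src:
#         path = stream_to_tempfile(src)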
Example #2
def _has_src_to_path(upload_config, item, is_dataset=False):
    assert "src" in item, item
    src = item.get("src")
    name = item.get("name")
    if src == "url":
        url = item.get("url")
        try:
            path = sniff.stream_url_to_file(
                url,
                file_sources=get_file_sources(upload_config.working_directory))
        except Exception as e:
            raise Exception("Failed to fetch url {}. {}".format(url, str(e)))

        if not is_dataset:
            # Actual target dataset will validate and put results in dict
            # that gets passed back to Galaxy.
            for hash_function in HASH_NAMES:
                hash_value = item.get(hash_function)
                if hash_value:
                    _handle_hash_validation(upload_config, hash_function,
                                            hash_value, path)
        if name is None:
            name = url.split("/")[-1]
    elif src == "pasted":
        path = sniff.stream_to_file(StringIO(item["paste_content"]))
        if name is None:
            name = "Pasted Entry"
    else:
        assert src == "path"
        path = item["path"]
        if name is None:
            name = os.path.basename(path)
    return name, path
Example #3
def _has_src_to_path(upload_config, item, is_dataset=False):
    assert "src" in item, item
    src = item.get("src")
    name = item.get("name")
    if src == "url":
        url = item.get("url")
        path = sniff.stream_url_to_file(url)
        if not is_dataset:
            # Actual target dataset will validate and put results in dict
            # that gets passed back to Galaxy.
            for hash_function in HASH_NAMES:
                hash_value = item.get(hash_function)
                if hash_value:
                    _handle_hash_validation(upload_config, hash_function,
                                            hash_value, path)
        if name is None:
            name = url.split("/")[-1]
    elif src == "pasted":
        path = sniff.stream_to_file(StringIO(item["paste_content"]))
        if name is None:
            name = "Pasted Entry"
    else:
        assert src == "path"
        path = item["path"]
        if name is None:
            name = os.path.basename(path)
    return name, path
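
The item dictionaries that _has_src_to_path() expects are easiest to see by example. The payloads below are hypothetical; only the "path" branch is exercised directly here, since the "url" and "pasted" branches go through the sniff helpers:

import os

# Hypothetical item payloads in the shape the function above expects:
url_item = {"src": "url", "url": "https://example.org/data/sample.tsv"}
pasted_item = {"src": "pasted", "paste_content": "a\tb\tc\n1\t2\t3\n"}
path_item = {"src": "path", "path": "/data/incoming/sample.tsv"}

# With no explicit "name", the "path" branch falls back to the basename:
assert os.path.basename(path_item["path"]) == "sample.tsv"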
Example #4
def add_composite_file( dataset, registry, json_file, output_path, files_path ):
        if dataset.composite_files:
            os.mkdir( files_path )
            for name, value in dataset.composite_files.iteritems():
                value = util.bunch.Bunch( **value )
                if dataset.composite_file_paths[ value.name ] is None and not value.optional:
                    file_err( 'A required composite data file was not provided (%s)' % name, dataset, json_file )
                    break
                elif dataset.composite_file_paths[value.name] is not None:
                    dp = dataset.composite_file_paths[value.name][ 'path' ]
                    isurl = dp.find('://') != -1  # todo fixme
                    if isurl:
                        try:
                            temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dp ), prefix='url_paste' )
                        except Exception, e:
                            file_err( 'Unable to fetch %s\n%s' % ( dp, str( e ) ), dataset, json_file )
                            return
                        dataset.path = temp_name
                        dp = temp_name
                    if not value.is_binary:
                        tmpdir = output_adjacent_tmpdir( output_path )
                        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                        if dataset.composite_file_paths[ value.name ].get( 'space_to_tab', value.space_to_tab ):
                            sniff.convert_newlines_sep2tabs( dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix )
                        else:
                            sniff.convert_newlines( dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix )
                    move_copy(dp, os.path.join( files_path, name ) )
Example #5
def persist_uploads(params, trans):
    """
    Turn any uploads in the submitted form to persisted files.
    """
    if 'files' in params:
        new_files = []
        for upload_dataset in params['files']:
            f = upload_dataset['file_data']
            if isinstance(f, cgi_FieldStorage):
                assert not isinstance(f.file, StringIO)
                assert f.file.name != '<fdopen>'
                local_filename = util.mkstemp_ln(f.file.name,
                                                 'upload_file_data_')
                f.file.close()
                upload_dataset['file_data'] = dict(
                    filename=f.filename, local_filename=local_filename)
            elif type(f) == dict and 'local_filename' not in f:
                raise Exception(
                    'Uploaded file was encoded in a way not understood by Galaxy.'
                )
            if 'url_paste' in upload_dataset and upload_dataset[
                    'url_paste'] and upload_dataset['url_paste'].strip() != '':
                upload_dataset['url_paste'] = stream_to_file(
                    StringIO(
                        validate_url(
                            upload_dataset['url_paste'],
                            trans.app.config.fetch_url_allowlist_ips)),
                    prefix="strio_url_paste_")
            else:
                upload_dataset['url_paste'] = None
            new_files.append(upload_dataset)
        params['files'] = new_files
    return params
Example #6
def add_composite_file(dataset, registry, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[
                    value.name] is None and not value.optional:
                file_err(
                    'A required composite data file was not provided (%s)' %
                    name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name, dataset.is_multi_byte = sniff.stream_to_file(
                            urllib.urlopen(dp), prefix='url_paste')
                    except Exception, e:
                        file_err('Unable to fetch %s\n%s' % (dp, str(e)),
                                 dataset, json_file)
                        return
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    if dataset.composite_file_paths[value.name].get(
                            'space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp)
                    else:
                        sniff.convert_newlines(dp)
                shutil.move(dp, os.path.join(files_path, name))
Example #7
def add_file(dataset, registry, json_file, output_path):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get('link_data_only', 'copy_files')
    in_place = dataset.get('in_place', True)

    try:
        ext = dataset.file_type
    except AttributeError:
        file_err(
            'Unable to process uploaded file, missing file_type parameter.',
            dataset, json_file)
        return

    if dataset.type == 'url':
        try:
            page = urllib.urlopen(
                dataset.path)  #page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(
                page,
                prefix='url_paste',
                source_encoding=util.get_charset_from_http_headers(
                    page.headers))
        except Exception, e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)),
                     dataset, json_file)
            return
        dataset.path = temp_name
Example #8
def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.items():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err('A required composite data file was not provided (%s)' % name, dataset, json_file)
                break
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name, dataset.is_multi_byte = sniff.stream_to_file(urlopen(dp), prefix='url_paste')
                    except Exception as e:
                        file_err('Unable to fetch %s\n%s' % (dp, str(e)), dataset, json_file)
                        return
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    tmpdir = output_adjacent_tmpdir(output_path)
                    tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                    if dataset.composite_file_paths[value.name].get('space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                    else:
                        sniff.convert_newlines(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                shutil.move(dp, os.path.join(files_path, name))
    # Move the dataset to its "real" path
    shutil.move(dataset.primary_file, output_path)
    # Write the job info
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                stdout='uploaded %s file' % dataset.file_type)
    json_file.write(dumps(info) + "\n")
Example #9
def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.items():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                raise UploadProblemException('A required composite data file was not provided (%s)' % name)
            elif dataset.composite_file_paths[value.name] is not None:
                dp = dataset.composite_file_paths[value.name]['path']
                isurl = dp.find('://') != -1  # todo fixme
                if isurl:
                    try:
                        temp_name = sniff.stream_to_file(urlopen(dp), prefix='url_paste')
                    except Exception as e:
                        raise UploadProblemException('Unable to fetch %s\n%s' % (dp, str(e)))
                    dataset.path = temp_name
                    dp = temp_name
                if not value.is_binary:
                    tmpdir = output_adjacent_tmpdir(output_path)
                    tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                    if dataset.composite_file_paths[value.name].get('space_to_tab', value.space_to_tab):
                        sniff.convert_newlines_sep2tabs(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                    else:
                        sniff.convert_newlines(dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                shutil.move(dp, os.path.join(files_path, name))
    # Move the dataset to its "real" path
    shutil.move(dataset.primary_file, output_path)
    # Write the job info
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                stdout='uploaded %s file' % dataset.file_type)
    json_file.write(dumps(info) + "\n")
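
When space_to_tab is set, sniff.convert_newlines_sep2tabs() normalizes line endings and collapses whitespace runs into single tabs before the composite file is moved into place. A minimal approximation of that transformation (illustrative only, not the sniff implementation) is:

import re


def newlines_and_sep_to_tabs(text):
    """Normalize CR/CRLF to LF and turn runs of spaces/tabs into single tabs."""
    lines = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')
    return '\n'.join(re.sub(r'[ \t]+', '\t', line) for line in lines)


# Example: "1  2\t 3\r\n" becomes "1\t2\t3\n"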
Example #10
def url_upload(dataset):
    """
    @summary: Upload a file from a URL and update dataset information.
    @param dataset: [Bunch] The dataset for the uploaded file.
    """
    url = dataset.path
    page = urllib.urlopen( url )
    temp_name, dataset.is_multi_byte = sniff.stream_to_file( page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers( page.headers ) )
    dataset.path = temp_name
    dataset.name = url.split('/')[-1]
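
The same idea in a self-contained Python 3 form (a sketch with a hypothetical helper name, not the Galaxy code) is roughly:

import shutil
import tempfile
from urllib.request import urlopen


def download_url_to_tempfile(url):
    """Fetch a URL into a temp file; return (path, name derived from the URL)."""
    with urlopen(url) as page, \
            tempfile.NamedTemporaryFile(prefix='url_paste', delete=False) as out:
        shutil.copyfileobj(page, out)
    return out.name, url.split('/')[-1]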
Example #11
    def to_path(path_or_url):
        is_url = path_or_url.find('://') != -1  # todo fixme
        if is_url:
            try:
                temp_name = sniff.stream_to_file(urlopen(path_or_url), prefix='url_paste')
            except Exception as e:
                raise UploadProblemException('Unable to fetch %s\n%s' % (path_or_url, unicodify(e)))

            return temp_name, is_url

        return path_or_url, is_url
Example #12
    def to_path(path_or_url):
        is_url = path_or_url.find('://') != -1  # todo fixme
        if is_url:
            try:
                temp_name = sniff.stream_to_file(urlopen(path_or_url), prefix='url_paste')
            except Exception as e:
                raise UploadProblemException('Unable to fetch %s\n%s' % (path_or_url, str(e)))

            return temp_name, is_url

        return path_or_url, is_url
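
The '://' substring test above is marked "# todo fixme"; a slightly stricter check (a standard-library sketch, not what Galaxy actually does) would allow-list the accepted schemes explicitly:

from urllib.parse import urlparse


def looks_like_url(path_or_url, allowed_schemes=('http', 'https', 'ftp')):
    """Return True only for strings whose scheme is in an explicit allow-list."""
    parsed = urlparse(path_or_url)
    return parsed.scheme in allowed_schemes and bool(parsed.netloc)


# looks_like_url('https://example.org/a.txt') -> True
# looks_like_url('/data/a.txt')               -> False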
Example #13
def add_file(dataset, json_file, output_path):
    data_type = None
    line_count = None

    if dataset.type == "url":
        try:
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(urllib.urlopen(dataset.path), prefix="url_paste")
        except Exception, e:
            file_err("Unable to fetch %s\n%s" % (dataset.path, str(e)), dataset, json_file)
            return
        dataset.path = temp_name
Example #14
def add_file( dataset, json_file ):
    data_type = None
    line_count = None

    if dataset.type == 'url':
        try:
            temp_name, is_multi_byte = sniff.stream_to_file( urllib.urlopen( dataset.path ), prefix='url_paste' )
        except Exception, e:
            file_err( 'Unable to fetch %s\n%s' % ( dataset.path, str( e ) ), dataset, json_file )
            return
        dataset.path = temp_name
        dataset.is_multi_byte = is_multi_byte
Example #15
def url_upload(dataset):
    """
    @summary: Upload a file from a URL and update dataset information.
    @param dataset: [Bunch] The dataset for the uploaded file.
    """
    url = dataset.path
    page = urllib.urlopen(url)
    temp_name, dataset.is_multi_byte = sniff.stream_to_file(
        page,
        prefix='url_paste',
        source_encoding=util.get_charset_from_http_headers(page.headers))
    dataset.path = temp_name
    dataset.name = url.split('/')[-1]
Example #16
def add_file(dataset, json_file, output_path):
    data_type = None
    line_count = None
    converted_path = None

    if dataset.type == 'url':
        try:
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(
                urllib.urlopen(dataset.path), prefix='url_paste')
        except Exception, e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)),
                     dataset, json_file)
            return
        dataset.path = temp_name
Example #17
def _has_src_to_path(item):
    assert "src" in item, item
    src = item.get("src")
    name = item.get("name")
    if src == "url":
        url = item.get("url")
        path = sniff.stream_url_to_file(url)
        if name is None:
            name = url.split("/")[-1]
    elif src == "pasted":
        path = sniff.stream_to_file(StringIO(item["paste_content"]))
        if name is None:
            name = "Pasted Entry"
    else:
        assert src == "path"
        path = item["path"]
        if name is None:
            name = os.path.basename(path)
    return name, path
Example #18
def _has_src_to_path(item):
    assert "src" in item, item
    src = item.get("src")
    name = item.get("name")
    if src == "url":
        url = item.get("url")
        path = sniff.stream_url_to_file(url)
        if name is None:
            name = url.split("/")[-1]
    elif src == "pasted":
        path = sniff.stream_to_file(StringIO(item["paste_content"]))
        if name is None:
            name = "Pasted Entry"
    else:
        assert src == "path"
        path = item["path"]
        if name is None:
            name = os.path.basename(path)
    return name, path
Example #19
def add_file( dataset, registry, json_file, output_path ):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get( 'link_data_only', 'copy_files' )

    try:
        ext = dataset.file_type
    except AttributeError:
        file_err( 'Unable to process uploaded file, missing file_type parameter.', dataset, json_file )
        return

    if dataset.type == 'url':
        try:
            temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dataset.path ), prefix='url_paste' )
        except Exception, e:
            file_err( 'Unable to fetch %s\n%s' % ( dataset.path, str( e ) ), dataset, json_file )
            return
        dataset.path = temp_name
Example #20
def add_file( dataset, registry, json_file, output_path ):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get( 'link_data_only', 'copy_files' )

    try:
        ext = dataset.file_type
    except AttributeError:
        file_err( 'Unable to process uploaded file, missing file_type parameter.', dataset, json_file )
        return

    if dataset.type == 'url':
        try:
            temp_name, dataset.is_multi_byte = sniff.stream_to_file( urllib.urlopen( dataset.path ), prefix='url_paste' )
        except Exception, e:
            file_err( 'Unable to fetch %s\n%s' % ( dataset.path, str( e ) ), dataset, json_file )
            return
        dataset.path = temp_name
Example #21
def add_file( dataset, registry, json_file, output_path ):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get( 'link_data_only', 'copy_files' )
    in_place = dataset.get( 'in_place', True )

    try:
        ext = dataset.file_type
    except AttributeError:
        file_err( 'Unable to process uploaded file, missing file_type parameter.', dataset, json_file )
        return

    if dataset.type == 'url':
        try:
            page = urllib.urlopen( dataset.path )  # page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file( page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers( page.headers ) )
        except Exception, e:
            file_err( 'Unable to fetch %s\n%s' % ( dataset.path, str( e ) ), dataset, json_file )
            return
        dataset.path = temp_name
Example #22
 def get_data_file_filename( data_file, is_multi_byte = False, override_name = None, override_info = None ):
     dataset_name = override_name
     dataset_info = override_info
     def get_file_name( file_name ):
         file_name = file_name.split( '\\' )[-1]
         file_name = file_name.split( '/' )[-1]
         return file_name
     if 'local_filename' in dir( data_file ):
         # Use the existing file
         return data_file.local_filename, get_file_name( data_file.filename ), is_multi_byte
     elif 'filename' in dir( data_file ):
         # create a new tempfile
         temp_name = None
         try:
             temp_name, is_multi_byte = sniff.stream_to_file( data_file.file, prefix='upload' )
             precreated_name = get_file_name( data_file.filename )
             if not dataset_name:
                 dataset_name = precreated_name
             if not dataset_info:
                 dataset_info = 'uploaded file'
             return temp_name, get_file_name( data_file.filename ), is_multi_byte, dataset_name, dataset_info
         except Exception, e:
             log.exception( 'exception in sniff.stream_to_file using file %s: %s' % ( data_file.filename, str( e ) ) )
             if temp_name:
                 self.remove_temp_file( temp_name )
Example #23
    def add_file(self, trans, file_obj, file_name, file_type, dbkey, info ):
        temp_name = sniff.stream_to_file(file_obj)
        sniff.convert_newlines(temp_name)
        if file_type == 'auto':
            ext = sniff.guess_ext(temp_name)    
        else:
            ext = file_type

        data = trans.app.model.Dataset()
        data.name = file_name
        data.extension = ext
        data.dbkey = dbkey
        data.info = info
        data.flush()
        shutil.move(temp_name, data.file_name)
        data.state = data.states.OK
        data.init_meta()
        data.set_peek()
        if isinstance( data.datatype, datatypes.interval.Interval ):
            if data.missing_meta():
                data.extension = 'tabular'
        trans.history.add_dataset( data )
        trans.app.model.flush()
        return data
Example #24
    def add_file(self, trans, file_obj, file_name, file_type, dbkey, info):
        temp_name = sniff.stream_to_file(file_obj)
        sniff.convert_newlines(temp_name)
        if file_type == 'auto':
            ext = sniff.guess_ext(temp_name)
        else:
            ext = file_type

        data = trans.app.model.Dataset()
        data.name = file_name
        data.extension = ext
        data.dbkey = dbkey
        data.info = info
        data.flush()
        shutil.move(temp_name, data.file_name)
        data.state = data.states.OK
        data.init_meta()
        data.set_peek()
        if isinstance(data.datatype, datatypes.interval.Interval):
            if data.missing_meta():
                data.extension = 'tabular'
        trans.history.add_dataset(data)
        trans.app.model.flush()
        return data
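
sniff.convert_newlines() rewrites the freshly uploaded temp file with POSIX line endings before the extension is guessed. A rough standard-library equivalent (illustrative only; the real helper also supports temp-dir and prefix options) might be:

import os
import shutil
import tempfile


def convert_newlines_inplace(path):
    """Rewrite a text file with LF line endings in place and return the line count."""
    fd, tmp_path = tempfile.mkstemp(prefix='convert_newlines_')
    line_count = 0
    with open(path, 'r', newline='') as src, os.fdopen(fd, 'w', newline='\n') as dst:
        for line in src:
            dst.write(line.rstrip('\r\n') + '\n')
            line_count += 1
    shutil.move(tmp_path, path)
    return line_count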
Example #25
def add_file(dataset, registry, json_file, output_path):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get('link_data_only', 'copy_files')
    run_as_real_user = in_place = dataset.get('in_place', True)
    purge_source = dataset.get('purge_source', True)
    # in_place is True if there is no external chmod in place,
    # however there are other instances where modifications should not occur in_place:
    # when a file is added from a directory on the local file system (ftp import folder or any other path).
    if dataset.type in ('server_dir', 'path_paste', 'ftp_import'):
        in_place = False
    check_content = dataset.get('check_content', True)
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        ext = dataset.file_type
    except AttributeError:
        file_err('Unable to process uploaded file, missing file_type parameter.', dataset, json_file)
        return

    if dataset.type == 'url':
        try:
            page = urlopen(dataset.path)  # page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers))
        except Exception as e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)), dataset, json_file)
            return
        dataset.path = temp_name
    # See if we have an empty file
    if not os.path.exists(dataset.path):
        file_err('Uploaded temporary file (%s) does not exist.' % dataset.path, dataset, json_file)
        return
    if not os.path.getsize(dataset.path) > 0:
        file_err('The uploaded file is empty', dataset, json_file)
        return
    if not dataset.type == 'url':
        # Already set is_multi_byte above if type == 'url'
        try:
            dataset.is_multi_byte = multi_byte.is_multi_byte(codecs.open(dataset.path, 'r', 'utf-8').read(100))
        except UnicodeDecodeError as e:
            dataset.is_multi_byte = False
    # Is dataset an image?
    i_ext = get_image_ext(dataset.path)
    if i_ext:
        ext = i_ext
        data_type = ext
    # Is dataset content multi-byte?
    elif dataset.is_multi_byte:
        data_type = 'multi-byte char'
        ext = sniff.guess_ext(dataset.path, registry.sniff_order, is_multi_byte=True)
    # Is dataset content supported sniffable binary?
    else:
        # FIXME: This ignores the declared sniff order in datatype_conf.xml
        # resulting in improper behavior
        type_info = Binary.is_sniffable_binary(dataset.path)
        if type_info:
            data_type = type_info[0]
            ext = type_info[1]
    if not data_type:
        root_datatype = registry.get_datatype_by_extension(dataset.file_type)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
            ext = dataset.file_type
        else:
            # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
            is_gzipped, is_valid = check_gzip(dataset.path, check_content=check_content)
            if is_gzipped and not is_valid:
                file_err('The gzipped uploaded file contains inappropriate content', dataset, json_file)
                return
            elif is_gzipped and is_valid and auto_decompress:
                if link_data_only == 'copy_files':
                    # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
                    CHUNK_SIZE = 2 ** 20  # 1Mb
                    fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                    gzipped_file = gzip.GzipFile(dataset.path, 'rb')
                    while 1:
                        try:
                            chunk = gzipped_file.read(CHUNK_SIZE)
                        except IOError:
                            os.close(fd)
                            os.remove(uncompressed)
                            file_err('Problem decompressing gzipped data', dataset, json_file)
                            return
                        if not chunk:
                            break
                        os.write(fd, chunk)
                    os.close(fd)
                    gzipped_file.close()
                    # Replace the gzipped file with the decompressed file if it's safe to do so
                    if not in_place:
                        dataset.path = uncompressed
                    else:
                        shutil.move(uncompressed, dataset.path)
                    os.chmod(dataset.path, 0o644)
                dataset.name = dataset.name.rstrip('.gz')
                data_type = 'gzip'
            if not data_type:
                # See if we have a bz2 file, much like gzip
                is_bzipped, is_valid = check_bz2(dataset.path, check_content)
                if is_bzipped and not is_valid:
                    file_err('The bz2 compressed uploaded file contains inappropriate content', dataset, json_file)
                    return
                elif is_bzipped and is_valid and auto_decompress:
                    if link_data_only == 'copy_files':
                        # We need to uncompress the temp_name file
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                        bzipped_file = bz2.BZ2File(dataset.path, 'rb')
                        while 1:
                            try:
                                chunk = bzipped_file.read(CHUNK_SIZE)
                            except IOError:
                                os.close(fd)
                                os.remove(uncompressed)
                                file_err('Problem decompressing bz2 compressed data', dataset, json_file)
                                return
                            if not chunk:
                                break
                            os.write(fd, chunk)
                        os.close(fd)
                        bzipped_file.close()
                        # Replace the bzipped file with the decompressed file if it's safe to do so
                        if not in_place:
                            dataset.path = uncompressed
                        else:
                            shutil.move(uncompressed, dataset.path)
                        os.chmod(dataset.path, 0o644)
                    dataset.name = dataset.name.rstrip('.bz2')
                    data_type = 'bz2'
            if not data_type:
                # See if we have a zip archive
                is_zipped = check_zip(dataset.path)
                if is_zipped and auto_decompress:
                    if link_data_only == 'copy_files':
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        uncompressed = None
                        uncompressed_name = None
                        unzipped = False
                        z = zipfile.ZipFile(dataset.path)
                        for name in z.namelist():
                            if name.endswith('/'):
                                continue
                            if unzipped:
                                stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                                break
                            fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while 1:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        file_err('Problem decompressing zipped data', dataset, json_file)
                                        return
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.6 doesn't have a way to read members in chunks(!)
                                try:
                                    outfile = open(uncompressed, 'wb')
                                    outfile.write(z.read(name))
                                    outfile.close()
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    file_err('Problem decompressing zipped data', dataset, json_file)
                                    return
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                            dataset.name = uncompressed_name
                    data_type = 'zip'
            if not data_type:
                # TODO refactor this logic.  check_binary isn't guaranteed to be
                # correct since it only looks at whether the first 100 chars are
                # printable or not.  If someone specifies a known unsniffable
                # binary datatype and check_binary fails, the file gets mangled.
                if check_binary(dataset.path) or Binary.is_ext_unsniffable(dataset.file_type):
                    # We have a binary dataset, but it is not Bam, Sff or Pdf
                    data_type = 'binary'
                    # binary_ok = False
                    parts = dataset.name.split(".")
                    if len(parts) > 1:
                        ext = parts[-1].strip().lower()
                        if check_content and not Binary.is_ext_unsniffable(ext):
                            file_err('The uploaded binary file contains inappropriate content', dataset, json_file)
                            return
                        elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext:
                            err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext.capitalize(), ext)
                            file_err(err_msg, dataset, json_file)
                            return
            if not data_type:
                # We must have a text file
                if check_content and check_html(dataset.path):
                    file_err('The uploaded file contains inappropriate HTML content', dataset, json_file)
                    return
            if data_type != 'binary':
                if link_data_only == 'copy_files' and data_type not in ('gzip', 'bz2', 'zip'):
                    # Convert universal line endings to Posix line endings if to_posix_lines is True
                    # and the data is not binary or gzip-, bz2- or zip-compressed.
                    if dataset.to_posix_lines:
                        tmpdir = output_adjacent_tmpdir(output_path)
                        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                        if dataset.space_to_tab:
                            line_count, converted_path = sniff.convert_newlines_sep2tabs(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                        else:
                            line_count, converted_path = sniff.convert_newlines(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                if dataset.file_type == 'auto':
                    ext = sniff.guess_ext(dataset.path, registry.sniff_order)
                else:
                    ext = dataset.file_type
                data_type = ext
    # Save job info for the framework
    if ext == 'auto' and data_type == 'binary':
        ext = 'data'
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir', 'path_paste') and link_data_only == 'link_to_files':
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            file_err(err_msg, dataset, json_file)
            return
    if link_data_only == 'copy_files' and converted_path:
        # Move the dataset to its "real" path
        try:
            shutil.move(converted_path, output_path)
        except OSError as e:
            # We may not have permission to remove converted_path
            if e.errno != errno.EACCES:
                raise
    elif link_data_only == 'copy_files':
        if purge_source and not run_as_real_user:
            # if the upload tool runs as a real user the real user
            # can't move dataset.path as this path is owned by galaxy.
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")
    if link_data_only == 'copy_files' and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
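
The gzip branch above streams the archive out in 1 MiB chunks instead of reading it whole. Stripped of the Galaxy bookkeeping (file_err, in_place handling), the core loop amounts to the following sketch with a hypothetical helper name:

import gzip
import os
import shutil
import tempfile

CHUNK_SIZE = 2 ** 20  # 1 MiB


def gunzip_to_sibling_tempfile(gz_path, output_path):
    """Decompress gz_path chunk by chunk into a temp file next to output_path."""
    fd, uncompressed = tempfile.mkstemp(prefix='upload_gunzip_',
                                        dir=os.path.dirname(output_path))
    try:
        with os.fdopen(fd, 'wb') as dst, gzip.open(gz_path, 'rb') as src:
            shutil.copyfileobj(src, dst, CHUNK_SIZE)
    except (OSError, EOFError):
        os.remove(uncompressed)
        raise
    return uncompressed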
Example #26
    def _resolve_item(item):
        # Might be a dataset or a composite upload.
        requested_ext = item.get("ext", None)
        registry = upload_config.registry
        datatype = registry.get_datatype_by_extension(requested_ext)
        composite = item.pop("composite", None)
        if datatype and datatype.composite_type:
            composite_type = datatype.composite_type
            assert composite_type == "auto_primary_file", "basic composite uploads not yet implemented"

            # get_composite_dataset_name finds dataset name from basename of contents
            # and such but we're not implementing that here yet. yagni?
            # also need name...
            metadata = {
                composite_file.substitute_name_with_metadata: datatype.metadata_spec[
                    composite_file.substitute_name_with_metadata
                ].default
                for composite_file in datatype.composite_files.values()
                if composite_file.substitute_name_with_metadata
            }
            name = item.get("name") or "Composite Dataset"
            metadata["base_name"] = name
            dataset = Bunch(
                name=name,
                metadata=metadata,
            )
            writable_files = datatype.get_writable_files_for_dataset(dataset)
            primary_file = sniff.stream_to_file(
                StringIO(datatype.generate_primary_file(dataset)),
                prefix="upload_auto_primary_file",
                dir=".",
            )
            extra_files_path = f"{primary_file}_extra"
            os.mkdir(extra_files_path)
            rval: Dict[str, Any] = {
                "name": name,
                "filename": primary_file,
                "ext": requested_ext,
                "link_data_only": False,
                "sources": [],
                "hashes": [],
                "extra_files": extra_files_path,
            }
            _copy_and_validate_simple_attributes(item, rval)
            composite_items = composite.get("elements", [])
            keys = list(writable_files.keys())
            composite_item_idx = 0
            for composite_item in composite_items:
                if composite_item_idx >= len(keys):
                    # raise exception - too many files?
                    pass
                key = keys[composite_item_idx]
                writable_file = writable_files[key]
                _, src_target = _has_src_to_path(upload_config, composite_item)
                # do the writing
                sniff.handle_composite_file(
                    datatype,
                    src_target,
                    extra_files_path,
                    key,
                    writable_file.is_binary,
                    ".",
                    f"{os.path.basename(extra_files_path)}_",
                    composite_item,
                )
                composite_item_idx += 1

            writable_files_idx = composite_item_idx
            while writable_files_idx < len(keys):
                key = keys[writable_files_idx]
                writable_file = writable_files[key]
                if not writable_file.optional:
                    # raise Exception, non-optional file missing
                    pass
                writable_files_idx += 1
            return rval
        else:
            if composite:
                raise Exception(f"Non-composite datatype [{datatype}] attempting to be created with composite data.")
            return _resolve_item_with_primary(item)
Example #27
def add_file(dataset, registry, json_file, output_path):
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get('link_data_only', 'copy_files')
    in_place = dataset.get('in_place', True)
    purge_source = dataset.get('purge_source', True)
    try:
        ext = dataset.file_type
    except AttributeError:
        file_err(
            'Unable to process uploaded file, missing file_type parameter.',
            dataset, json_file)
        return

    if dataset.type == 'url':
        try:
            page = urlopen(
                dataset.path)  # page will be .close()ed by sniff methods
            temp_name, dataset.is_multi_byte = sniff.stream_to_file(
                page,
                prefix='url_paste',
                source_encoding=util.get_charset_from_http_headers(
                    page.headers))
        except Exception as e:
            file_err('Unable to fetch %s\n%s' % (dataset.path, str(e)),
                     dataset, json_file)
            return
        dataset.path = temp_name
    # See if we have an empty file
    if not os.path.exists(dataset.path):
        file_err('Uploaded temporary file (%s) does not exist.' % dataset.path,
                 dataset, json_file)
        return
    if not os.path.getsize(dataset.path) > 0:
        file_err('The uploaded file is empty', dataset, json_file)
        return
    if not dataset.type == 'url':
        # Already set is_multi_byte above if type == 'url'
        try:
            dataset.is_multi_byte = multi_byte.is_multi_byte(
                codecs.open(dataset.path, 'r', 'utf-8').read(100))
        except UnicodeDecodeError as e:
            dataset.is_multi_byte = False
    # Is dataset an image?
    i_ext = get_image_ext(dataset.path)
    if i_ext:
        ext = i_ext
        data_type = ext
    # Is dataset content multi-byte?
    elif dataset.is_multi_byte:
        data_type = 'multi-byte char'
        ext = sniff.guess_ext(dataset.path,
                              registry.sniff_order,
                              is_multi_byte=True)
    # Is dataset content supported sniffable binary?
    else:
        # FIXME: This ignores the declared sniff order in datatype_conf.xml
        # resulting in improper behavior
        type_info = Binary.is_sniffable_binary(dataset.path)
        if type_info:
            data_type = type_info[0]
            ext = type_info[1]
    if not data_type:
        root_datatype = registry.get_datatype_by_extension(dataset.file_type)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
            ext = dataset.file_type
        else:
            # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
            is_gzipped, is_valid = check_gzip(dataset.path)
            if is_gzipped and not is_valid:
                file_err(
                    'The gzipped uploaded file contains inappropriate content',
                    dataset, json_file)
                return
            elif is_gzipped and is_valid:
                if link_data_only == 'copy_files':
                    # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
                    CHUNK_SIZE = 2**20  # 1Mb
                    fd, uncompressed = tempfile.mkstemp(
                        prefix='data_id_%s_upload_gunzip_' %
                        dataset.dataset_id,
                        dir=os.path.dirname(output_path),
                        text=False)
                    gzipped_file = gzip.GzipFile(dataset.path, 'rb')
                    while 1:
                        try:
                            chunk = gzipped_file.read(CHUNK_SIZE)
                        except IOError:
                            os.close(fd)
                            os.remove(uncompressed)
                            file_err('Problem decompressing gzipped data',
                                     dataset, json_file)
                            return
                        if not chunk:
                            break
                        os.write(fd, chunk)
                    os.close(fd)
                    gzipped_file.close()
                    # Replace the gzipped file with the decompressed file if it's safe to do so
                    if dataset.type in ('server_dir',
                                        'path_paste') or not in_place:
                        dataset.path = uncompressed
                    else:
                        shutil.move(uncompressed, dataset.path)
                    os.chmod(dataset.path, 0o644)
                dataset.name = dataset.name.rstrip('.gz')
                data_type = 'gzip'
            if not data_type and bz2 is not None:
                # See if we have a bz2 file, much like gzip
                is_bzipped, is_valid = check_bz2(dataset.path)
                if is_bzipped and not is_valid:
                    file_err(
                        'The bz2 compressed uploaded file contains inappropriate content',
                        dataset, json_file)
                    return
                elif is_bzipped and is_valid:
                    if link_data_only == 'copy_files':
                        # We need to uncompress the temp_name file
                        CHUNK_SIZE = 2**20  # 1Mb
                        fd, uncompressed = tempfile.mkstemp(
                            prefix='data_id_%s_upload_bunzip2_' %
                            dataset.dataset_id,
                            dir=os.path.dirname(output_path),
                            text=False)
                        bzipped_file = bz2.BZ2File(dataset.path, 'rb')
                        while 1:
                            try:
                                chunk = bzipped_file.read(CHUNK_SIZE)
                            except IOError:
                                os.close(fd)
                                os.remove(uncompressed)
                                file_err(
                                    'Problem decompressing bz2 compressed data',
                                    dataset, json_file)
                                return
                            if not chunk:
                                break
                            os.write(fd, chunk)
                        os.close(fd)
                        bzipped_file.close()
                        # Replace the bzipped file with the decompressed file if it's safe to do so
                        if dataset.type in ('server_dir',
                                            'path_paste') or not in_place:
                            dataset.path = uncompressed
                        else:
                            shutil.move(uncompressed, dataset.path)
                        os.chmod(dataset.path, 0o644)
                    dataset.name = dataset.name.rstrip('.bz2')
                    data_type = 'bz2'
            if not data_type:
                # See if we have a zip archive
                is_zipped = check_zip(dataset.path)
                if is_zipped:
                    if link_data_only == 'copy_files':
                        CHUNK_SIZE = 2**20  # 1Mb
                        uncompressed = None
                        uncompressed_name = None
                        unzipped = False
                        z = zipfile.ZipFile(dataset.path)
                        for name in z.namelist():
                            if name.endswith('/'):
                                continue
                            if unzipped:
                                stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                                break
                            fd, uncompressed = tempfile.mkstemp(
                                prefix='data_id_%s_upload_zip_' %
                                dataset.dataset_id,
                                dir=os.path.dirname(output_path),
                                text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while 1:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        file_err(
                                            'Problem decompressing zipped data',
                                            dataset, json_file)
                                        return
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.6 doesn't have a way to read members in chunks(!)
                                try:
                                    outfile = open(uncompressed, 'wb')
                                    outfile.write(z.read(name))
                                    outfile.close()
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    file_err(
                                        'Problem decompressing zipped data',
                                        dataset, json_file)
                                    return
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if dataset.type in ('server_dir',
                                                'path_paste') or not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                            dataset.name = uncompressed_name
                    data_type = 'zip'
            if not data_type:
                # TODO refactor this logic.  check_binary isn't guaranteed to be
                # correct since it only looks at whether the first 100 chars are
                # printable or not.  If someone specifies a known unsniffable
                # binary datatype and check_binary fails, the file gets mangled.
                if check_binary(dataset.path) or Binary.is_ext_unsniffable(
                        dataset.file_type):
                    # We have a binary dataset, but it is not Bam, Sff or Pdf
                    data_type = 'binary'
                    # binary_ok = False
                    parts = dataset.name.split(".")
                    if len(parts) > 1:
                        ext = parts[-1].strip().lower()
                        if not Binary.is_ext_unsniffable(ext):
                            file_err(
                                'The uploaded binary file contains inappropriate content',
                                dataset, json_file)
                            return
                        elif Binary.is_ext_unsniffable(
                                ext) and dataset.file_type != ext:
                            err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (
                                ext.capitalize(), ext)
                            file_err(err_msg, dataset, json_file)
                            return
            if not data_type:
                # We must have a text file
                if check_html(dataset.path):
                    file_err(
                        'The uploaded file contains inappropriate HTML content',
                        dataset, json_file)
                    return
            if data_type != 'binary':
                if link_data_only == 'copy_files':
                    if dataset.type in ('server_dir',
                                        'path_paste') and data_type not in [
                                            'gzip', 'bz2', 'zip'
                                        ]:
                        in_place = False
                    # Convert universal line endings to Posix line endings, but allow the user to turn it off,
                    # so that is becomes possible to upload gzip, bz2 or zip files with binary data without
                    # corrupting the content of those files.
                    if dataset.to_posix_lines:
                        tmpdir = output_adjacent_tmpdir(output_path)
                        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                        if dataset.space_to_tab:
                            line_count, converted_path = sniff.convert_newlines_sep2tabs(
                                dataset.path,
                                in_place=in_place,
                                tmp_dir=tmpdir,
                                tmp_prefix=tmp_prefix)
                        else:
                            line_count, converted_path = sniff.convert_newlines(
                                dataset.path,
                                in_place=in_place,
                                tmp_dir=tmpdir,
                                tmp_prefix=tmp_prefix)
                if dataset.file_type == 'auto':
                    ext = sniff.guess_ext(dataset.path, registry.sniff_order)
                else:
                    ext = dataset.file_type
                data_type = ext
    # Save job info for the framework
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir',
                        'path_paste') and link_data_only == 'link_to_files':
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            file_err(err_msg, dataset, json_file)
            return
    if link_data_only == 'copy_files' and dataset.type in (
            'server_dir',
            'path_paste') and data_type not in ['gzip', 'bz2', 'zip']:
        # Move the dataset to its "real" path
        if converted_path is not None:
            shutil.copy(converted_path, output_path)
            try:
                os.remove(converted_path)
            except:
                pass
        else:
            # This should not happen, but it's here just in case
            shutil.copy(dataset.path, output_path)
    elif link_data_only == 'copy_files':
        if purge_source:
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")

    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming(
            output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
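
The to_posix_lines / space_to_tab branch above delegates to Galaxy's sniff helpers. As a rough illustration only, the following minimal sketch (not the real sniff.convert_newlines / sniff.convert_newlines_sep2tabs API, whose signatures and in-place handling differ) shows the kind of normalization being performed: rewrite CR/LF and lone CR endings as LF and, optionally, collapse runs of spaces into single tabs, returning the line count and the converted temp path.

import os
import re
import tempfile


def convert_newlines_sketch(path, space_to_tab=False):
    # Rewrite CR/LF and lone CR line endings as LF in a temporary copy of
    # `path`; optionally collapse runs of spaces into a single tab.
    line_count = 0
    fd, converted_path = tempfile.mkstemp(prefix='data_convert_')
    with open(path, newline='') as src, os.fdopen(fd, 'w', newline='\n') as dst:
        for line in src:
            line = line.rstrip('\r\n')
            if space_to_tab:
                line = re.sub(' +', '\t', line)
            dst.write(line + '\n')
            line_count += 1
    return line_count, converted_path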
Example #28
    def get_uploaded_datasets(self,
                              trans,
                              context,
                              override_name=None,
                              override_info=None):
        def get_data_file_filename(data_file,
                                   override_name=None,
                                   override_info=None,
                                   purge=True):
            dataset_name = override_name

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name

            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                return Bunch(type='file',
                             path=data_file['local_filename'],
                             name=dataset_name,
                             purge_source=purge)
            except Exception:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)

        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file).read()

                def start_of_url(content):
                    start_of_url_paste = content.lstrip()[0:10].lower()
                    looks_like_url = False
                    for url_prefix in URI_PREFIXES:
                        if start_of_url_paste.startswith(url_prefix):
                            looks_like_url = True
                            break

                    return looks_like_url

                if start_of_url(url_paste):
                    for line in url_paste.replace("\r", "").split("\n"):
                        line = line.strip()
                        if line:
                            if not start_of_url(line):
                                continue  # non-url line, ignore

                            if "file://" in line:
                                if not trans.user_is_admin:
                                    raise AdminRequiredException()
                                elif not trans.app.config.allow_path_paste:
                                    raise ConfigDoesNotAllowException()
                                upload_path = line[len("file://"):]
                                dataset_name = os.path.basename(upload_path)
                            else:
                                dataset_name = line

                            if override_name:
                                dataset_name = override_name
                            yield Bunch(type='url',
                                        path=line,
                                        name=dataset_name)
                else:
                    dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=dataset_name)

        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress',
                           None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append(
                        "All file contents specified in the paste box were ignored."
                    )
                if ftp_files:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip(
            ):  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(
                        context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                assert not os.path.islink(
                    user_ftp_dir
                ), "User FTP directory cannot be a symbolic link"
                for dirpath, _dirnames, filenames in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename),
                                               user_ftp_dir)
                                if not os.path.islink(
                                        os.path.join(dirpath, filename)):
                                    ftp_data_file = {
                                        'local_filename':
                                        os.path.abspath(
                                            os.path.join(user_ftp_dir, path)),
                                        'filename':
                                        os.path.basename(path)
                                    }
                                    purge = getattr(trans.app.config,
                                                    'ftp_upload_purge', True)
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info,
                                        purge=purge,
                                    )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress',
                           None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey

                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey

                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [
                    unicodedata.normalize('NFC', f) for f in ftp_files
                    if isinstance(f, str)
                ]
                if trans.user is None:
                    log.warning(
                        f'Anonymous user passed values in ftp_files: {ftp_files}'
                    )
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(
                        user_ftp_dir
                    ), "User FTP directory cannot be a symbolic link"
                    for dirpath, _dirnames, filenames in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename),
                                           user_ftp_dir)
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, str):
                                    valid_files.append(
                                        unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning(
                        f'User passed an invalid file path in ftp_files: {ftp_file}'
                    )
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {
                    'local_filename':
                    os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename':
                    os.path.basename(ftp_file)
                }
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file,
                                                    override_name=name,
                                                    override_info=info,
                                                    purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey
                    rval.append(file_bunch)
            return rval

        file_type = self.get_file_type(context)
        file_count = self.get_file_count(trans, context)
        d_type = self.get_datatype(trans, context)
        dbkey = self.get_dbkey(context)
        tag_using_filenames = context.get('tag_using_filenames', False)
        tags = context.get('tags', False)
        force_composite = asbool(context.get('force_composite', 'False'))
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for _ in range(file_count)]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None or force_composite:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Dataset()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            dataset.tag_using_filenames = None
            dataset.tags = None
            # load metadata
            files_metadata = context.get(self.metadata_ref, {})
            metadata_name_substition_default_dict = {
                composite_file.substitute_name_with_metadata:
                d_type.metadata_spec[
                    composite_file.substitute_name_with_metadata].default
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata
            }
            for meta_name, meta_spec in d_type.metadata_spec.items():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename(
                                meta_value,
                                default=metadata_name_substition_default_dict[
                                    meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.name = self.get_composite_dataset_name(context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name = sniff.stream_to_file(
                    io.StringIO(d_type.generate_primary_file(dataset)),
                    prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.auto_decompress = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.auto_decompress = file_bunch.auto_decompress
                dataset.space_to_tab = file_bunch.space_to_tab
                if file_bunch.file_type:
                    dataset.file_type = file_type
                if file_bunch.dbkey:
                    dataset.dbkey = dbkey
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception(
                    'No primary dataset file was available for composite upload'
                )
            if not force_composite:
                keys = [value.name for value in writable_files.values()]
            else:
                keys = [str(index) for index in range(file_count)]
            for i, group_incoming in enumerate(
                    groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if not force_composite and group_incoming is None and not writable_files[
                        list(writable_files.keys())[keys.index(key)]].optional:
                    dataset.warnings.append(
                        f"A required composite file ({key}) was not specified."
                    )
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        if force_composite:
                            key = group_incoming.get("NAME") or i
                        dataset.composite_files[key] = file_bunch.__dict__
                    elif not force_composite:
                        dataset.composite_files[key] = None
                        if not writable_files[list(writable_files.keys())[
                                keys.index(key)]].optional:
                            dataset.warnings.append(
                                f"A required composite file ({key}) was not specified."
                            )
            return [dataset]
        else:
            rval = []
            for i, file_contexts in enumerate(context[self.name]):
                datasets = get_filenames(file_contexts)
                for dataset in datasets:
                    override_file_type = self.get_file_type(
                        context[self.name][i], parent_context=context)
                    d_type = self.get_datatype(trans,
                                               context[self.name][i],
                                               parent_context=context)
                    dataset.file_type = override_file_type
                    dataset.datatype = d_type
                    dataset.ext = self.get_datatype_ext(trans,
                                                        context[self.name][i],
                                                        parent_context=context)
                    dataset.dbkey = self.get_dbkey(context[self.name][i],
                                                   parent_context=context)
                    dataset.tag_using_filenames = tag_using_filenames
                    dataset.tags = tags
                    rval.append(dataset)
            return rval
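
Example #28's get_url_paste_urls_or_filename decides between "a list of URLs" and "pasted file content" by inspecting the first few characters of the paste, and it gates file:// lines behind admin status and allow_path_paste. The standalone sketch below mirrors that decision; the URI_PREFIXES tuple and the is_admin / allow_path_paste flags are assumptions standing in for Galaxy's configuration, not its actual API.

import os

URI_PREFIXES = ('http://', 'https://', 'ftp://', 'file://')  # assumed subset


def classify_url_paste(url_paste, is_admin=False, allow_path_paste=False):
    # Yield (kind, value, name) tuples for each usable piece of a paste box.
    if not url_paste.lstrip()[:10].lower().startswith(URI_PREFIXES):
        # Not a URL list: treat the whole paste as pasted file content.
        yield ('file', url_paste, 'Pasted Entry')
        return
    for line in url_paste.replace('\r', '').split('\n'):
        line = line.strip()
        if not line or not line.lower().startswith(URI_PREFIXES):
            continue  # non-url line, ignore
        if line.startswith('file://'):
            if not (is_admin and allow_path_paste):
                raise PermissionError('file:// pastes require an admin and allow_path_paste')
            yield ('url', line, os.path.basename(line[len('file://'):]))
        else:
            yield ('url', line, line)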
Example #29
    def get_uploaded_datasets(self,
                              trans,
                              context,
                              override_name=None,
                              override_info=None):
        def get_data_file_filename(data_file,
                                   override_name=None,
                                   override_info=None):
            dataset_name = override_name
            dataset_info = override_info

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name

            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                if not dataset_info:
                    dataset_info = 'uploaded file'
                return Bunch(type='file',
                             path=data_file['local_filename'],
                             name=dataset_name)
                #return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
            except:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)
                #return None, None, None, None, None
        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            filenames = []
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file, 'r').read(1024)
                if url_paste.lstrip().lower().startswith(
                        'http://') or url_paste.lstrip().lower().startswith(
                            'ftp://') or url_paste.lstrip().lower().startswith(
                                'https://'):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not line.lower().startswith(
                                    'http://') and not line.lower().startswith(
                                        'ftp://') and not line.lower(
                                        ).startswith('https://'):
                                continue  # non-url line, ignore
                            dataset_name = override_name
                            if not dataset_name:
                                dataset_name = line
                            dataset_info = override_info
                            if not dataset_info:
                                dataset_info = 'uploaded url'
                            yield Bunch(type='url',
                                        path=line,
                                        name=dataset_name)
                            #yield ( 'url', line, precreated_name, dataset_name, dataset_info )
                else:
                    dataset_name = dataset_info = precreated_name = 'Pasted Entry'  #we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    if override_info:
                        dataset_info = override_info
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=precreated_name)
                    #yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )
        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append(
                        "All file contents specified in the paste box were ignored."
                    )
                if ftp_files:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip(
            ):  #we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(
                        context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append(
                        "All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename),
                                               user_ftp_dir)
                                if not os.path.islink(
                                        os.path.join(dirpath, filename)):
                                    ftp_data_file = {
                                        'local_filename':
                                        os.path.abspath(
                                            os.path.join(user_ftp_dir, path)),
                                        'filename':
                                        os.path.basename(path)
                                    }
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info)
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            warnings = []
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [
                    unicodedata.normalize('NFC', f) for f in ftp_files
                    if isinstance(f, unicode)
                ]
                if trans.user is None:
                    log.warning(
                        'Anonymous user passed values in ftp_files: %s' %
                        ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for (dirpath, dirnames,
                         filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename),
                                           user_ftp_dir)
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(
                                        unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning(
                        'User passed an invalid file path in ftp_files: %s' %
                        ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {
                    'local_filename':
                    os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename':
                    os.path.basename(ftp_file)
                }
                file_bunch = get_data_file_filename(ftp_data_file,
                                                    override_name=name,
                                                    override_info=info)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            return rval

        file_type = self.get_file_type(context)
        d_type = self.get_datatype(trans, context)
        dbkey = context.get('dbkey', None)
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for filename in writable_files]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None:
            #handle uploading of composite datatypes
            #Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            #load metadata
            files_metadata = context.get(self.metadata_ref, {})
            metadata_name_substition_default_dict = dict([
                (composite_file.substitute_name_with_metadata,
                 d_type.metadata_spec[
                     composite_file.substitute_name_with_metadata].default)
                for composite_file in d_type.composite_files.values()
                if composite_file.substitute_name_with_metadata
            ])
            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename(
                                meta_value,
                                default=metadata_name_substition_default_dict[
                                    meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.precreated_name = dataset.name = self.get_composite_dataset_name(
                context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                #replace sniff here with just creating an empty file
                temp_name, is_multi_byte = sniff.stream_to_file(
                    StringIO.StringIO(d_type.generate_primary_file(dataset)),
                    prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.space_to_tab = file_bunch.space_to_tab
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  #remove this before finish, this should create an empty dataset
                raise Exception(
                    'No primary dataset file was available for composite upload'
                )
            keys = [value.name for value in writable_files.values()]
            for i, group_incoming in enumerate(
                    groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if group_incoming is None and not writable_files[
                        writable_files.keys()[keys.index(key)]].optional:
                    dataset.warnings.append(
                        "A required composite file (%s) was not specified." %
                        (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        dataset.composite_files[key] = file_bunch.__dict__
                    else:
                        dataset.composite_files[key] = None
                        if not writable_files[writable_files.keys()[keys.index(
                                key)]].optional:
                            dataset.warnings.append(
                                "A required composite file (%s) was not specified."
                                % (key))
            return [dataset]
        else:
            datasets = get_filenames(context[self.name][0])
            rval = []
            for dataset in datasets:
                dataset.file_type = file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext(trans, context)
                dataset.dbkey = dbkey
                rval.append(dataset)
            return rval
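
Both Example #28 and Example #29 normalize the browser-submitted FTP file names and the names found on disk to Unicode normal form C before comparing them. The short snippet below (a toy demonstration, not Galaxy code) shows why: a decomposing filesystem such as HFS+ can list a name in NFD while the browser submits NFC, and the raw strings then compare unequal.

import unicodedata

submitted = 'müller.txt'                           # composed form (NFC), as sent by the browser
on_disk = unicodedata.normalize('NFD', submitted)  # decomposed form a filesystem listing may return

assert submitted != on_disk                        # raw comparison fails
assert unicodedata.normalize('NFC', submitted) == unicodedata.normalize('NFC', on_disk)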
Example #30
    def _resolve_item(item):
        # Might be a dataset or a composite upload.
        requested_ext = item.get("ext", None)
        registry = upload_config.registry
        datatype = registry.get_datatype_by_extension(requested_ext)
        composite = item.pop("composite", None)
        if datatype and datatype.composite_type:
            composite_type = datatype.composite_type
            writable_files = datatype.writable_files
            assert composite_type == "auto_primary_file", "basic composite uploads not yet implemented"

            # get_composite_dataset_name finds dataset name from basename of contents
            # and such but we're not implementing that here yet. yagni?
            # also need name...
            dataset_bunch = Bunch()
            name = item.get("name") or 'Composite Dataset'
            dataset_bunch.name = name
            primary_file = sniff.stream_to_file(
                StringIO(datatype.generate_primary_file(dataset_bunch)),
                prefix='upload_auto_primary_file',
                dir=".")
            extra_files_path = primary_file + "_extra"
            os.mkdir(extra_files_path)
            rval = {
                "name": name,
                "filename": primary_file,
                "ext": requested_ext,
                "link_data_only": False,
                "sources": [],
                "hashes": [],
                "extra_files": extra_files_path,
            }
            _copy_and_validate_simple_attributes(item, rval)
            composite_items = composite.get("elements", [])
            keys = [value.name for value in writable_files.values()]
            composite_item_idx = 0
            for composite_item in composite_items:
                if composite_item_idx >= len(keys):
                    # raise exception - too many files?
                    pass
                key = keys[composite_item_idx]
                writable_file = writable_files[key]
                _, src_target = _has_src_to_path(upload_config, composite_item)
                # do the writing
                sniff.handle_composite_file(
                    datatype,
                    src_target,
                    extra_files_path,
                    key,
                    writable_file.is_binary,
                    ".",
                    os.path.basename(extra_files_path) + "_",
                    composite_item,
                )
                composite_item_idx += 1

            writable_files_idx = composite_item_idx
            while writable_files_idx < len(keys):
                key = keys[writable_files_idx]
                writable_file = writable_files[key]
                if not writable_file.optional:
                    # raise Exception, non-optional file missing
                    pass
                writable_files_idx += 1
            return rval
        else:
            if composite:
                raise Exception(
                    "Non-composite datatype [%s] attempting to be created with composite data."
                    % datatype)
            return _resolve_item_with_primary(item)
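
_resolve_item pairs the uploaded composite elements positionally against the datatype's writable_files keys and leaves placeholder comments where overflow and missing required files should be rejected. The sketch below is one way those checks could look; the plain-dict writable_files description is an assumption used for illustration, not Galaxy's registry objects.

def pair_composite_elements(writable_files, elements):
    # Match elements to writable-file slots by position; reject overflow and
    # any unfilled slot that is not optional.
    keys = list(writable_files)
    if len(elements) > len(keys):
        raise ValueError('more composite elements than writable files')
    paired = dict(zip(keys, elements))
    for key in keys[len(elements):]:
        if not writable_files[key].get('optional', False):
            raise ValueError('required composite file (%s) was not specified' % key)
    return paired


# Toy usage with a hypothetical two-slot composite datatype:
writable_files = {'table.txt': {'optional': False}, 'index.idx': {'optional': True}}
pair_composite_elements(writable_files, [{'src': 'url', 'url': 'https://example.org/t.txt'}])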
Example #31
    def get_uploaded_datasets(self,
                              trans,
                              context,
                              override_name=None,
                              override_info=None):
        def get_data_file_filename(data_file,
                                   override_name=None,
                                   override_info=None):
            dataset_name = override_name
            dataset_info = override_info

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name

            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                if not dataset_info:
                    dataset_info = 'uploaded file'
                return Bunch(type='file',
                             path=data_file['local_filename'],
                             name=get_file_name(data_file['filename']))
                #return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
            except:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)
                #return None, None, None, None, None
        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            filenames = []
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file, 'r').read(1024)
                if url_paste.lstrip().lower().startswith(
                        'http://') or url_paste.lstrip().lower().startswith(
                            'ftp://'):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not line.lower().startswith(
                                    'http://') and not line.lower().startswith(
                                        'ftp://'):
                                continue  # non-url line, ignore
                            precreated_name = line
                            dataset_name = override_name
                            if not dataset_name:
                                dataset_name = line
                            dataset_info = override_info
                            if not dataset_info:
                                dataset_info = 'uploaded url'
                            yield Bunch(type='url',
                                        path=line,
                                        name=precreated_name)
                            #yield ( 'url', line, precreated_name, dataset_name, dataset_info )
                else:
                    dataset_name = dataset_info = precreated_name = 'Pasted Entry'  #we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    if override_info:
                        dataset_info = override_info
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=precreated_name)
                    #yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )
        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            warnings = []
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path and url_paste:
                if url_paste.strip():
                    warnings.append(
                        "All file contents specified in the paste box were ignored."
                    )
            else:  #we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(
                        context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            url_paste = context['url_paste']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None]:
                space_to_tab = True
            warnings = []
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            return rval

        file_type = self.get_file_type(context)
        d_type = self.get_datatype(trans, context)
        dbkey = context.get('dbkey', None)
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for filename in writable_files]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None:
            #handle uploading of composite datatypes
            #Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            #load metadata
            files_metadata = context.get(self.metadata_ref, {})
            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        dataset.metadata[meta_name] = files_metadata[meta_name]
            dataset_name = None
            dataset_info = None
            if dataset.datatype.composite_type == 'auto_primary_file':
                #replace sniff here with just creating an empty file
                temp_name, is_multi_byte = sniff.stream_to_file(
                    StringIO.StringIO(d_type.generate_primary_file()),
                    prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.space_to_tab = False
                dataset.precreated_name = dataset.name = 'Uploaded Composite Dataset (%s)' % (
                    file_type)
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                if dataset.datatype.composite_type:
                    precreated_name = 'Uploaded Composite Dataset (%s)' % (
                        file_type)
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.space_to_tab = file_bunch.space_to_tab
                dataset.precreated_name = file_bunch.precreated_name
                dataset.name = file_bunch.precreated_name
                dataset.warnings.extend(file_bunch.warnings)
            if dataset.primary_file is None:  #remove this before finish, this should create an empty dataset
                raise Exception(
                    'No primary dataset file was available for composite upload'
                )
            keys = [value.name for value in writable_files.values()]
            for i, group_incoming in enumerate(
                    groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if group_incoming is None and not writable_files[
                        writable_files.keys()[keys.index(key)]].optional:
                    dataset.warnings.append(
                        "A required composite file (%s) was not specified." %
                        (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    if file_bunch.path:
                        dataset.composite_files[key] = file_bunch.__dict__
                    else:
                        dataset.composite_files[key] = None
                        if not writable_files[writable_files.keys()[keys.index(
                                key)]].optional:
                            dataset.warnings.append(
                                "A required composite file (%s) was not specified."
                                % (key))
            return [dataset]
        else:
            datasets = get_filenames(context[self.name][0])
            rval = []
            for dataset in datasets:
                dataset.file_type = file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext(trans, context)
                dataset.dbkey = dbkey
                rval.append(dataset)
            return rval
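
Each of these get_one_filename / get_filenames variants repeats the same idiom for checkbox-style form values: anything other than None, the string "None", or False counts as enabled. A small helper capturing that idiom might look like the sketch below (an illustration of the pattern, not a Galaxy utility).

def form_flag(context, key):
    # Treat missing values, the literal string "None", and False as disabled;
    # everything else (e.g. "true", "on", True) as enabled.
    return context.get(key, None) not in ["None", None, False]


# Usage mirroring the examples above:
# space_to_tab = form_flag(context, 'space_to_tab')
# to_posix_lines = form_flag(context, 'to_posix_lines')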
Example #32
    def get_uploaded_datasets( self, trans, context, override_name=None, override_info=None ):
        def get_data_file_filename( data_file, override_name=None, override_info=None ):
            dataset_name = override_name
            dataset_info = override_info

            def get_file_name( file_name ):
                file_name = file_name.split( '\\' )[-1]
                file_name = file_name.split( '/' )[-1]
                return file_name
            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name( data_file['filename'] )
                if not dataset_info:
                    dataset_info = 'uploaded file'
                return Bunch( type='file', path=data_file['local_filename'], name=dataset_name )
                # return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
            except:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch( type=None, path=None, name=None )
                # return None, None, None, None, None

        def get_url_paste_urls_or_filename( group_incoming, override_name=None, override_info=None ):
            url_paste_file = group_incoming.get( 'url_paste', None )
            if url_paste_file is not None:
                url_paste = open( url_paste_file, 'r' ).read( 1024 )
                if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ) or url_paste.lstrip().lower().startswith( 'https://' ):
                    url_paste = url_paste.replace( '\r', '' ).split( '\n' )
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ) and not line.lower().startswith( 'https://' ):
                                continue  # non-url line, ignore
                            dataset_name = override_name
                            if not dataset_name:
                                dataset_name = line
                            dataset_info = override_info
                            if not dataset_info:
                                dataset_info = 'uploaded url'
                            yield Bunch( type='url', path=line, name=dataset_name )
                            # yield ( 'url', line, precreated_name, dataset_name, dataset_info )
                else:
                    dataset_name = dataset_info = precreated_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    if override_info:
                        dataset_info = override_info
                    yield Bunch( type='file', path=url_paste_file, name=precreated_name )
                    # yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )

        def get_one_filename( context ):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get( 'NAME', None )
            info = context.get( 'INFO', None )
            uuid = context.get( 'uuid', None ) or None  # Turn '' to None
            warnings = []
            to_posix_lines = False
            if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
                to_posix_lines = True
            space_to_tab = False
            if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
                space_to_tab = True
            file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append( "All file contents specified in the paste box were ignored." )
                if ftp_files:
                    warnings.append( "All FTP uploaded file selections were ignored." )
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append( "All FTP uploaded file selections were ignored." )
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                                if not os.path.islink( os.path.join( dirpath, filename ) ):
                                    ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, path ) ),
                                                      'filename' : os.path.basename( path ) }
                                    file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            return file_bunch, warnings

        def get_filenames( context ):
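            # Collect every provided source for this group (direct upload, each pasted URL,
            # each valid FTP file) as a list of Bunch records.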
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get( 'uuid', None ) or None  # Turn '' to None
            name = context.get( 'NAME', None )
            info = context.get( 'INFO', None )
            to_posix_lines = False
            if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
                to_posix_lines = True
            space_to_tab = False
            if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
                space_to_tab = True
            file_bunch = get_data_file_filename( data_file, override_name=name, override_info=info )
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
            for file_bunch in get_url_paste_urls_or_filename( context, override_name=name, override_info=info ):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, unicode)]
                if trans.user is None:
                    log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                        for filename in filenames:
                            path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                            if not os.path.islink( os.path.join( dirpath, filename ) ):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(unicodedata.normalize('NFC', path ))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
                                  'filename' : os.path.basename( ftp_file ) }
                file_bunch = get_data_file_filename( ftp_data_file, override_name=name, override_info=info )
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            return rval
        file_type = self.get_file_type( context )
        d_type = self.get_datatype( trans, context )
        dbkey = context.get( 'dbkey', None )
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [ None for _ in writable_files ]
        for group_incoming in context.get( self.name, [] ):
            i = int( group_incoming['__index__'] )
            groups_incoming[ i ] = group_incoming
        if d_type.composite_type is not None:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            # load metadata
            files_metadata = context.get( self.metadata_ref, {} )
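            # Map each metadata name used for composite file name substitution to its datatype
            # default, so user-supplied values can be sanitized into safe filenames below.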
            metadata_name_substition_default_dict = dict( [ ( composite_file.substitute_name_with_metadata, d_type.metadata_spec[ composite_file.substitute_name_with_metadata ].default ) for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata ] )
            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[ meta_name ]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename( meta_value, default=metadata_name_substition_default_dict[ meta_name ] )
                        dataset.metadata[ meta_name ] = meta_value
            dataset.precreated_name = dataset.name = self.get_composite_dataset_name( context )
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file( dataset ) ), prefix='upload_auto_primary_file' )
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.space_to_tab = file_bunch.space_to_tab
                dataset.warnings.extend( warnings )
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception( 'No primary dataset file was available for composite upload' )
            keys = [ value.name for value in writable_files.values() ]
            for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
                key = keys[ i + writable_files_offset ]
                if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                    dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                    dataset.composite_files[ key ] = None
                else:
                    file_bunch, warnings = get_one_filename( group_incoming )
                    dataset.warnings.extend( warnings )
                    if file_bunch.path:
                        dataset.composite_files[ key ] = file_bunch.__dict__
                    else:
                        dataset.composite_files[ key ] = None
                        if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                            dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
            return [ dataset ]
        else:
            datasets = get_filenames( context[ self.name ][0] )
            rval = []
            for dataset in datasets:
                dataset.file_type = file_type
                dataset.datatype = d_type
                dataset.ext = self.get_datatype_ext( trans, context )
                dataset.dbkey = dbkey
                rval.append( dataset )
            return rval
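Editor's note: the sketch below is not part of the example above. It is a minimal, self-contained illustration of the Bunch-style records the grouping code yields (type/path/name plus per-file flags); the Bunch shim and the describe_uploads helper are assumptions made for this sketch only.

class Bunch(dict):
    # Minimal stand-in for the Bunch used above: keys double as attributes.
    def __init__(self, **kwargs):
        super(Bunch, self).__init__(**kwargs)
        self.__dict__ = self

def describe_uploads(file_bunches):
    # Keep only selections that resolved to an actual path, as the upload code does.
    return ['%s -> %s' % (b.name, b.path) for b in file_bunches if b.path]

bunches = [
    Bunch(type='url', path='http://example.org/data.tsv', name='data.tsv', to_posix_lines=True, space_to_tab=False),
    Bunch(type=None, path=None, name=None),  # an empty selection is skipped
]
print(describe_uploads(bunches))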
Example #33
0
 dataset = UploadedDataset()
 dataset.file_type = file_type
 dataset.datatype = d_type
 dataset.dbkey = dbkey
 
 temp_name = None
 precreated_name = None
 is_multi_byte = False
 space_to_tab = False
 warnings = []
 
 dataset_name = None
 dataset_info = None
 if dataset.datatype.composite_type == 'auto_primary_file':
     #replace sniff here with just creating an empty file
     temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file() ), prefix='upload_auto_primary_file' )
     precreated_name = dataset_name = 'Uploaded Composite Dataset (%s)' % ( file_type )
 else:
     temp_name, precreated_name, is_multi_byte, space_to_tab, dataset_name, dataset_info, warnings = get_one_filename( groups_incoming[ 0 ] )
     writable_files_offset = 1
 if temp_name is None:#remove this before finish, this should create an empty dataset
     raise Exception( 'No primary dataset file was available for composite upload' )
 dataset.primary_file = temp_name
 dataset.is_multi_byte = is_multi_byte
 dataset.space_to_tab = space_to_tab
 dataset.precreated_name = precreated_name
 dataset.name = dataset_name
 dataset.info = dataset_info
 dataset.warnings.extend( warnings )
 dataset.register_temp_file( temp_name )
 
Example #34
0
 def add_file( self, trans, folder_id, file_obj, name, file_format, dbkey, roles, info='no info', space_to_tab=False,
               replace_dataset=None, library_item_info_template=None, template_elements={}, message=None ):
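     # Stage the uploaded stream to a temp file, validate/decompress it, then create the
     # LibraryDataset and LibraryDatasetDatasetAssociation records and apply permissions.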
     folder = trans.app.model.LibraryFolder.get( folder_id )
     data_type = None
     line_count = 0
     temp_name, is_multi_byte = sniff.stream_to_file( file_obj )
     # See if we have an empty file
     if not os.path.getsize( temp_name ) > 0:
         raise BadFileException( "you attempted to upload an empty file." )
     if is_multi_byte:
         ext = sniff.guess_ext( temp_name, is_multi_byte=True )
     else:
         if not data_type:
             # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress on the fly.
             is_gzipped, is_valid = self.check_gzip( temp_name )
             if is_gzipped and not is_valid:
                 raise BadFileException( "you attempted to upload an inappropriate file." )
             elif is_gzipped and is_valid:
                 # We need to uncompress the temp_name file
                 CHUNK_SIZE = 2**20 # 1Mb   
                 fd, uncompressed = tempfile.mkstemp()   
                 gzipped_file = gzip.GzipFile( temp_name )
                 while 1:
                     try:
                         chunk = gzipped_file.read( CHUNK_SIZE )
                     except IOError:
                         os.close( fd )
                         os.remove( uncompressed )
                         raise BadFileException( 'problem uncompressing gzipped data.' )
                     if not chunk:
                         break
                     os.write( fd, chunk )
                 os.close( fd )
                 gzipped_file.close()
                 # Replace the gzipped file with the decompressed file
                 shutil.move( uncompressed, temp_name )
                 if name.endswith( '.gz' ):
                     # strip the '.gz' suffix; rstrip() would remove any trailing '.', 'g' or 'z' characters
                     name = name[ :-len( '.gz' ) ]
                 data_type = 'gzip'
         ext = ''
         if not data_type:
             # See if we have a zip archive
             is_zipped, is_valid, test_ext = self.check_zip( temp_name )
             if is_zipped and not is_valid:
                 raise BadFileException( "you attempted to upload an inappropriate file." )
             elif is_zipped and is_valid:
                 # Currently, we force specific tools to handle this case.  We also require the user
                 # to manually set the incoming file_format
                 if ( test_ext == 'ab1' or test_ext == 'scf' ) and file_format != 'binseq.zip':
                     raise BadFileException( "Invalid 'File Format' for archive consisting of binary files - use 'Binseq.zip'." )
                 elif test_ext == 'txt' and file_format != 'txtseq.zip':
                     raise BadFileException( "Invalid 'File Format' for archive consisting of text files - use 'Txtseq.zip'." )
                 if not ( file_format == 'binseq.zip' or file_format == 'txtseq.zip' ):
                     raise BadFileException( "you must manually set the 'File Format' to either 'Binseq.zip' or 'Txtseq.zip' when uploading zip files." )
                 data_type = 'zip'
                 ext = file_format
         if not data_type:
             if self.check_binary( temp_name ):
                 try:
                     ext = name.split( "." )[1].strip().lower()
                 except:
                     ext = ''
                 if not( ext == 'ab1' or ext == 'scf' ):
                     raise BadFileException( "you attempted to upload an inappropriate file." )
                 if ext == 'ab1' and file_format != 'ab1':
                     raise BadFileException( "you must manually set the 'File Format' to 'Ab1' when uploading ab1 files." )
                 elif ext == 'scf' and file_format != 'scf':
                     raise BadFileException( "you must manually set the 'File Format' to 'Scf' when uploading scf files." )
                 data_type = 'binary'
         if not data_type:
             # We must have a text file
             if self.check_html( temp_name ):
                 raise BadFileException( "you attempted to upload an inappropriate file." )
         if data_type != 'binary' and data_type != 'zip':
             if space_to_tab:
                 line_count = sniff.convert_newlines_sep2tabs( temp_name )
             elif os.stat( temp_name ).st_size < 262144000: # 250MB
                 line_count = sniff.convert_newlines( temp_name )
             else:
                 if sniff.check_newlines( temp_name ):
                     line_count = sniff.convert_newlines( temp_name )
                 else:
                     line_count = None
             if file_format == 'auto':
                 ext = sniff.guess_ext( temp_name, sniff_order=trans.app.datatypes_registry.sniff_order )    
             else:
                 ext = file_format
             data_type = ext
     if info is None:
         info = 'uploaded %s file' % data_type
     if file_format == 'auto':
         data_type = sniff.guess_ext( temp_name, sniff_order=trans.app.datatypes_registry.sniff_order )    
     else:
         data_type = file_format
     if replace_dataset:
         # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
         library_dataset = replace_dataset
     else:
         # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new 
         # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
         library_dataset = trans.app.model.LibraryDataset( folder=folder, name=name, info=info )
         library_dataset.flush()
         trans.app.security_agent.copy_library_permissions( folder, library_dataset )
     ldda = trans.app.model.LibraryDatasetDatasetAssociation( name=name, 
                                                              info=info, 
                                                              extension=data_type, 
                                                              dbkey=dbkey, 
                                                              library_dataset=library_dataset,
                                                              user=trans.get_user(),
                                                              create_dataset=True )
     ldda.message = message
     ldda.flush()
     # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
     trans.app.security_agent.copy_library_permissions( library_dataset, ldda )
     if replace_dataset:
         # Copy the Dataset level permissions from replace_dataset to the new LibraryDatasetDatasetAssociation.dataset
         trans.app.security_agent.copy_dataset_permissions( replace_dataset.library_dataset_dataset_association.dataset, ldda.dataset )
     else:
         # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset
         trans.app.security_agent.set_all_dataset_permissions( ldda.dataset, trans.app.security_agent.user_get_default_permissions( trans.get_user() ) )
         folder.add_library_dataset( library_dataset, genome_build=dbkey )
         folder.flush()
     library_dataset.library_dataset_dataset_association_id = ldda.id
     library_dataset.flush()
     # Handle any templates included in the upload form
     if library_item_info_template:
         user = trans.get_user()
         library_item_info = trans.app.model.LibraryItemInfo( user=user )
         library_item_info.library_item_info_template = library_item_info_template
         library_item_info.flush()
         trans.app.security_agent.copy_library_permissions( library_item_info_template, library_item_info )
         for template_element in library_item_info_template.elements:
             info_element_value = template_elements.get( "info_element_%s_%s" % ( library_item_info_template.id, template_element.id ), None )
             info_element = trans.app.model.LibraryItemInfoElement()
             info_element.contents = info_element_value
             info_element.library_item_info_template_element = template_element
             info_element.library_item_info = library_item_info
             info_element.flush()
         library_item_info_association = trans.app.model.LibraryDatasetDatasetInfoAssociation( user=user )
         library_item_info_association.set_library_item( ldda )
         library_item_info_association.library_item_info = library_item_info
         library_item_info_association.flush()
     # If roles were selected upon upload, restrict access to the Dataset to those roles
     if roles:
         for role in roles:
             dp = trans.app.model.DatasetPermissions( RBACAgent.permitted_actions.DATASET_ACCESS.action, ldda.dataset, role )
             dp.flush()
     shutil.move( temp_name, ldda.dataset.file_name )
     ldda.state = ldda.states.OK
     ldda.init_meta()
     if line_count:
         try:
             if is_multi_byte:
                 ldda.set_multi_byte_peek( line_count=line_count )
             else:
                 ldda.set_peek( line_count=line_count )
         except:
             if is_multi_byte:
                 ldda.set_multi_byte_peek()
             else:
                 ldda.set_peek()
     else:
         if is_multi_byte:
             ldda.set_multi_byte_peek()
         else:
             ldda.set_peek()
     ldda.set_size()
     if ldda.missing_meta():
         ldda.datatype.set_meta( ldda )
     ldda.flush()
     return ldda
Example #35
0
    def get_uploaded_datasets(self, trans, context, override_name=None, override_info=None):
        def get_data_file_filename(data_file, override_name=None, override_info=None, purge=True):
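            # Wrap an already-persisted browser upload as a Bunch; purge_source records whether
            # the source copy may be removed after import.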
            dataset_name = override_name

            def get_file_name(file_name):
                file_name = file_name.split('\\')[-1]
                file_name = file_name.split('/')[-1]
                return file_name
            try:
                # Use the existing file
                if not dataset_name and 'filename' in data_file:
                    dataset_name = get_file_name(data_file['filename'])
                return Bunch(type='file', path=data_file['local_filename'], name=dataset_name, purge_source=purge)
            except Exception:
                # The uploaded file should've been persisted by the upload tool action
                return Bunch(type=None, path=None, name=None)

        def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None):
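            # Treat the pasted text either as a list of URLs (one per line) or, when it does not
            # look like a URL, as literal content stored in url_paste_file.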
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                url_paste = open(url_paste_file, 'r').read()

                def start_of_url(content):
                    start_of_url_paste = content.lstrip()[0:8].lower()
                    looks_like_url = False
                    for url_prefix in ["http://", "https://", "ftp://", "file://"]:
                        if start_of_url_paste.startswith(url_prefix):
                            looks_like_url = True
                            break

                    return looks_like_url

                if start_of_url(url_paste):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not start_of_url(line):
                                continue  # non-url line, ignore

                            if "file://" in line:
                                if not trans.user_is_admin:
                                    raise AdminRequiredException()
                                elif not trans.app.config.allow_path_paste:
                                    raise ConfigDoesNotAllowException()
                                upload_path = line[len("file://"):]
                                dataset_name = os.path.basename(upload_path)
                            else:
                                dataset_name = line

                            if override_name:
                                dataset_name = override_name
                            yield Bunch(type='url', path=line, name=dataset_name)
                else:
                    dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    yield Bunch(type='file', path=url_paste_file, name=dataset_name)

        def get_one_filename(context):
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            uuid = context.get('uuid', None) or None  # Turn '' to None
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            warnings = []
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            if file_bunch.path:
                if url_paste is not None and url_paste.strip():
                    warnings.append("All file contents specified in the paste box were ignored.")
                if ftp_files:
                    warnings.append("All FTP uploaded file selections were ignored.")
            elif url_paste is not None and url_paste.strip():  # we need to use url_paste
                for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                    if file_bunch.path:
                        break
                if file_bunch.path and ftp_files is not None:
                    warnings.append("All FTP uploaded file selections were ignored.")
            elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
                user_ftp_dir = trans.user_ftp_dir
                assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                    for filename in filenames:
                        for ftp_filename in ftp_files:
                            if ftp_filename == filename:
                                path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                                if not os.path.islink(os.path.join(dirpath, filename)):
                                    ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                                                     'filename' : os.path.basename(path)}
                                    purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                                    file_bunch = get_data_file_filename(
                                        ftp_data_file,
                                        override_name=name,
                                        override_info=info,
                                        purge=purge,
                                    )
                                    if file_bunch.path:
                                        break
                        if file_bunch.path:
                            break
                    if file_bunch.path:
                        break
            file_bunch.to_posix_lines = to_posix_lines
            file_bunch.auto_decompress = auto_decompress
            file_bunch.space_to_tab = space_to_tab
            file_bunch.uuid = uuid
            if file_type is not None:
                file_bunch.file_type = file_type
            if dbkey is not None:
                file_bunch.dbkey = dbkey
            return file_bunch, warnings

        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey

                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey

                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
                if trans.user is None:
                    log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                    for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, text_type):
                                    valid_files.append(unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                                 'filename' : os.path.basename(ftp_file)}
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey
                    rval.append(file_bunch)
            return rval
        file_type = self.get_file_type(context)
        file_count = self.get_file_count(trans, context)
        d_type = self.get_datatype(trans, context)
        dbkey = self.get_dbkey(context)
        tag_using_filenames = context.get('tag_using_filenames', False)
        force_composite = asbool(context.get('force_composite', 'False'))
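        # force_composite groups otherwise independent uploads into a single composite dataset,
        # keyed by index or by the per-file NAME.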
        writable_files = d_type.writable_files
        writable_files_offset = 0
        groups_incoming = [None for _ in range(file_count)]
        for group_incoming in context.get(self.name, []):
            i = int(group_incoming['__index__'])
            groups_incoming[i] = group_incoming
        if d_type.composite_type is not None or force_composite:
            # handle uploading of composite datatypes
            # Only one Dataset can be created
            dataset = Bunch()
            dataset.type = 'composite'
            dataset.file_type = file_type
            dataset.dbkey = dbkey
            dataset.datatype = d_type
            dataset.warnings = []
            dataset.metadata = {}
            dataset.composite_files = {}
            dataset.uuid = None
            dataset.tag_using_filenames = None
            # load metadata
            files_metadata = context.get(self.metadata_ref, {})
            metadata_name_substition_default_dict = dict((composite_file.substitute_name_with_metadata, d_type.metadata_spec[composite_file.substitute_name_with_metadata].default) for composite_file in d_type.composite_files.values() if composite_file.substitute_name_with_metadata)
            for meta_name, meta_spec in d_type.metadata_spec.items():
                if meta_spec.set_in_upload:
                    if meta_name in files_metadata:
                        meta_value = files_metadata[meta_name]
                        if meta_name in metadata_name_substition_default_dict:
                            meta_value = sanitize_for_filename(meta_value, default=metadata_name_substition_default_dict[meta_name])
                        dataset.metadata[meta_name] = meta_value
            dataset.name = self.get_composite_dataset_name(context)
            if dataset.datatype.composite_type == 'auto_primary_file':
                # replace sniff here with just creating an empty file
                temp_name = sniff.stream_to_file(StringIO(d_type.generate_primary_file(dataset)), prefix='upload_auto_primary_file')
                dataset.primary_file = temp_name
                dataset.to_posix_lines = True
                dataset.auto_decompress = True
                dataset.space_to_tab = False
            else:
                file_bunch, warnings = get_one_filename(groups_incoming[0])
                writable_files_offset = 1
                dataset.primary_file = file_bunch.path
                dataset.to_posix_lines = file_bunch.to_posix_lines
                dataset.auto_decompress = file_bunch.auto_decompress
                dataset.space_to_tab = file_bunch.space_to_tab
                if file_bunch.file_type:
                    dataset.file_type = file_type
                if file_bunch.dbkey:
                    dataset.dbkey = dbkey
                dataset.warnings.extend(warnings)
            if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                raise Exception('No primary dataset file was available for composite upload')
            if not force_composite:
                keys = [value.name for value in writable_files.values()]
            else:
                keys = [str(index) for index in range(file_count)]
            for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]):
                key = keys[i + writable_files_offset]
                if not force_composite and group_incoming is None and not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                    dataset.warnings.append("A required composite file (%s) was not specified." % (key))
                    dataset.composite_files[key] = None
                else:
                    file_bunch, warnings = get_one_filename(group_incoming)
                    dataset.warnings.extend(warnings)
                    if file_bunch.path:
                        if force_composite:
                            key = group_incoming.get("NAME") or i
                        dataset.composite_files[key] = file_bunch.__dict__
                    elif not force_composite:
                        dataset.composite_files[key] = None
                        if not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
                            dataset.warnings.append("A required composite file (%s) was not specified." % (key))
            return [dataset]
        else:
            rval = []
            for i, file_contexts in enumerate(context[self.name]):
                datasets = get_filenames(file_contexts)
                for dataset in datasets:
                    override_file_type = self.get_file_type(context[self.name][i], parent_context=context)
                    d_type = self.get_datatype(trans, context[self.name][i], parent_context=context)
                    dataset.file_type = override_file_type
                    dataset.datatype = d_type
                    dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context)
                    dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context)
                    dataset.tag_using_filenames = tag_using_filenames
                    rval.append(dataset)
            return rval
Example #36
0
def add_file(dataset, registry, json_file, output_path):
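    # Stage and sniff the uploaded dataset, optionally decompressing it and converting line
    # endings, then write a JSON record describing the result for the Galaxy framework.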
    data_type = None
    line_count = None
    converted_path = None
    stdout = None
    link_data_only = dataset.get('link_data_only', 'copy_files') != 'copy_files'

    # run_as_real_user is estimated from the Galaxy config (an external chmod of the job inputs indicates the upload executes as the real user)
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get("in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get('purge_source', True) and not run_as_real_user and dataset.type not in ('server_dir', 'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir', 'path_paste', 'ftp_import')

    # Based on the check_upload_content Galaxy config option and on by default, this enables some
    # security related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be swapped off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        ext = dataset.file_type
    except AttributeError:
        raise UploadProblemException('Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            page = urlopen(dataset.path)  # page will be .close()ed by sniff methods
            temp_name = sniff.stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers))
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, str(e)))
        dataset.path = temp_name
    # See if we have an empty file
    if not os.path.exists(dataset.path):
        raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)
    if not os.path.getsize(dataset.path) > 0:
        raise UploadProblemException('The uploaded file is empty')
    # Is dataset content supported sniffable binary?
    is_binary = check_binary(dataset.path)
    if is_binary:
        # Sniff the data type
        guessed_ext = sniff.guess_ext(dataset.path, registry.sniff_order)
        # Set data_type only if guessed_ext is a binary datatype
        datatype = registry.get_datatype_by_extension(guessed_ext)
        if isinstance(datatype, Binary):
            data_type = guessed_ext
            ext = guessed_ext
    if not data_type:
        root_datatype = registry.get_datatype_by_extension(dataset.file_type)
        if getattr(root_datatype, 'compressed', False):
            data_type = 'compressed archive'
            ext = dataset.file_type
        else:
            # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
            is_gzipped, is_valid = check_gzip(dataset.path, check_content=check_content)
            if is_gzipped and not is_valid:
                raise UploadProblemException('The gzipped uploaded file contains inappropriate content')
            elif is_gzipped and is_valid and auto_decompress:
                if not link_data_only:
                    # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
                    CHUNK_SIZE = 2 ** 20  # 1Mb
                    fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                    gzipped_file = gzip.GzipFile(dataset.path, 'rb')
                    while 1:
                        try:
                            chunk = gzipped_file.read(CHUNK_SIZE)
                        except IOError:
                            os.close(fd)
                            os.remove(uncompressed)
                            raise UploadProblemException('Problem decompressing gzipped data')
                        if not chunk:
                            break
                        os.write(fd, chunk)
                    os.close(fd)
                    gzipped_file.close()
                    # Replace the gzipped file with the decompressed file if it's safe to do so
                    if not in_place:
                        dataset.path = uncompressed
                    else:
                        shutil.move(uncompressed, dataset.path)
                    os.chmod(dataset.path, 0o644)
                if dataset.name.endswith('.gz'):
                    # strip the '.gz' suffix; rstrip() would remove any trailing '.', 'g' or 'z' characters
                    dataset.name = dataset.name[:-len('.gz')]
                data_type = 'gzip'
            if not data_type:
                # See if we have a bz2 file, much like gzip
                is_bzipped, is_valid = check_bz2(dataset.path, check_content)
                if is_bzipped and not is_valid:
                    raise UploadProblemException('The bz2 compressed uploaded file contains inappropriate content')
                elif is_bzipped and is_valid and auto_decompress:
                    if not link_data_only:
                        # We need to uncompress the temp_name file
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                        bzipped_file = bz2.BZ2File(dataset.path, 'rb')
                        while 1:
                            try:
                                chunk = bzipped_file.read(CHUNK_SIZE)
                            except IOError:
                                os.close(fd)
                                os.remove(uncompressed)
                                raise UploadProblemException('Problem decompressing bz2 compressed data')
                            if not chunk:
                                break
                            os.write(fd, chunk)
                        os.close(fd)
                        bzipped_file.close()
                        # Replace the bzipped file with the decompressed file if it's safe to do so
                        if not in_place:
                            dataset.path = uncompressed
                        else:
                            shutil.move(uncompressed, dataset.path)
                        os.chmod(dataset.path, 0o644)
                    if dataset.name.endswith('.bz2'):
                        # strip the '.bz2' suffix; rstrip() would remove any trailing '.', 'b', 'z' or '2' characters
                        dataset.name = dataset.name[:-len('.bz2')]
                    data_type = 'bz2'
            if not data_type:
                # See if we have a zip archive
                is_zipped = check_zip(dataset.path)
                if is_zipped and auto_decompress:
                    if not link_data_only:
                        CHUNK_SIZE = 2 ** 20  # 1Mb
                        uncompressed = None
                        uncompressed_name = None
                        unzipped = False
                        z = zipfile.ZipFile(dataset.path)
                        for name in z.namelist():
                            if name.endswith('/'):
                                continue
                            if unzipped:
                                stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                                break
                            fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname(output_path), text=False)
                            if sys.version_info[:2] >= (2, 6):
                                zipped_file = z.open(name)
                                while 1:
                                    try:
                                        chunk = zipped_file.read(CHUNK_SIZE)
                                    except IOError:
                                        os.close(fd)
                                        os.remove(uncompressed)
                                        raise UploadProblemException('Problem decompressing zipped data')
                                    if not chunk:
                                        break
                                    os.write(fd, chunk)
                                os.close(fd)
                                zipped_file.close()
                                uncompressed_name = name
                                unzipped = True
                            else:
                                # python < 2.6 doesn't have a way to read members in chunks(!)
                                try:
                                    with open(uncompressed, 'wb') as outfile:
                                        outfile.write(z.read(name))
                                    uncompressed_name = name
                                    unzipped = True
                                except IOError:
                                    os.close(fd)
                                    os.remove(uncompressed)
                                    raise UploadProblemException('Problem decompressing zipped data')
                        z.close()
                        # Replace the zipped file with the decompressed file if it's safe to do so
                        if uncompressed is not None:
                            if not in_place:
                                dataset.path = uncompressed
                            else:
                                shutil.move(uncompressed, dataset.path)
                            os.chmod(dataset.path, 0o644)
                            dataset.name = uncompressed_name
                    data_type = 'zip'
            if not data_type:
                if is_binary or registry.is_extension_unsniffable_binary(dataset.file_type):
                    # We have a binary dataset, but it is not Bam, Sff or Pdf
                    data_type = 'binary'
                    parts = dataset.name.split(".")
                    if len(parts) > 1:
                        ext = parts[-1].strip().lower()
                        is_ext_unsniffable_binary = registry.is_extension_unsniffable_binary(ext)
                        if check_content and not is_ext_unsniffable_binary:
                            raise UploadProblemException('The uploaded binary file contains inappropriate content')
                        elif is_ext_unsniffable_binary and dataset.file_type != ext:
                            err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext, ext)
                            raise UploadProblemException(err_msg)
            if not data_type:
                # We must have a text file
                if check_content and check_html(dataset.path):
                    raise UploadProblemException('The uploaded file contains inappropriate HTML content')
            if data_type != 'binary':
                if not link_data_only and data_type not in ('gzip', 'bz2', 'zip'):
                    # Convert universal line endings to Posix line endings if to_posix_lines is True
                    # and the data is not binary or gzip-, bz2- or zip-compressed.
                    if dataset.to_posix_lines:
                        tmpdir = output_adjacent_tmpdir(output_path)
                        tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id
                        if dataset.space_to_tab:
                            line_count, converted_path = sniff.convert_newlines_sep2tabs(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                        else:
                            line_count, converted_path = sniff.convert_newlines(dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix)
                if dataset.file_type == 'auto':
                    ext = sniff.guess_ext(converted_path or dataset.path, registry.sniff_order)
                else:
                    ext = dataset.file_type
                data_type = ext
    # Save job info for the framework
    if ext == 'auto' and data_type == 'binary':
        ext = 'data'
    if ext == 'auto' and dataset.ext:
        ext = dataset.ext
    if ext == 'auto':
        ext = 'data'
    datatype = registry.get_datatype_by_extension(ext)
    if dataset.type in ('server_dir', 'path_paste') and link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    if not link_data_only and converted_path:
        # Move the dataset to its "real" path
        try:
            shutil.move(converted_path, output_path)
        except OSError as e:
            # We may not have permission to remove converted_path
            if e.errno != errno.EACCES:
                raise
    elif not link_data_only:
        if purge_source:
            shutil.move(dataset.path, output_path)
        else:
            shutil.copy(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
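Editor's note: add_file() above ends by writing one JSON object per dataset to json_file. The reader below is not Galaxy code; it is an assumed helper sketching how such a JSON-lines file could be consumed.

import json

def read_upload_records(path):
    # Parse the JSON-lines records written by the upload script above; each record carries
    # type, dataset_id, ext, stdout, name, line_count (and uuid when present).
    records = []
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records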
Example #37
0
 def get_uploaded_datasets( self, trans, context, override_name = None, override_info = None ):
     def get_data_file_filename( data_file, override_name = None, override_info = None ):
         dataset_name = override_name
         dataset_info = override_info
         def get_file_name( file_name ):
             file_name = file_name.split( '\\' )[-1]
             file_name = file_name.split( '/' )[-1]
             return file_name
         try:
             # Use the existing file
             if not dataset_name and 'filename' in data_file:
                 dataset_name = get_file_name( data_file['filename'] )
             if not dataset_info:
                 dataset_info = 'uploaded file'
             return Bunch( type='file', path=data_file['local_filename'], name=get_file_name( data_file['filename'] ) )
             #return 'file', data_file['local_filename'], get_file_name( data_file.filename ), dataset_name, dataset_info
         except:
             # The uploaded file should've been persisted by the upload tool action
             return Bunch( type=None, path=None, name=None )
             #return None, None, None, None, None
     def get_url_paste_urls_or_filename( group_incoming, override_name = None, override_info = None ):
         filenames = []
         url_paste_file = group_incoming.get( 'url_paste', None )
         if url_paste_file is not None:
             url_paste = open( url_paste_file, 'r' ).read( 1024 )
             if url_paste.lstrip().lower().startswith( 'http://' ) or url_paste.lstrip().lower().startswith( 'ftp://' ):
                 url_paste = url_paste.replace( '\r', '' ).split( '\n' )
                 for line in url_paste:
                     line = line.strip()
                     if line:
                         if not line.lower().startswith( 'http://' ) and not line.lower().startswith( 'ftp://' ):
                             continue # non-url line, ignore
                         precreated_name = line
                         dataset_name = override_name
                         if not dataset_name:
                             dataset_name = line
                         dataset_info = override_info
                         if not dataset_info:
                             dataset_info = 'uploaded url'
                         yield Bunch( type='url', path=line, name=precreated_name )
                         #yield ( 'url', line, precreated_name, dataset_name, dataset_info )
             else:
                 dataset_name = dataset_info = precreated_name = 'Pasted Entry' #we need to differentiate between various url pastes here
                 if override_name:
                     dataset_name = override_name
                 if override_info:
                     dataset_info = override_info
                 yield Bunch( type='file', path=url_paste_file, name=precreated_name )
                 #yield ( 'file', url_paste_file, precreated_name, dataset_name, dataset_info )
     def get_one_filename( context ):
         data_file = context['file_data']
         url_paste = context['url_paste']
         name = context.get( 'NAME', None )
         info = context.get( 'INFO', None )
         warnings = []
         space_to_tab = False 
         if context.get( 'space_to_tab', None ) not in ["None", None]:
             space_to_tab = True
         file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
         if file_bunch.path and url_paste:
             if url_paste.strip():
                 warnings.append( "All file contents specified in the paste box were ignored." )
         else: #we need to use url_paste
             for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
                 if file_bunch.path:
                     break
         return file_bunch, warnings
     def get_filenames( context ):
         rval = []
         data_file = context['file_data']
         url_paste = context['url_paste']
         name = context.get( 'NAME', None )
         info = context.get( 'INFO', None )
         space_to_tab = False
         if context.get( 'space_to_tab', None ) not in ["None", None]:
             space_to_tab = True
         warnings = []
         file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
         if file_bunch.path:
             file_bunch.space_to_tab = space_to_tab
             rval.append( file_bunch )
         for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
             if file_bunch.path:
                 file_bunch.space_to_tab = space_to_tab
                 rval.append( file_bunch )
         return rval
     file_type = self.get_file_type( context )
     d_type = self.get_datatype( trans, context )
     dbkey = context.get( 'dbkey', None )
     writable_files = d_type.writable_files
     writable_files_offset = 0
     groups_incoming = [ None for filename in writable_files ]
     for group_incoming in context.get( self.name, [] ):
         i = int( group_incoming['__index__'] )
         groups_incoming[ i ] = group_incoming
     if d_type.composite_type is not None:
         #handle uploading of composite datatypes
         #Only one Dataset can be created
         dataset = Bunch()
         dataset.type = 'composite'
         dataset.file_type = file_type
         dataset.dbkey = dbkey
         dataset.datatype = d_type
         dataset.warnings = []
         dataset.metadata = {}
         dataset.composite_files = {}
         #load metadata
         files_metadata = context.get( self.metadata_ref, {} )
         for meta_name, meta_spec in d_type.metadata_spec.iteritems():
             if meta_spec.set_in_upload:
                 if meta_name in files_metadata:
                     dataset.metadata[ meta_name ] = files_metadata[ meta_name ]
         dataset_name = None
         dataset_info = None
         if dataset.datatype.composite_type == 'auto_primary_file':
             #replace sniff here with just creating an empty file
             temp_name, is_multi_byte = sniff.stream_to_file( StringIO.StringIO( d_type.generate_primary_file() ), prefix='upload_auto_primary_file' )
             dataset.primary_file = temp_name
             dataset.space_to_tab = False
             dataset.precreated_name = dataset.name = 'Uploaded Composite Dataset (%s)' % ( file_type )
         else:
             file_bunch, warnings = get_one_filename( groups_incoming[ 0 ] )
             if dataset.datatype.composite_type:
                 precreated_name = 'Uploaded Composite Dataset (%s)' % ( file_type )
             writable_files_offset = 1
             dataset.primary_file = file_bunch.path
             dataset.space_to_tab = file_bunch.space_to_tab
             dataset.precreated_name = precreated_name
             dataset.name = precreated_name
             dataset.warnings.extend( warnings )
         if dataset.primary_file is None:#remove this before finish, this should create an empty dataset
             raise Exception( 'No primary dataset file was available for composite upload' )
         keys = [ value.name for value in writable_files.values() ]
         for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
             key = keys[ i + writable_files_offset ]
             if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                 dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
                 dataset.composite_files[ key ] = None
             else:
                 file_bunch, warnings = get_one_filename( group_incoming )
                 if file_bunch.path:
                     dataset.composite_files[ key ] = file_bunch.__dict__
                 else:
                     dataset.composite_files[ key ] = None
                     if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
                         dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
         return [ dataset ]
     else:
         datasets = get_filenames( context[ self.name ][0] )
         rval = []
         for dataset in datasets:
             dataset.file_type = file_type
             dataset.datatype = d_type
             dataset.ext = self.get_datatype_ext( trans, context )
             dataset.dbkey = dbkey
             rval.append( dataset )
         return rval
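Editor's note: the lines below are not part of the example above. They sketch, under assumed key names, the composite_files mapping that the composite branch builds, where each writable-file key holds a Bunch's __dict__ or None when the file was not supplied.

composite_files = {
    'primary.txt': {'type': 'file', 'path': '/tmp/upload_123', 'name': 'primary.txt'},
    'index.txt': None,  # a required but missing file is recorded as None plus a warning
}
warnings = ["A required composite file (%s) was not specified." % key
            for key, value in composite_files.items() if value is None]
print(warnings)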