示例#1
0
def _call_init_upload(file_name, file_size, metadata, tags, project, samples_resource):
    """Ask the One Codex API to initialise an upload and return its response.

    Parameters
    ----------
    file_name : `string`
        Name to associate with this fastx file at One Codex.
    file_size : `integer`
        Exact size, in bytes, of the file that will be uploaded.
    metadata : `dict`, optional
        Sample metadata; keys are converted with `snake_case` before being sent.
    tags : `list`, optional
        Sent unchanged under the "tags" key when non-empty.
    project : `string`, optional
        UUID of the project to associate this sample with. An object exposing an `id` attribute
        is also accepted, in which case that `id` is sent.
    samples_resource : `onecodex.models.Samples`
        Wrapped potion-client object exposing `init_upload` and `confirm_upload` routes to mainline.

    Returns
    -------
    `dict`
        Whatever `samples_resource.init_upload` returns. Contains, at a minimum, 'upload_url' and
        'sample_id', and should also carry the additional data needed to upload the file to
        fastx-proxy, a user's S3 bucket, or an intermediate bucket.

    Notes
    -----
    A `requests` HTTPError is handed to `raise_api_error` (with ``state="init"``) and a
    ConnectionError to `raise_connectivity_error`.
    """
    payload = {
        "filename": file_name,
        "size": file_size,
        "upload_type": "standard",  # this is multipart form data
    }

    # Optional fields are only included when the caller supplied a truthy value.
    if metadata:
        # metadata keys are sent in snake case
        payload["metadata"] = {snake_case(key): value for key, value in metadata.items()}

    if tags:
        payload["tags"] = tags

    if project:
        # accept either a project object (use its id) or a bare identifier
        payload["project"] = getattr(project, "id", project)

    try:
        upload_info = samples_resource.init_upload(payload)
    except requests.exceptions.HTTPError as http_err:
        raise_api_error(http_err.response, state="init")
    except requests.exceptions.ConnectionError:
        raise_connectivity_error(file_name)

    return upload_info
示例#2
0
def build_upload_dict(metadata, tags, project):
    """Assemble metadata, tags and project into the dict layout the OneCodex backend expects.

    Only truthy arguments produce an entry:

    * ``metadata`` -> "metadata", with every key passed through `snake_case`
    * ``tags`` -> "tags", stored unchanged
    * ``project`` -> "project", using the object's `id` attribute when it has one and the
      value itself otherwise

    Returns the resulting dict, which is empty when nothing was supplied.
    """
    payload = {}

    if metadata:
        # metadata keys are sent in snake case
        payload["metadata"] = {snake_case(key): value for key, value in metadata.items()}

    if tags:
        payload["tags"] = tags

    if project:
        payload["project"] = getattr(project, "id", project)

    return payload
示例#3
0
def _make_retry_fields(file_name, metadata, tags, project):
    """Build the fields passed to init_multipart_upload when a Sample upload via fastx-proxy
    fails.

    Parameters
    ----------
    file_name : `string`
        Name to associate with this fastx file at One Codex.
    metadata : `dict`, optional
        Sample metadata; keys are converted with `snake_case`.
    tags : `list`, optional
        Included unchanged under the "tags" key when non-empty.
    project : `string`, optional
        UUID of the project to associate this sample with. An object exposing an `id` attribute
        is also accepted, in which case that `id` is used.

    Returns
    -------
    `dict`
        Always contains "filename"; "metadata", "tags" and "project" are present only when the
        corresponding argument was truthy. These fields will be integrated into the Sample model
        created when init_multipart_upload is called.
    """
    retry_fields = {"filename": file_name}

    if metadata:
        # metadata keys are sent in snake case
        retry_fields["metadata"] = {snake_case(key): value for key, value in metadata.items()}

    if tags:
        retry_fields["tags"] = tags

    if project:
        retry_fields["project"] = getattr(project, "id", project)

    return retry_fields
示例#4
0
def test_snake_case():
    """Pascal-case, camel-case and upper-snake inputs must all convert to "snake_case"."""
    expected = "snake_case"
    for raw_name in ("SnakeCase", "snakeCase", "SNAKE_CASE"):
        assert snake_case(raw_name) == expected
示例#5
0
def test_snake_case():
    """Pascal-case, camel-case and upper-snake inputs must all convert to "snake_case"."""
    expected = "snake_case"
    for raw_name in ("SnakeCase", "snakeCase", "SNAKE_CASE"):
        assert snake_case(raw_name) == expected
示例#6
0
def upload(ctx, files, max_threads, clean, no_interleave, prompt, validate,
           forward, reverse, tags, metadata):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex"""
    # NOTE(review): the docstring above is presumably surfaced as the CLI help
    # text (see ctx.get_help() below), so it is deliberately left unchanged.
    #
    # Flow:
    #   1. Collect tags / metadata into `appendables` and validate them
    #      through validate_appendables().
    #   2. Work out the list of files to upload: an explicit forward/reverse
    #      pair, or FILES with optional automatic R1/R2 pairing.
    #   3. Unless `clean` is set, promote ValidationWarning to an error.
    #   4. Upload via ctx.obj['API'].Samples.upload(); on failure write the
    #      error to stderr and exit with status 1.

    appendables = {}
    if tags:
        appendables['tags'] = list(tags)

    if metadata:
        parsed_metadata = {}
        for metadata_kv in metadata:
            # Split on the first '=' only, so values may themselves contain
            # '='. Entries without any '=' are skipped.
            key, separator, metadata_value = metadata_kv.partition('=')
            if separator:
                parsed_metadata[snake_case(key)] = metadata_value
        appendables['metadata'] = parsed_metadata

    appendables = validate_appendables(appendables, ctx.obj['API'])

    if (forward or reverse) and not (forward and reverse):
        click.echo('You must specify both forward and reverse files', err=True)
        sys.exit(1)
    if forward and reverse:
        if len(files) > 0:
            click.echo(
                'You may not pass a FILES argument when using the '
                ' --forward and --reverse options.',
                err=True)
            sys.exit(1)
        # an explicit pair is uploaded as one tuple and never auto-paired
        files = [(forward, reverse)]
        no_interleave = True
    if len(files) == 0:
        click.echo(ctx.get_help())
        return

    files = list(files)

    if not no_interleave:
        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)
        for filename in files:
            if filename not in single_files:
                # Already consumed: either a duplicate FILES argument or the
                # mate of a file paired earlier. Handling it again used to
                # raise KeyError from single_files.remove() when prompting.
                continue

            # convert "read 1" filenames into "read 2" and check that they exist; if they do
            # upload the files as a pair, autointerleaving them
            pair = re.sub(r'[._][Rr]1[._]',
                          lambda x: x.group().replace('1', '2'), filename)
            # we don't necessary need the R2 to have been passed in; we infer it anyways
            if pair != filename and os.path.exists(pair):
                if not prompt and pair not in single_files:
                    # if we're not prompting, don't automatically pull in files
                    # not in the list the user passed in
                    continue

                paired_files.append((filename, pair))
                single_files.discard(pair)
                single_files.remove(filename)

        auto_pair = True
        if prompt and len(paired_files) > 0:
            pair_list = ''.join(
                '\n  {}  &  {}'.format(os.path.basename(first),
                                       os.path.basename(second))
                for first, second in paired_files)

            # click.confirm expects a boolean default (was the string 'Y')
            answer = click.confirm(
                'It appears there are paired files:{}\nInterleave them after upload?'
                .format(pair_list),
                default=True)
            if not answer:
                auto_pair = False

        if auto_pair:
            files = paired_files + list(single_files)

    if not clean:
        # turn validation warnings into exceptions so they abort the upload
        warnings.filterwarnings('error', category=ValidationWarning)

    try:
        # do the uploading
        ctx.obj['API'].Samples.upload(files,
                                      threads=max_threads,
                                      validate=validate,
                                      metadata=appendables['valid_metadata'],
                                      tags=appendables['valid_tags'])

    except ValidationWarning as e:
        sys.stderr.write('\nERROR: {}. {}'.format(
            e, 'Running with the --clean flag will suppress this error.'))
        sys.exit(1)
    except (ValidationError, UploadException, Exception) as e:
        # TODO: Some day improve specific other exception error messages, e.g., gzip CRC IOError
        sys.stderr.write('\nERROR: {}'.format(e))
        sys.stderr.write(
            '\nPlease feel free to contact us for help at [email protected]\n\n'
        )
        sys.exit(1)
示例#7
0
def test_snake_case():
    """Pascal-case, camel-case and upper-snake inputs must all convert to 'snake_case'."""
    expected = 'snake_case'
    for raw_name in ('SnakeCase', 'snakeCase', 'SNAKE_CASE'):
        assert snake_case(raw_name) == expected