def aggregate_file(filename, filetype, assembly, chromsizes_filename,
                   has_header):
    """Aggregate a bed or bedpe file so that it can be imported.

    Files whose ``filetype`` is ``'bedfile'`` are passed to ``cca._bedfile``
    and files whose ``filetype`` is ``'bedpe'`` are passed to ``cca._bedpe``.
    Any other filetype is returned untouched.

    Args:
        filename: Path of the file to (possibly) aggregate.
        filetype: Type of the input file; only ``'bedfile'`` and ``'bedpe'``
            trigger aggregation.
        assembly: Assembly name forwarded to the aggregation routine, or
            ``None`` when ``chromsizes_filename`` is given instead.
        chromsizes_filename: Path to a chromosome sizes file forwarded to the
            aggregation routine, or ``None`` when ``assembly`` is given.
        has_header: Forwarded as ``has_header`` to the aggregation routine.

    Returns:
        A ``(path, filetype)`` tuple. For aggregated files ``path`` points at
        the newly written file inside a fresh temporary directory and
        ``filetype`` is ``'beddb'`` (bed input) or ``'bed2ddb'`` (bedpe
        input). For every other filetype the inputs are returned as-is.
        ``None`` is returned (after printing a message to stderr) when
        neither ``assembly`` nor ``chromsizes_filename`` is provided for a
        file that needs aggregation.
    """
    if filetype not in ('bedfile', 'bedpe'):
        return (filename, filetype)

    if assembly is None and chromsizes_filename is None:
        print(
            'An assembly or set of chromosome sizes is required when importing bed files. Please specify one or the other using the --assembly or --chromsizes-filename parameters',
            file=sys.stderr)
        return

    # The aggregated file has to outlive this function because its path is
    # handed back to the caller. A TemporaryDirectory context manager would
    # delete the directory (and the file in it) as soon as we return, so a
    # persistent directory is created instead; the caller owns its cleanup.
    td = tempfile.mkdtemp()

    # Only the base name is used so that the output always lands inside the
    # temporary directory, even when `filename` is absolute or contains
    # directory components that do not exist under `td`.
    base_name = op.basename(filename)

    if filetype == 'bedfile':
        output_file = op.join(td, base_name + '.beddb')

        print("Aggregating bedfile")
        cca._bedfile(filename,
                     output_file,
                     assembly,
                     importance_column='random',
                     has_header=has_header,
                     chromosome=None,
                     max_per_tile=50,
                     delimiter=None,
                     chromsizes_filename=chromsizes_filename,
                     offset=0,
                     tile_size=1024)

        # because we aggregated the file, the new filetype is beddb
        return (output_file, 'beddb')

    # Remaining case: filetype == 'bedpe'. The suffix matches the filetype
    # that is reported back to the caller.
    output_file = op.join(td, base_name + '.bed2ddb')

    print("Aggregating bedpe")
    cca._bedpe(filename,
               output_file,
               assembly,
               importance_column='random',
               has_header=has_header,
               chromosome=None,
               max_per_tile=50,
               chromsizes_filename=chromsizes_filename,
               tile_size=1024)

    # because we aggregated the file, the new filetype is bed2ddb
    return (output_file, 'bed2ddb')
Exemplo n.º 2
0
def test_nonstandard_chrom():
    """Check that ``cca._bedfile`` returns None for the non-standard-chrom sample.

    The same sample bed file is aggregated twice, once with the assembly
    name ``'hg19'`` and once with ``'dfsdfs'``; both calls are expected to
    return ``None``.
    """
    import os

    filename = 'test/sample_data/test_non_standard_chrom.bed'
    f = tempfile.NamedTemporaryFile(delete=False)
    # Only the path is needed below, so release our own handle immediately
    # instead of keeping it open for the lifetime of the test.
    f.close()

    try:
        for assembly in ('hg19', 'dfsdfs'):
            ret = cca._bedfile(filename, f.name,
                               assembly, None, False,
                               None, 100, 1024, None, None, 0)

            assert ret is None
    finally:
        # delete=False means nobody else removes the file; clean it up so
        # repeated test runs do not accumulate temporary files.
        if os.path.exists(f.name):
            os.remove(f.name)
Exemplo n.º 3
0
def aggregate_file(filename, filetype, assembly, chromsizes_filename,
                   has_header, no_upload, tmp_dir):
    """Aggregate a bed or bedpe file so that it can be imported.

    Files whose ``filetype`` is ``'bedfile'`` are passed to ``cca._bedfile``
    and files whose ``filetype`` is ``'bedpe'`` are passed to ``cca._bedpe``.
    Any other filetype is returned untouched.

    Args:
        filename: Path of the file to (possibly) aggregate.
        filetype: Type of the input file; only ``'bedfile'`` and ``'bedpe'``
            trigger aggregation.
        assembly: Assembly name forwarded to the aggregation routine, or
            ``None`` when ``chromsizes_filename`` is given instead.
        chromsizes_filename: Path to a chromosome sizes file forwarded to the
            aggregation routine, or ``None`` when ``assembly`` is given.
        has_header: Forwarded as ``has_header`` to the aggregation routine.
        no_upload: When truthy, files that would need aggregation are
            rejected with an exception (see the ``--link-file`` option
            mentioned in the error message).
        tmp_dir: Directory in which the aggregated output file is created.

    Returns:
        A ``(path, filetype)`` tuple. For aggregated files ``path`` is the
        output file inside ``tmp_dir`` and ``filetype`` is ``'beddb'`` (bed
        input) or ``'bed2ddb'`` (bedpe input). For every other filetype the
        inputs are returned as-is. ``None`` is returned (after printing a
        message to stderr) when neither ``assembly`` nor
        ``chromsizes_filename`` is provided for a file that needs
        aggregation.

    Raises:
        Exception: If ``no_upload`` is truthy and the file is a bed or bedpe
            file, since those have to be aggregated and cannot be linked.
    """
    if filetype not in ('bedfile', 'bedpe'):
        return (filename, filetype)

    if no_upload:
        # Exception() accepts no keyword arguments; passing file=sys.stderr
        # (as one would to print) raises a TypeError that hides the message.
        if filetype == 'bedfile':
            raise Exception(
                "Bedfile files need to be aggregated and cannot be linked. Consider not using the --link-file option")
        raise Exception(
            "Bedpe files need to be aggregated and cannot be linked. Consider not using the --link-file option")

    if assembly is None and chromsizes_filename is None:
        print(
            'An assembly or set of chromosome sizes is required when importing bed files. Please specify one or the other using the --assembly or --chromsizes-filename parameters',
            file=sys.stderr)
        return

    # Only the base name is used so that the output always lands inside
    # tmp_dir, even when `filename` is absolute (op.join would otherwise
    # discard tmp_dir) or contains directories that do not exist in tmp_dir.
    base_name = op.basename(filename)

    if filetype == 'bedfile':
        output_file = op.join(tmp_dir, base_name + '.beddb')

        print("Aggregating bedfile")
        cca._bedfile(filename,
                     output_file,
                     assembly,
                     importance_column='random',
                     has_header=has_header,
                     chromosome=None,
                     max_per_tile=50,
                     delimiter=None,
                     chromsizes_filename=chromsizes_filename,
                     offset=0,
                     tile_size=1024)

        # because we aggregated the file, the new filetype is beddb
        return (output_file, 'beddb')

    # Remaining case: filetype == 'bedpe'.
    output_file = op.join(tmp_dir, base_name + '.bed2ddb')

    print("Aggregating bedpe")
    cca._bedpe(filename,
               output_file,
               assembly,
               importance_column='random',
               has_header=has_header,
               chromosome=None,
               max_per_tile=50,
               chromsizes_filename=chromsizes_filename,
               tile_size=1024)

    # because we aggregated the file, the new filetype is bed2ddb
    return (output_file, 'bed2ddb')