Exemplo n.º 1
0
def main():
    """Sort a tabular interval file and bgzip-compress the sorted copy.

    Command line: ``[options] input_file output_file``.

    The sort order comes from one of two sources:

    * ``-c``/``-s``/``-e`` given together: column numbers passed verbatim to
      ``sort -k`` (which counts fields from 1) for the chromosome, start and
      end columns; or
    * ``-P``/``--preset``: one of ``bed``, ``vcf`` or ``gff``.

    Lines beginning with ``#`` or ``track`` are dropped before sorting. The
    sorted rows are written to a temporary file, which is then compressed to
    ``output_file`` with ``ctabix.tabix_compress``.

    Raises:
        SystemExit: via ``parser.error`` (status 2) when the positional
            arguments or the sort specification are missing or unknown.
        subprocess.CalledProcessError: when ``grep`` or ``sort`` fails.
    """
    # Read options, args.
    parser = optparse.OptionParser(
        usage="usage: %prog [options] input_file output_file")
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error("expected exactly two arguments: input_file output_file")
    input_fname, output_fname = args

    # Resolve the sort command before starting any subprocess, so a missing
    # or unknown sort specification is reported as a usage error instead of
    # surfacing later as Popen(None) raising TypeError.
    if options.chrom_col and options.start_col and options.end_col:
        sort_params = [
            "sort",
            "-k%d,%d" % (options.chrom_col, options.chrom_col),
            "-k%d,%dn" % (options.start_col, options.start_col),
            "-k%d,%dn" % (options.end_col, options.end_col),
        ]
    elif options.preset == "bed":
        sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
    elif options.preset == "vcf":
        sort_params = ["sort", "-k1,1", "-k2,2n"]
    elif options.preset == "gff":
        sort_params = ["sort", "-s", "-k1,1", "-k4,4n"]  # stable sort on start column
    else:
        parser.error("give --chr-col, --start-col and --end-col together, "
                     "or --preset bed|vcf|gff")

    # Skip any lines starting with "#" and "track". No shell is involved, so
    # the pattern must not carry shell-style quotes: the former '^"#"' only
    # matched lines that literally began with the three characters "#".
    # "--" keeps an input name starting with "-" from being read as an option.
    grep_cmd = ["grep", "-e", "^#", "-e", "^track", "-v", "--", input_fname]

    with tempfile.NamedTemporaryFile() as tmpfile:
        # stderr is inherited rather than piped: an unread pipe hides the
        # tools' error messages and can block a child once it fills up.
        grepped = subprocess.Popen(grep_cmd, stdout=subprocess.PIPE)
        after_sort = subprocess.Popen(sort_params, stdin=grepped.stdout, stdout=tmpfile)
        # Close our copy of the pipe so grep sees SIGPIPE if sort exits early.
        grepped.stdout.close()
        sort_status = after_sort.wait()
        grep_status = grepped.wait()

        if sort_status != 0:
            raise subprocess.CalledProcessError(sort_status, sort_params)
        # grep exits with 1 when it selects no lines, which is not a failure.
        if grep_status not in (0, 1):
            raise subprocess.CalledProcessError(grep_status, grep_cmd)

        # Compress while the temporary file still exists on disk.
        ctabix.tabix_compress(tmpfile.name, output_fname, force=True)
Exemplo n.º 2
0
def main():
    """Bgzip-compress one file into another.

    Command line: ``input_file output_file``. The input is handed to
    ``ctabix.tabix_compress`` with ``force=True``.

    Raises:
        SystemExit: via ``parser.error`` (status 2) unless exactly two
            positional arguments are given.
    """
    # Read options, args.
    parser = optparse.OptionParser(usage="usage: %prog input_file output_file")
    # No options are defined; only the positional arguments matter.
    _, args = parser.parse_args()
    if len(args) != 2:
        # Report a usage error instead of letting the unpacking below fail
        # with a bare ValueError traceback.
        parser.error("expected exactly two arguments: input_file output_file")
    input_fname, output_fname = args

    ctabix.tabix_compress(input_fname, output_fname, force=True)
Exemplo n.º 3
0
def main():
    """Bgzip-compress a tabular file and build a tabix index for the result.

    Command line: ``[options] tabular_input_file bgzip_output_file``.
    ``-c``, ``-s`` and ``-e`` give the 0-based chromosome, start and end
    columns passed to ``ctabix.tabix_index`` (defaults 0, 1 and 1). The
    directory part of the output path is created when it does not exist.

    Raises:
        SystemExit: with status 1, after printing the usage line, unless
            exactly two positional arguments are given.
        OSError: when the output directory cannot be created.
    """
    # Read options, args.
    usage = "Usage: %prog [options] tabular_input_file bgzip_output_file"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-c',
                      '--chr-col',
                      type='int',
                      default=0,
                      dest='chrom_col')
    parser.add_option('-s',
                      '--start-col',
                      type='int',
                      default=1,
                      dest='start_col')
    parser.add_option('-e', '--end-col', type='int', default=1, dest='end_col')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_usage()
        # Raise SystemExit directly: the ``exit`` builtin is injected by the
        # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
        raise SystemExit(1)
    input_fname, output_fname = args

    output_dir = os.path.dirname(output_fname)
    # dirname() is '' for a bare file name, meaning "current directory";
    # os.makedirs('') would raise, so only create a directory that is named.
    if output_dir:
        try:
            os.makedirs(output_dir)
        except OSError:
            # An already existing directory (possibly created concurrently)
            # is fine; anything else is a genuine failure.
            if not os.path.isdir(output_dir):
                raise

    ctabix.tabix_compress(input_fname, output_fname, force=True)
    # Column indices are 0-based.
    ctabix.tabix_index(output_fname,
                       seq_col=options.chrom_col,
                       start_col=options.start_col,
                       end_col=options.end_col)
Exemplo n.º 4
0
def main():
    """Sort a tabular interval file and bgzip-compress the sorted copy.

    Command line: ``[options] input_file output_file``.

    The sort order comes from one of two sources:

    * ``-c``/``-s``/``-e`` given together: column numbers passed verbatim to
      ``sort -k`` (which counts fields from 1) for the chromosome, start and
      end columns; or
    * ``-P``/``--preset``: one of ``bed``, ``vcf`` or ``gff``.

    Lines beginning with ``#`` or ``track`` are dropped before sorting. The
    sorted rows are written to a temporary file, which is then compressed to
    ``output_file`` with ``ctabix.tabix_compress``.

    Raises:
        SystemExit: via ``parser.error`` (status 2) when the positional
            arguments or the sort specification are missing or unknown.
        subprocess.CalledProcessError: when ``grep`` or ``sort`` fails.
    """
    # Read options, args.
    parser = optparse.OptionParser(
        usage="usage: %prog [options] input_file output_file")
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error("expected exactly two arguments: input_file output_file")
    input_fname, output_fname = args

    # Resolve the sort command before starting any subprocess, so a missing
    # or unknown sort specification is reported as a usage error instead of
    # surfacing later as Popen(None) raising TypeError.
    if options.chrom_col and options.start_col and options.end_col:
        sort_params = [
            "sort",
            "-k%d,%d" % (options.chrom_col, options.chrom_col),
            "-k%d,%dn" % (options.start_col, options.start_col),
            "-k%d,%dn" % (options.end_col, options.end_col),
        ]
    elif options.preset == "bed":
        sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
    elif options.preset == "vcf":
        sort_params = ["sort", "-k1,1", "-k2,2n"]
    elif options.preset == "gff":
        sort_params = ["sort", "-s", "-k1,1",
                       "-k4,4n"]  # stable sort on start column
    else:
        parser.error("give --chr-col, --start-col and --end-col together, "
                     "or --preset bed|vcf|gff")

    # Skip any lines starting with "#" and "track". No shell is involved, so
    # the pattern must not carry shell-style quotes: the former '^"#"' only
    # matched lines that literally began with the three characters "#".
    # "--" keeps an input name starting with "-" from being read as an option.
    grep_cmd = ["grep", "-e", "^#", "-e", "^track", "-v", "--", input_fname]

    with tempfile.NamedTemporaryFile() as tmpfile:
        # stderr is inherited rather than piped: an unread pipe hides the
        # tools' error messages and can block a child once it fills up.
        grepped = subprocess.Popen(grep_cmd, stdout=subprocess.PIPE)
        after_sort = subprocess.Popen(sort_params,
                                      stdin=grepped.stdout,
                                      stdout=tmpfile)
        # Close our copy of the pipe so grep sees SIGPIPE if sort exits early.
        grepped.stdout.close()
        sort_status = after_sort.wait()
        grep_status = grepped.wait()

        if sort_status != 0:
            raise subprocess.CalledProcessError(sort_status, sort_params)
        # grep exits with 1 when it selects no lines, which is not a failure.
        if grep_status not in (0, 1):
            raise subprocess.CalledProcessError(grep_status, grep_cmd)

        # Compress while the temporary file still exists on disk.
        ctabix.tabix_compress(tmpfile.name, output_fname, force=True)
Exemplo n.º 5
0
def main():
    """Bgzip-compress a tabular file and build a tabix index for the result.

    Command line: ``[options] tabular_input_file bgzip_output_file``.
    ``-c``, ``-s`` and ``-e`` give the 0-based chromosome, start and end
    columns passed to ``ctabix.tabix_index`` (defaults 0, 1 and 1). The
    directory part of the output path is created when it does not exist.

    Raises:
        SystemExit: with status 1, after printing the usage line, unless
            exactly two positional arguments are given.
        OSError: when the output directory cannot be created.
    """
    # Read options, args.
    usage = "Usage: %prog [options] tabular_input_file bgzip_output_file"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-c', '--chr-col', type='int', default=0, dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', default=1, dest='start_col')
    parser.add_option('-e', '--end-col', type='int', default=1, dest='end_col')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_usage()
        # Raise SystemExit directly: the ``exit`` builtin is injected by the
        # ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
        raise SystemExit(1)
    input_fname, output_fname = args

    output_dir = os.path.dirname(output_fname)
    # dirname() is '' for a bare file name, meaning "current directory";
    # os.makedirs('') would raise, so only create a directory that is named.
    if output_dir:
        try:
            os.makedirs(output_dir)
        except OSError:
            # An already existing directory (possibly created concurrently)
            # is fine; anything else is a genuine failure.
            if not os.path.isdir(output_dir):
                raise

    ctabix.tabix_compress(input_fname, output_fname, force=True)
    # Column indices are 0-based.
    ctabix.tabix_index(output_fname, seq_col=options.chrom_col, start_col=options.start_col, end_col=options.end_col)
def tabix_file(input_fname, output_fname):
    """Bgzip-compress ``input_fname`` into ``output_fname`` and tabix-index it.

    A progress line naming both files is written to standard output first.
    The index is built with column 0 as the sequence name and column 1 as
    both the start and the end position.

    Args:
        input_fname: path of the tabular file to compress.
        output_fname: path of the compressed file to write and index.
    """
    # sys.stdout.write emits the same text as the Python 2-only
    # ``print >> sys.stdout`` statement, and also works on Python 3, where
    # that statement parses as an expression and raises TypeError.
    sys.stdout.write("tabix_file: %s -> %s\n" % (input_fname, output_fname))
    ctabix.tabix_compress(input_fname, output_fname, force=True)
    # Column indices are 0-based.
    ctabix.tabix_index(output_fname, seq_col=0, start_col=1, end_col=1)
def tabix_file(input_fname, output_fname):
    """Bgzip-compress ``input_fname`` into ``output_fname`` and tabix-index it.

    A progress line naming both files is written to standard output first.
    The index is built with column 0 as the sequence name and column 1 as
    both the start and the end position.

    Args:
        input_fname: path of the tabular file to compress.
        output_fname: path of the compressed file to write and index.
    """
    # sys.stdout.write emits the same text as the Python 2-only
    # ``print >> sys.stdout`` statement, and also works on Python 3, where
    # that statement parses as an expression and raises TypeError.
    sys.stdout.write("tabix_file: %s -> %s\n" % (input_fname, output_fname))
    ctabix.tabix_compress(input_fname, output_fname, force=True)
    # Column indices are 0-based.
    ctabix.tabix_index(output_fname, seq_col=0, start_col=1, end_col=1)