示例#1
0
def main():
    """Intersect two interval datasets and write the resulting features to a file.

    Options and positional arguments are parsed from the module docstring with
    ``doc_optparse``. The three positional arguments are, in order, the first
    input file, the second input file and the output file.

    Options read from the command line:
        cols1, cols2: column specifications handed to ``parse_cols_arg`` for
            the first and second input respectively.
        mincols: passed to ``intersect`` as ``mincols``; defaults to 1 when
            the option is not given.
        pieces: truthiness is passed to ``intersect`` as ``pieces``.
        gff1, gff2: when truthy, the corresponding input is read with
            ``GFFReaderWrapper`` instead of ``NiceReaderWrapper``.

    A ``ParseError`` raised while iterating over the intersection is reported
    through ``fail`` with an "Invalid file format" message.
    """
    mincols = 1

    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        if options.mincols:
            mincols = int(options.mincols)
        pieces = bool(options.pieces)
        in1_gff_format = bool(options.gff1)
        in2_gff_format = bool(options.gff2)
        in_fname, in2_fname, out_fname = args
    except Exception:
        # NOTE(review): doc_optparse.exception() is presumably expected to
        # report the problem and abort; if it ever returns, the names bound
        # above are undefined below -- confirm.
        doc_optparse.exception()

    # Set readers to handle either GFF or default format.
    in1_reader_wrapper = GFFReaderWrapper if in1_gff_format else NiceReaderWrapper
    in2_reader_wrapper = GFFReaderWrapper if in2_gff_format else NiceReaderWrapper

    g1 = in1_reader_wrapper(fileinput.FileInput(in_fname),
                            chrom_col=chr_col_1,
                            start_col=start_col_1,
                            end_col=end_col_1,
                            strand_col=strand_col_1,
                            fix_strand=True)
    if in1_gff_format:
        # Intersect requires coordinates in BED format.
        g1.convert_to_bed_coord = True
    g2 = in2_reader_wrapper(fileinput.FileInput(in2_fname),
                            chrom_col=chr_col_2,
                            start_col=start_col_2,
                            end_col=end_col_2,
                            strand_col=strand_col_2,
                            fix_strand=True)
    if in2_gff_format:
        # Intersect requires coordinates in BED format.
        g2.convert_to_bed_coord = True

    try:
        # The ``with`` block closes the output file on success as well as when
        # a ParseError propagates, so the file is already closed by the time
        # fail() is called in the handler below.
        with open(out_fname, "w") as out_file:
            for feature in intersect([g1, g2], pieces=pieces, mincols=mincols):
                if isinstance(feature, GFFFeature):
                    # Convert back to GFF coordinates since reader converted automatically.
                    convert_bed_coords_to_gff(feature)
                    for interval in feature.intervals:
                        out_file.write("%s\n" % "\t".join(interval.fields))
                elif isinstance(feature, GenomicInterval):
                    out_file.write("%s\n" % "\t".join(feature.fields))
                else:
                    # Anything else is written using its string form.
                    out_file.write("%s\n" % feature)
    except ParseError as exc:
        fail("Invalid file format: %s" % str(exc))
def main():
    """Run ``proximal_region_finder`` on two interval datasets and write the results.

    Options and positional arguments are parsed from the module docstring with
    ``doc_optparse``. The four positional arguments are, in order, the first
    input file, the second input file, the output file and the search
    direction, which is passed unchanged to ``proximal_region_finder``.

    Options read from the command line:
        cols1, cols2: column specifications handed to ``parse_cols_arg`` for
            the first and second input respectively.
        gff1, gff2: when truthy, the corresponding input is read with
            ``GFFIntervalToBEDReaderWrapper`` instead of ``NiceReaderWrapper``.

    Each list result is treated as an ``(interval, closest_feature)`` pair and
    written as one line; any other result is written using its string form.
    A ``ParseError`` raised during iteration is reported through ``fail``.
    Afterwards the direction and, where lines were skipped, a per-dataset
    ``skipped`` summary are printed.
    """
    options, args = doc_optparse.parse( __doc__ )
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
        in1_gff_format = bool( options.gff1 )
        in2_gff_format = bool( options.gff2 )
        in_fname, in2_fname, out_fname, direction = args
    except Exception:
        # NOTE(review): doc_optparse.exception() is presumably expected to
        # report the problem and abort; if it ever returns, the names bound
        # above are undefined below -- confirm.
        doc_optparse.exception()

    # Set readers to handle either GFF or default format.
    if in1_gff_format:
        in1_reader_wrapper = GFFIntervalToBEDReaderWrapper
    else:
        in1_reader_wrapper = NiceReaderWrapper
    if in2_gff_format:
        in2_reader_wrapper = GFFIntervalToBEDReaderWrapper
    else:
        in2_reader_wrapper = NiceReaderWrapper

    g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
                             chrom_col=chr_col_1,
                             start_col=start_col_1,
                             end_col=end_col_1,
                             strand_col=strand_col_1,
                             fix_strand=True )
    g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
                             chrom_col=chr_col_2,
                             start_col=start_col_2,
                             end_col=end_col_2,
                             strand_col=strand_col_2,
                             fix_strand=True )

    # Find flanking features.
    try:
        # The ``with`` block guarantees the output file is flushed and closed
        # on success and before fail() runs when a ParseError propagates.
        with open( out_fname, "w" ) as out_file:
            for result in proximal_region_finder( [g1, g2], direction ):
                if not isinstance( result, list ):
                    # Non-list results are written using their string form.
                    out_file.write( "%s\n" % result )
                    continue

                line, closest_feature = result
                # Need to join outputs differently depending on file types.
                if in1_gff_format:
                    # Output is GFF with added attribute 'closest feature.'

                    # Intervals are in BED coordinates; need to convert to GFF.
                    line = convert_bed_coords_to_gff( line )
                    closest_feature = convert_bed_coords_to_gff( closest_feature )

                    # Backslash-escape double quotes in the closest feature's
                    # fields so they can sit inside the quoted attribute value.
                    escaped_closest = "\t".join( closest_feature.fields ).replace( "\"", "\\\"" )
                    out_file.write( "%s closest_feature \"%s\" \n" %
                                    ( "\t".join( line.fields ), escaped_closest ) )
                else:
                    # Output is BED + closest feature fields.
                    output_line_fields = []
                    output_line_fields.extend( line.fields )
                    output_line_fields.extend( closest_feature.fields )
                    out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) )
    except ParseError as exc:
        fail( "Invalid file format: %s" % str( exc ) )

    print("Direction: %s" % (direction))
    if g1.skipped > 0:
        print(skipped( g1, filedesc=" of 1st dataset" ))
    if g2.skipped > 0:
        print(skipped( g2, filedesc=" of 2nd dataset" ))
示例#3
0
def main():
    """Pair intervals with their proximal features and write them to a file.

    Command-line handling is delegated to ``doc_optparse``, which parses the
    module docstring. Exactly four positional arguments are expected: first
    input file, second input file, output file and direction. The direction
    is forwarded as-is to ``proximal_region_finder``.

    Options used:
        cols1 / cols2: parsed by ``parse_cols_arg`` into the chromosome,
            start, end and strand column settings of each input.
        gff1 / gff2: select ``GFFIntervalToBEDReaderWrapper`` (truthy) or
            ``NiceReaderWrapper`` (falsy) as the reader for each input.

    Output: one line per result yielded by ``proximal_region_finder``. List
    results are unpacked into an interval and its closest feature; other
    results are written via ``%s`` formatting. ``ParseError`` is reported
    through ``fail``. The direction and any skipped-line summaries are
    printed at the end.
    """
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(
            options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(
            options.cols2)
        in1_gff_format = bool(options.gff1)
        in2_gff_format = bool(options.gff2)
        in_fname, in2_fname, out_fname, direction = args
    except Exception:
        # NOTE(review): doc_optparse.exception() is presumably expected to
        # report the problem and abort; if it ever returns, the names bound
        # above are undefined below -- confirm.
        doc_optparse.exception()

    # Set readers to handle either GFF or default format.
    in1_reader_wrapper = (GFFIntervalToBEDReaderWrapper
                          if in1_gff_format else NiceReaderWrapper)
    in2_reader_wrapper = (GFFIntervalToBEDReaderWrapper
                          if in2_gff_format else NiceReaderWrapper)

    g1 = in1_reader_wrapper(fileinput.FileInput(in_fname),
                            chrom_col=chr_col_1,
                            start_col=start_col_1,
                            end_col=end_col_1,
                            strand_col=strand_col_1,
                            fix_strand=True)
    g2 = in2_reader_wrapper(fileinput.FileInput(in2_fname),
                            chrom_col=chr_col_2,
                            start_col=start_col_2,
                            end_col=end_col_2,
                            strand_col=strand_col_2,
                            fix_strand=True)

    # Find flanking features.
    try:
        # Opening the file in a ``with`` block ensures it is closed once
        # writing finishes, and before fail() is invoked on a ParseError.
        with open(out_fname, "w") as out_file:
            for result in proximal_region_finder([g1, g2], direction):
                if isinstance(result, list):
                    line, closest_feature = result
                    # Need to join outputs differently depending on file types.
                    if in1_gff_format:
                        # Output is GFF with added attribute 'closest feature.'

                        # Intervals are in BED coordinates; need to convert to GFF.
                        line = convert_bed_coords_to_gff(line)
                        closest_feature = convert_bed_coords_to_gff(
                            closest_feature)

                        # Escape (not replace) double quotes in the closest
                        # feature's fields: each '"' becomes '\"' so the value
                        # can be wrapped in double quotes.
                        out_file.write(
                            "%s closest_feature \"%s\" \n" %
                            ("\t".join(line.fields), "\t".join(
                                closest_feature.fields).replace("\"", "\\\"")))
                    else:
                        # Output is BED + closest feature fields.
                        output_line_fields = list(line.fields)
                        output_line_fields.extend(closest_feature.fields)
                        out_file.write("%s\n" % ("\t".join(output_line_fields)))
                else:
                    # Non-list results are written using their string form.
                    out_file.write("%s\n" % result)
    except ParseError as exc:
        fail("Invalid file format: %s" % str(exc))

    print("Direction: %s" % (direction))
    if g1.skipped > 0:
        print(skipped(g1, filedesc=" of 1st dataset"))
    if g2.skipped > 0:
        print(skipped(g2, filedesc=" of 2nd dataset"))