def main():
    """Intersect two interval datasets and write the result to a file.

    Command-line options are parsed from the module docstring with
    ``doc_optparse.parse``.  Exactly three positional arguments are
    expected: the first input file, the second input file and the output
    file.  The options read here are ``cols1``/``cols2`` (column
    specifications handed to ``parse_cols_arg``), ``mincols`` (defaults to
    1 when not given), ``pieces`` and ``gff1``/``gff2`` (whether the
    corresponding input is GFF).

    Any failure while interpreting the options is delegated to
    ``doc_optparse.exception()``; a ``ParseError`` raised while reading the
    inputs is reported through ``fail()``.
    """
    mincols = 1

    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        if options.mincols:
            mincols = int(options.mincols)
        pieces = bool(options.pieces)
        in1_gff_format = bool(options.gff1)
        in2_gff_format = bool(options.gff2)
        in_fname, in2_fname, out_fname = args
    except Exception:
        # Narrower than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt are not reported as option-parsing errors.
        doc_optparse.exception()

    # Set readers to handle either GFF or default format.
    in1_reader_wrapper = GFFReaderWrapper if in1_gff_format else NiceReaderWrapper
    in2_reader_wrapper = GFFReaderWrapper if in2_gff_format else NiceReaderWrapper

    g1 = in1_reader_wrapper(
        fileinput.FileInput(in_fname),
        chrom_col=chr_col_1,
        start_col=start_col_1,
        end_col=end_col_1,
        strand_col=strand_col_1,
        fix_strand=True,
    )
    if in1_gff_format:
        # Intersect requires coordinates in BED format.
        g1.convert_to_bed_coord = True

    g2 = in2_reader_wrapper(
        fileinput.FileInput(in2_fname),
        chrom_col=chr_col_2,
        start_col=start_col_2,
        end_col=end_col_2,
        strand_col=strand_col_2,
        fix_strand=True,
    )
    if in2_gff_format:
        # Intersect requires coordinates in BED format.
        g2.convert_to_bed_coord = True

    try:
        # The ``with`` block closes the output file on success as well as on
        # error, and before ``fail()`` is invoked.
        with open(out_fname, "w") as out_file:
            for feature in intersect([g1, g2], pieces=pieces, mincols=mincols):
                if isinstance(feature, GFFFeature):
                    # Convert back to GFF coordinates since the reader
                    # converted them automatically.
                    convert_bed_coords_to_gff(feature)
                    for interval in feature.intervals:
                        out_file.write("%s\n" % "\t".join(interval.fields))
                elif isinstance(feature, GenomicInterval):
                    out_file.write("%s\n" % "\t".join(feature.fields))
                else:
                    # Anything else is written out as-is.
                    out_file.write("%s\n" % feature)
    except ParseError as e:
        fail("Invalid file format: %s" % str(e))
def main():
    """Pair each interval of the first dataset with its closest feature.

    Command-line options are parsed from the module docstring with
    ``doc_optparse.parse``.  Exactly four positional arguments are
    expected: the first input file, the second input file, the output file
    and a ``direction`` value that is passed unchanged to
    ``proximal_region_finder``.  The options read here are
    ``cols1``/``cols2`` (column specifications handed to
    ``parse_cols_arg``) and ``gff1``/``gff2`` (whether the corresponding
    input is GFF).

    Output depends on the format of the first input: for GFF, the interval's
    fields followed by a ``closest_feature`` attribute; otherwise the
    interval's fields followed by the closest feature's fields, all
    tab-separated.  After processing, the direction and any skipped-line
    summaries are printed to standard output.
    """
    options, args = doc_optparse.parse( __doc__ )
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
        in1_gff_format = bool( options.gff1 )
        in2_gff_format = bool( options.gff2 )
        in_fname, in2_fname, out_fname, direction = args
    except Exception:
        # Narrower than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt are not reported as option-parsing errors.
        doc_optparse.exception()

    # Set readers to handle either GFF or default format.
    in1_reader_wrapper = GFFIntervalToBEDReaderWrapper if in1_gff_format else NiceReaderWrapper
    in2_reader_wrapper = GFFIntervalToBEDReaderWrapper if in2_gff_format else NiceReaderWrapper

    g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
                             chrom_col=chr_col_1,
                             start_col=start_col_1,
                             end_col=end_col_1,
                             strand_col=strand_col_1,
                             fix_strand=True )
    g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
                             chrom_col=chr_col_2,
                             start_col=start_col_2,
                             end_col=end_col_2,
                             strand_col=strand_col_2,
                             fix_strand=True )

    # Find flanking features.
    try:
        # The ``with`` block guarantees the output file is flushed and closed
        # on every path, including before ``fail()`` is invoked.
        with open( out_fname, "w" ) as out_file:
            for result in proximal_region_finder( [g1, g2], direction ):
                if not isinstance( result, list ):
                    # Non-list results are written out as-is.
                    out_file.write( "%s\n" % result )
                    continue

                line, closest_feature = result
                # Need to join outputs differently depending on file types.
                if in1_gff_format:
                    # Output is GFF with added attribute 'closest feature.'
                    # Intervals are in BED coordinates; need to convert to GFF.
                    line = convert_bed_coords_to_gff( line )
                    closest_feature = convert_bed_coords_to_gff( closest_feature )
                    # Backslash-escape double quotes in the closest feature's
                    # fields so they can sit inside the quoted attribute value.
                    escaped_closest = "\t".join( closest_feature.fields ).replace( "\"", "\\\"" )
                    out_file.write( "%s closest_feature \"%s\" \n" %
                                    ( "\t".join( line.fields ), escaped_closest ) )
                else:
                    # Output is BED + closest feature fields.
                    output_line_fields = list( line.fields ) + list( closest_feature.fields )
                    out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) )
    except ParseError as exc:
        fail( "Invalid file format: %s" % str( exc ) )

    print("Direction: %s" % (direction))
    if g1.skipped > 0:
        print(skipped( g1, filedesc=" of 1st dataset" ))
    if g2.skipped > 0:
        print(skipped( g2, filedesc=" of 2nd dataset" ))
def main():
    """Pair each interval of the first dataset with its closest feature.

    Command-line options are parsed from the module docstring with
    ``doc_optparse.parse``.  Exactly four positional arguments are
    expected: the first input file, the second input file, the output file
    and a ``direction`` value that is passed unchanged to
    ``proximal_region_finder``.  The options read here are
    ``cols1``/``cols2`` (column specifications handed to
    ``parse_cols_arg``) and ``gff1``/``gff2`` (whether the corresponding
    input is GFF).

    Output depends on the format of the first input: for GFF, the interval's
    fields followed by a ``closest_feature`` attribute; otherwise the
    interval's fields followed by the closest feature's fields, all
    tab-separated.  After processing, the direction and any skipped-line
    summaries are printed to standard output.
    """
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(
            options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(
            options.cols2)
        in1_gff_format = bool(options.gff1)
        in2_gff_format = bool(options.gff2)
        in_fname, in2_fname, out_fname, direction = args
    except Exception:
        # Narrower than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt are not reported as option-parsing errors.
        doc_optparse.exception()

    # Set readers to handle either GFF or default format.
    if in1_gff_format:
        in1_reader_wrapper = GFFIntervalToBEDReaderWrapper
    else:
        in1_reader_wrapper = NiceReaderWrapper
    if in2_gff_format:
        in2_reader_wrapper = GFFIntervalToBEDReaderWrapper
    else:
        in2_reader_wrapper = NiceReaderWrapper

    g1 = in1_reader_wrapper(fileinput.FileInput(in_fname),
                            chrom_col=chr_col_1,
                            start_col=start_col_1,
                            end_col=end_col_1,
                            strand_col=strand_col_1,
                            fix_strand=True)
    g2 = in2_reader_wrapper(fileinput.FileInput(in2_fname),
                            chrom_col=chr_col_2,
                            start_col=start_col_2,
                            end_col=end_col_2,
                            strand_col=strand_col_2,
                            fix_strand=True)

    # Find flanking features.
    try:
        # Opening the output inside ``with`` ensures it is closed on success
        # and on error alike; the original never closed it.
        with open(out_fname, "w") as out_file:
            for result in proximal_region_finder([g1, g2], direction):
                if isinstance(result, list):
                    line, closest_feature = result
                    # Need to join outputs differently depending on file types.
                    if in1_gff_format:
                        # Output is GFF with added attribute 'closest feature.'
                        # Intervals are in BED coordinates; need to convert to GFF.
                        line = convert_bed_coords_to_gff(line)
                        closest_feature = convert_bed_coords_to_gff(
                            closest_feature)
                        # Backslash-escape double quotes in the closest
                        # feature's fields so they can sit inside the quoted
                        # attribute value.
                        closest_text = "\t".join(
                            closest_feature.fields).replace("\"", "\\\"")
                        out_file.write(
                            "%s closest_feature \"%s\" \n" %
                            ("\t".join(line.fields), closest_text))
                    else:
                        # Output is BED + closest feature fields.
                        output_line_fields = (
                            list(line.fields) + list(closest_feature.fields))
                        out_file.write(
                            "%s\n" % ("\t".join(output_line_fields)))
                else:
                    # Non-list results are written out as-is.
                    out_file.write("%s\n" % result)
    except ParseError as exc:
        fail("Invalid file format: %s" % str(exc))

    print("Direction: %s" % (direction))
    if g1.skipped > 0:
        print(skipped(g1, filedesc=" of 1st dataset"))
    if g2.skipped > 0:
        print(skipped(g2, filedesc=" of 2nd dataset"))