# Example #1
0
def __main__():
    """Slice MAF blocks read from stdin down to the ranges listed in a file.

    Positional arguments (after option parsing) are the path of the range
    file and the integer index of the reference component.  The ``mincols``
    option defaults to 10.  Each non-blank line of the range file supplies
    one interval through its first two whitespace-separated integer fields.

    For every block on stdin, each interval the intersecter reports for the
    reference component's forward-strand extent is clipped to that extent
    and used to slice the block.  A slice is written to stdout only when
    every component in it has a ``size`` of at least 1 and its
    ``text_size`` is greater than ``mincols``.
    """

    # Parse Command Line

    # `__doc__` resolves to the module docstring here, not this function's.
    options, args = doc_optparse.parse(__doc__)

    try:
        range_filename = args[0]
        refindex = int(args[1])
        mincols = int(options.mincols) if options.mincols else 10
    except Exception:
        # Missing or malformed arguments are handed to the option parser's
        # exit routine.  Catching Exception (not a bare except) leaves
        # KeyboardInterrupt and SystemExit alone.
        doc_optparse.exit()

    # Load Intervals

    intersecter = intervals.Intersecter()
    # open() + with: the handle is closed deterministically, and the
    # Python-2-only file() builtin is avoided.
    with open(range_filename) as range_file:
        for line in range_file:
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file).
                continue
            intersecter.add_interval(
                intervals.Interval(int(fields[0]), int(fields[1])))

    # Start MAF on stdout

    out = bx.align.maf.Writer(sys.stdout)

    # Iterate over input MAF

    for maf in bx.align.maf.Reader(sys.stdin):
        ref = maf.components[refindex]
        # The reference extent is the same for every interval of this block.
        ref_start = ref.get_forward_strand_start()
        ref_end = ref.get_forward_strand_end()
        # Find overlap with reference component
        intersections = intersecter.find(ref_start, ref_end)
        # Keep output maf ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            # Clip the interval to the part covered by the reference.
            start = max(interval.start, ref_start)
            end = min(interval.end, ref_end)
            sliced = maf.slice_by_component(refindex, start, end)
            # Drop slices in which any component ended up empty.
            good = all(c.size >= 1 for c in sliced.components)
            if good and sliced.text_size > mincols:
                out.write(sliced)

    # Close output MAF

    out.close()
def __main__():
    """Write the portions of stdin MAF blocks that fall inside given ranges.

    Expects two positional arguments: a range file name and the integer
    index of the reference component within each block.  The ``mincols``
    option is used as the column threshold and defaults to 10.  Every
    non-blank line of the range file contributes one interval taken from
    its first two whitespace-separated fields, both parsed as integers.

    Each block is sliced once per interval found for the reference
    component's forward-strand start/end; the slice is emitted on stdout
    when none of its components has ``size`` below 1 and its ``text_size``
    exceeds ``mincols``.
    """

    # Parse Command Line

    # Inside a function `__doc__` still names the module-level docstring.
    options, args = doc_optparse.parse( __doc__ )

    try:
        range_filename = args[ 0 ]
        refindex = int( args[ 1 ] )
        if options.mincols:
            mincols = int( options.mincols )
        else:
            mincols = 10
    except Exception:
        # Bad or missing arguments: defer to the parser's exit routine.
        # A bare except would also have trapped KeyboardInterrupt/SystemExit.
        doc_optparse.exit()

    # Load Intervals

    intersecter = intervals.Intersecter()
    # Use open() in a with-block so the range file is always closed
    # (file() exists only on Python 2 and the handle used to leak).
    with open( range_filename ) as range_handle:
        for line in range_handle:
            fields = line.split()
            if len( fields ) == 0:
                # Tolerate blank lines instead of failing on fields[0].
                continue
            lo, hi = int( fields[0] ), int( fields[1] )
            intersecter.add_interval( intervals.Interval( lo, hi ) )

    # Start MAF on stdout

    out = bx.align.maf.Writer( sys.stdout )

    # Iterate over input MAF

    for maf in bx.align.maf.Reader( sys.stdin ):
        ref = maf.components[ refindex ]
        # Query the forward-strand extent once per block.
        block_start = ref.get_forward_strand_start()
        block_end = ref.get_forward_strand_end()
        # Find overlap with reference component
        intersections = intersecter.find( block_start, block_end )
        # Keep output maf ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            # Never slice outside the reference component itself.
            start = max( interval.start, block_start )
            end = min( interval.end, block_end )
            sliced = maf.slice_by_component( refindex, start, end )
            # A slice with an empty component is not written.
            if not all( c.size >= 1 for c in sliced.components ):
                continue
            if sliced.text_size > mincols:
                out.write( sliced )

    # Close output MAF

    out.close()
def __main__():
    """Slice MAF blocks from stdin to per-source ranges listed in a file.

    Positional arguments: the range file name, then a reference that is
    either an integer component index or, when it does not parse as an
    integer, a source name.  Options: ``mincols`` (default 10) and
    ``prefix`` (default empty string).

    Each non-blank range line holds ``source start end``; intervals are
    grouped per ``prefix + source``.  With a reference name, the reference
    component of each block is the first one whose ``src`` text before the
    first '.' equals that name; blocks without such a component are
    skipped, as are blocks whose reference ``src`` has no stored intervals.
    Each interval found for the reference's ``start``/``end`` is clipped to
    that extent and used to slice the block.  A slice is written to stdout
    when every component has ``size`` >= 1 and ``text_size`` > ``mincols``.
    """

    # Parse Command Line

    # `__doc__` is the module docstring here, not this function's docstring.
    options, args = doc_optparse.parse( __doc__ )

    try:
        range_filename = args[ 0 ]
        # The second argument is an index if it parses as an integer,
        # otherwise it is taken as a source name.
        try:
            refindex = int( args[ 1 ] )
            refname = None
        except ValueError:
            refindex = None
            refname = args[ 1 ]
        mincols = int( options.mincols ) if options.mincols else 10
        prefix = options.prefix or ""
    except Exception:
        # Missing/malformed arguments go to the parser's exit routine;
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        doc_optparse.exit()

    # Load Intervals

    intersecters = dict()
    # with/open closes the range file; file() is Python 2 only.
    with open( range_filename ) as range_file:
        for line in range_file:
            fields = line.split()
            if not fields:
                # Skip blank lines rather than failing on fields[0].
                continue
            src = prefix + fields[0]
            if src not in intersecters:
                intersecters[src] = intervals.Intersecter()
            intersecters[src].add_interval(
                intervals.Interval( int( fields[1] ), int( fields[2] ) ) )

    # Start MAF on stdout

    out = bx.align.maf.Writer( sys.stdout )

    # Iterate over input MAF

    for maf in bx.align.maf.Reader( sys.stdin ):
        # `is not None` so that an empty name is never mistaken for
        # "use the (unset) numeric index".
        if refname is not None:
            sourcenames = [ component.src.split('.')[0]
                            for component in maf.components ]
            try:
                refindex = sourcenames.index( refname )
            except ValueError:
                # This block has no component for the requested name.
                continue

        ref_component = maf.components[ refindex ]
        # Find overlap with reference component
        if ref_component.src not in intersecters:
            continue
        # NOTE(review): this uses ref_component.start/end as-is; confirm they
        # are in the same coordinate system as the range file for components
        # on the '-' strand.
        intersections = intersecters[ ref_component.src ].find(
            ref_component.start, ref_component.end )
        # Keep output maf ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            # Clip the interval to the reference component's extent.
            start = max( interval.start, ref_component.start )
            end = min( interval.end, ref_component.end )
            sliced = maf.slice_by_component( refindex, start, end )
            # Reject slices in which any component is empty.
            good = all( c.size >= 1 for c in sliced.components )
            if good and sliced.text_size > mincols:
                out.write( sliced )

    # Close output MAF

    out.close()
def _required_option(options, name, message):
    # Return the value of option `name`; when it is unset or empty, write
    # `message` to stderr and exit.
    value = getattr(options, name, None)
    if not value:
        sys.stderr.write(message + "\n")
        sys.exit()
    return value


def _parse_interval_line(line, dbkey, chromCol, startCol, endCol, strandCol):
    # Turn one interval line into (src, start, end, strand); return None for
    # comment lines and for lines whose columns are missing or non-numeric.
    if line.startswith("#"):
        return None
    fields = line.split()
    try:
        src = dbkey + "." + fields[chromCol]
        start = int(fields[startCol])
        end = int(fields[endCol])
    except (IndexError, ValueError):
        return None
    # Without a usable strand column the interval counts as '+'.
    if 0 <= strandCol < len(fields):
        strand = fields[strandCol]
    else:
        strand = "+"
    return src, start, end, strand


def _slice_block(maf, src, start, end, strand, mincols):
    # Return the part of `maf` that covers start-end on component `src`,
    # oriented to `strand`, or None when there is nothing usable to emit.
    ref = maf.get_component_by_src(src)
    if ref is None:
        return None

    # save old score here for later use
    old_score = maf.score
    # If the reference component is on the '-' strand we should complement the interval
    if ref.strand == '-':
        slice_start = max(ref.src_size - end, ref.start)
        # min (not max): clip to the component end exactly as the '+' branch
        # does; max either ignored the interval end or overshot the component.
        slice_end = min(ref.src_size - start, ref.end)
    else:
        slice_start = max(start, ref.start)
        slice_end = min(end, ref.end)

    # when interval is out-of-range (not in maf index), fail silently: else could create tons of scroll
    try:
        sliced = maf.slice_by_component(ref, slice_start, slice_end)
    except Exception:
        return None

    if not all(c.size >= 1 for c in sliced.components):
        return None
    if not sliced.text_size > mincols:
        return None
    if strand != ref.strand:
        sliced = sliced.reverse_complement()
    # restore old score, may not be accurate, but it is better than 0 for everything
    sliced.score = old_score
    return sliced


def __main__():
    """Extract the parts of MAF blocks that cover intervals from a file.

    Options read: ``dbkey``, the 1-based columns ``chromCol``, ``startCol``,
    ``endCol`` and ``strandCol``, plus ``mafFile``, ``interval_file`` and
    ``output_file``.  A missing option is reported on stderr and the program
    exits.  Interval lines starting with '#' are ignored; the source looked
    up in each block is ``dbkey + "." + chromosome``.

    For every block of the MAF file and every interval, the block is sliced
    to the interval clipped to the matching component, reverse-complemented
    when the interval strand differs from the component strand, given the
    original block's score and written to the output file.  The number of
    blocks written is printed on stdout at the end.
    """

    # Parse Command Line

    # `__doc__` is the module docstring here, not this function's docstring.
    options, args = cookbook.doc_optparse.parse(__doc__)

    mincols = 0
    dbkey = getattr(options, "dbkey", None) or "?"

    try:
        # Columns are given 1-based on the command line and used 0-based.
        chromCol = int(_required_option(
            options, "chromCol", "Chromosome column has not been specified.")) - 1
        startCol = int(_required_option(
            options, "startCol", "Start column has not been specified.")) - 1
        endCol = int(_required_option(
            options, "endCol", "End column has not been specified.")) - 1
        strandCol = int(_required_option(
            options, "strandCol", "Strand column has not been specified.")) - 1
    except (TypeError, ValueError):
        # Previously this exited without saying why.
        sys.stderr.write("Column options must be integers.\n")
        sys.exit()

    mafFile = _required_option(
        options, "mafFile", "Desired source MAF type has not been specified.")
    interval_file = _required_option(
        options, "interval_file", "Input interval file has not been specified.")
    output_file = _required_option(
        options, "output_file", "Output file has not been specified.")

    if dbkey == "?":
        sys.stderr.write("You must specify a proper build in order to extract alignments.\n")
        sys.exit()

    # Read the intervals once up front instead of reopening and reparsing the
    # interval file for every MAF block.
    wanted = []
    try:
        with open(interval_file, "r") as interval_handle:
            for line in interval_handle:
                parsed = _parse_interval_line(
                    line, dbkey, chromCol, startCol, endCol, strandCol)
                if parsed is not None:
                    wanted.append(parsed)
    except (IOError, OSError):
        sys.stdout.write("Error Reading Interval File.\n")

    output = open(output_file, "w")
    out = bx.align.maf.Writer(output)

    # Iterate over input ranges
    num_blocks = 0

    try:
        try:
            with open(mafFile, "r") as maf_handle:
                for maf in bx.align.maf.Reader(maf_handle):
                    for src, start, end, strand in wanted:
                        try:
                            sliced = _slice_block(
                                maf, src, start, end, strand, mincols)
                        except Exception:
                            # One bad interval/block pair must not stop the run.
                            continue
                        if sliced is None:
                            continue
                        # Written outside the per-interval handler so that a
                        # failed write is reported instead of silently lost.
                        out.write(sliced)
                        num_blocks += 1
        finally:
            # Close output MAF
            out.close()
            output.close()  # harmless if the writer already closed it
        sys.stdout.write("%d MAF blocks extracted.\n" % num_blocks)
    except Exception:
        sys.stdout.write("Error Reading MAF File\n")
def __main__():
    """Write the slices of MAF blocks that cover the intervals of a file.

    Uses the options ``dbkey``, ``chromCol``, ``startCol``, ``endCol``,
    ``strandCol`` (all columns 1-based), ``mafFile``, ``interval_file`` and
    ``output_file``; each missing one is reported on stderr before exiting.
    Lines of the interval file beginning with '#' are skipped, and the
    component searched for in a block is ``dbkey + "." + chromosome``.

    Every block of the MAF file is combined with every interval: the
    interval is clipped to the matching component, the block is sliced,
    reverse-complemented if the interval strand differs from the component
    strand, given the score of the original block and written to the
    output file.  A count of written blocks is printed on stdout.
    """

    # Parse Command Line

    # `__doc__` refers to the module docstring, not to this docstring.
    options, args = cookbook.doc_optparse.parse(__doc__)

    def fail(message):
        # Report a usage problem on stderr and stop.
        sys.stderr.write(message + "\n")
        sys.exit()

    mincols = 0
    try:
        dbkey = options.dbkey or "?"

        # Column options are 1-based; convert to 0-based list indices.
        if not options.chromCol:
            fail("Chromosome column has not been specified.")
        chromCol = int(options.chromCol) - 1

        if not options.startCol:
            fail("Start column has not been specified.")
        startCol = int(options.startCol) - 1

        if not options.endCol:
            fail("End column has not been specified.")
        endCol = int(options.endCol) - 1

        if not options.strandCol:
            fail("Strand column has not been specified.")
        strandCol = int(options.strandCol) - 1

        if not options.mafFile:
            fail("Desired source MAF type has not been specified.")
        mafFile = options.mafFile

        if not options.interval_file:
            fail("Input interval file has not been specified.")
        interval_file = options.interval_file

        if not options.output_file:
            fail("Output file has not been specified.")
        output_file = options.output_file
    except Exception as error:
        # SystemExit raised by fail() is not an Exception and passes
        # through; anything else (e.g. a non-numeric column) used to exit
        # without any explanation.
        sys.stderr.write("Invalid command line options: %s\n" % error)
        sys.exit()

    if dbkey == "?":
        fail("You must specify a proper build in order to extract alignments.")

    # Parse the interval file a single time; it used to be reopened (and
    # never closed) for every block of the MAF file.
    wanted = []
    try:
        with open(interval_file, "r") as interval_handle:
            for line in interval_handle:
                if line.startswith("#"):
                    continue
                fields = line.split()
                try:
                    src = dbkey + "." + fields[chromCol]
                    start = int(fields[startCol])
                    end = int(fields[endCol])
                except (IndexError, ValueError):
                    # Line is too short or not numeric: ignore it.
                    continue
                # A missing or absent strand column means '+'.
                if 0 <= strandCol < len(fields):
                    strand = fields[strandCol]
                else:
                    strand = "+"
                wanted.append((src, start, end, strand))
    except (IOError, OSError):
        sys.stdout.write("Error Reading Interval File.\n")

    output = open(output_file, "w")
    out = bx.align.maf.Writer(output)

    # Iterate over input ranges
    num_blocks = 0

    try:
        try:
            with open(mafFile, "r") as maf_handle:
                for maf in bx.align.maf.Reader(maf_handle):
                    for src, start, end, strand in wanted:
                        try:
                            ref = maf.get_component_by_src(src)
                            if ref is None:
                                continue

                            # save old score here for later use
                            old_score = maf.score
                            # If the reference component is on the '-' strand we should complement the interval
                            if ref.strand == '-':
                                slice_start = max(ref.src_size - end, ref.start)
                                # min, mirroring the '+' branch: max never
                                # clipped the slice to the interval end.
                                slice_end = min(ref.src_size - start, ref.end)
                            else:
                                slice_start = max(start, ref.start)
                                slice_end = min(end, ref.end)

                            # when interval is out-of-range (not in maf index), fail silently: else could create tons of scroll
                            sliced = maf.slice_by_component(
                                ref, slice_start, slice_end)

                            if not all(c.size >= 1 for c in sliced.components):
                                continue
                            if not sliced.text_size > mincols:
                                continue
                            if strand != ref.strand:
                                sliced = sliced.reverse_complement()
                            # restore old score, may not be accurate, but it is better than 0 for everything
                            sliced.score = old_score
                        except Exception:
                            # One unusable interval/block pair is skipped.
                            continue
                        # Kept outside the handler above so write failures
                        # are reported rather than silently dropped.
                        out.write(sliced)
                        num_blocks += 1
        finally:
            # Close output MAF
            out.close()
            output.close()  # harmless if the writer already closed it
        sys.stdout.write("%d MAF blocks extracted.\n" % num_blocks)
    except Exception:
        sys.stdout.write("Error Reading MAF File\n")