def __main__():
    """Extract selected blocks from a MAF file, in the order they are listed.

    Command-line arguments (read from ``sys.argv``):
        1. tabular file listing the wanted block numbers, one per line
        2. input MAF file
        3. output MAF file
        4. 1-based column of the tabular file that holds the block number
        5. species option, passed to ``maf_utilities.parse_species_option``

    A block number is matched against the 0-based position of a block in the
    MAF file. Blocks are written in listing order and as many times as they
    are listed. Lines whose block number cannot be read are skipped, and
    their 0-based line numbers are reported on stdout at the end.

    Exits with a non-zero status and a message on stderr when the column is
    invalid or when reading the MAF file fails.
    """
    input_block_filename = sys.argv[1].strip()
    input_maf_filename = sys.argv[2].strip()
    output_filename1 = sys.argv[3].strip()
    block_col = int(sys.argv[4].strip()) - 1
    if block_col < 0:
        # sys.exit(<str>) writes the message to stderr and exits with status 1,
        # so the failure is visible in the exit code as well as on stderr.
        sys.exit("Invalid column specified")
    species = maf_utilities.parse_species_option(sys.argv[5].strip())

    maf_writer = bx.align.maf.Writer(open(output_filename1, 'w'))
    # we want to maintain order of block file and write blocks as many times as they are listed
    failed_lines = []
    try:
        with open(input_block_filename, 'r') as block_file:
            for ctr, line in enumerate(block_file):
                try:
                    block_wanted = int(line.split("\t")[block_col].strip())
                except (ValueError, IndexError):
                    # Column missing or not an integer: remember the line and move on.
                    failed_lines.append(str(ctr))
                    continue
                try:
                    # The MAF file is scanned from the start for every request;
                    # the handle is closed again as soon as the block is found.
                    with open(input_maf_filename, 'r') as maf_file:
                        for count, block in enumerate(bx.align.maf.Reader(maf_file)):
                            if count == block_wanted:
                                if species:
                                    block = block.limit_to_species(species)
                                maf_writer.write(block)
                                break
                except Exception:
                    sys.exit("Your MAF file appears to be malformed.")
    finally:
        # Close the writer on every path so the output is flushed.
        maf_writer.close()
    if failed_lines:
        print("Failed to extract from %i lines (%s)." % (
            len(failed_lines), ",".join(failed_lines)))
Exemplo n.º 2
0
def main():
    """Copy MAF blocks to a new file, optionally restricted to given species.

    Command-line arguments (read from ``sys.argv``):
        1. species option, parsed by ``maf_utilities.parse_species_option``
        2. input MAF file
        3. output MAF file
        4. integer flag; non-zero keeps blocks that lack some requested species
        5. integer; a block is kept only when it contains more species than this

    All-gap columns are removed from every block before it is tested and
    written. A short summary is printed to stdout when finished.
    """
    species = maf_utilities.parse_species_option(sys.argv[1])
    spec_len = len(species) if species else 0
    try:
        maf_reader = bx.align.maf.Reader(open(sys.argv[2], 'r'))
        maf_writer = bx.align.maf.Writer(open(sys.argv[3], 'w'))
    except Exception:
        print("Your MAF file appears to be malformed.", file=sys.stderr)
        # Non-zero status: this is a failure, not a normal exit.
        sys.exit(1)
    allow_partial = bool(int(sys.argv[4]))
    min_species_per_block = int(sys.argv[5])

    maf_blocks_kept = 0
    try:
        for m in maf_reader:
            if species:
                m = m.limit_to_species(species)
            m.remove_all_gap_columns()
            spec_in_block_len = len(maf_utilities.get_species_in_block(m))
            complete_enough = not species or allow_partial or spec_in_block_len == spec_len
            if complete_enough and spec_in_block_len > min_species_per_block:
                maf_writer.write(m)
                maf_blocks_kept += 1
    finally:
        # Release both files even when a block fails to parse or write.
        maf_reader.close()
        maf_writer.close()

    # The code above already allows for "no species restriction"; joining a
    # missing species list here would raise instead of printing the summary.
    if species:
        print("Restricted to species: %s." % ", ".join(species))
    else:
        print("Not restricted to species.")
    print("%i MAF blocks have been kept." % maf_blocks_kept)
Exemplo n.º 3
0
def _write_block_by_index( maf_filename, block_wanted, species, maf_writer ):
    """Write the block at 0-based position block_wanted of maf_filename, if there is one."""
    maf_file = open( maf_filename, 'r' )
    try:
        for count, block in enumerate( bx.align.maf.Reader( maf_file ) ):
            if count == block_wanted:
                if species:
                    block = block.limit_to_species( species )
                maf_writer.write( block )
                break
    finally:
        # The file is reopened for every request, so it must be closed every time.
        maf_file.close()


def __main__():
    """Extract selected blocks from a MAF file, in the order they are listed.

    Command-line arguments (read from ``sys.argv``):
        1. tabular file listing the wanted block numbers, one per line
        2. input MAF file
        3. output MAF file
        4. 1-based column of the tabular file that holds the block number
        5. species option, passed to ``maf_utilities.parse_species_option``

    Lines whose block number cannot be read are skipped; their 0-based line
    numbers are reported on stdout at the end. Exits with a non-zero status
    and a message on stderr when the column is invalid or the MAF file cannot
    be read.
    """
    input_block_filename = sys.argv[1].strip()
    input_maf_filename = sys.argv[2].strip()
    output_filename1 = sys.argv[3].strip()
    block_col = int( sys.argv[4].strip() ) - 1
    if block_col < 0:
        # sys.exit(<str>) writes the message to stderr and exits with status 1.
        sys.exit( "Invalid column specified" )
    species = maf_utilities.parse_species_option( sys.argv[5].strip() )

    maf_writer = bx.align.maf.Writer( open( output_filename1, 'w' ) )
    # we want to maintain order of block file and write blocks as many times as they are listed
    failed_lines = []
    block_file = open( input_block_filename, 'r' )
    try:
        for ctr, line in enumerate( block_file ):
            try:
                block_wanted = int( line.split( "\t" )[block_col].strip() )
            except ( ValueError, IndexError ):
                # Column missing or not an integer: remember the line and move on.
                failed_lines.append( str( ctr ) )
                continue
            try:
                _write_block_by_index( input_maf_filename, block_wanted, species, maf_writer )
            except Exception:
                sys.exit( "Your MAF file appears to be malformed." )
    finally:
        block_file.close()
        maf_writer.close()
    if failed_lines:
        print( "Failed to extract from %i lines (%s)." % ( len( failed_lines ), ",".join( failed_lines ) ) )
Exemplo n.º 4
0
def main():
    """Copy MAF blocks to a new file, optionally restricted to given species.

    Command-line arguments (read from ``sys.argv``):
        1. species option, parsed by ``maf_utilities.parse_species_option``
        2. input MAF file
        3. output MAF file
        4. integer flag; non-zero keeps blocks that lack some requested species
        5. integer; a block is kept only when it contains more species than this

    All-gap columns are removed from every block before it is tested and
    written. A short summary is printed to stdout when finished.
    """
    species = maf_utilities.parse_species_option( sys.argv[1] )
    if species:
        spec_len = len( species )
    else:
        spec_len = 0
    try:
        maf_reader = bx.align.maf.Reader( open( sys.argv[2], 'r' ) )
        maf_writer = bx.align.maf.Writer( open( sys.argv[3], 'w' ) )
    except Exception:
        print("Your MAF file appears to be malformed.", file=sys.stderr)
        # Non-zero status: this is a failure, not a normal exit.
        sys.exit( 1 )
    allow_partial = bool( int( sys.argv[4] ) )
    min_species_per_block = int( sys.argv[5] )

    maf_blocks_kept = 0
    try:
        for m in maf_reader:
            if species:
                m = m.limit_to_species( species )
            m.remove_all_gap_columns()
            spec_in_block_len = len( maf_utilities.get_species_in_block( m ) )
            if ( not species or allow_partial or spec_in_block_len == spec_len ) and spec_in_block_len > min_species_per_block:
                maf_writer.write( m )
                maf_blocks_kept += 1
    finally:
        # Release both files even when a block fails to parse or write.
        maf_reader.close()
        maf_writer.close()

    # "No species restriction" is a supported case above, so the summary must
    # not try to join a missing species list.
    if species:
        print("Restricted to species: %s." % ", ".join( species ))
    else:
        print("Not restricted to species.")
    print("%i MAF blocks have been kept." % maf_blocks_kept)
Exemplo n.º 5
0
def main():
    """Filter the blocks of a MAF file with a user-supplied Python script.

    Command-line arguments, consumed in order from ``sys.argv``:
        1. filter script, executed once per candidate block
        2. input MAF file
        3. output MAF file
        4. directory to create; receives a copy of the script as ``debug.txt``
        5. species option, parsed by ``maf_utilities.parse_species_option``
        6. minimum block ``text_size``
        7. maximum block ``text_size``; values below 1 mean no upper limit
        8. minimum number of components a kept block must have
        9. integer flag; non-zero drops blocks with fewer components than
           the number of species
        10. comma-separated species list, read only when argument 5 yields
            no species

    The script runs with ``maf_block`` and ``ret_val`` as its local names and
    must set ``ret_val`` to a true value for the block to be kept.

    Exits with a non-zero status and a message on stderr when arguments are
    missing or invalid, or when the MAF files cannot be opened.
    """
    # Read command line arguments
    try:
        script_file = sys.argv.pop(1)
        maf_file = sys.argv.pop(1)
        out_file = sys.argv.pop(1)
        additional_files_path = sys.argv.pop(1)
        species = maf_utilities.parse_species_option(sys.argv.pop(1))
        min_size = int(sys.argv.pop(1))
        max_size = int(sys.argv.pop(1))
        if max_size < 1:
            # sys.maxsize exists on Python 2.6+ and 3; sys.maxint is Python 2 only.
            max_size = sys.maxsize
        min_species_per_block = int(sys.argv.pop(1))
        exclude_incomplete_blocks = int(sys.argv.pop(1))
        if species:
            num_species = len(species)
        else:
            num_species = len(sys.argv.pop(1).split(','))
    except Exception:
        # sys.exit(<str>) writes the message to stderr and exits with status 1.
        sys.exit("One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep")

    # Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader(open(maf_file, 'r'))
        maf_writer = bx.align.maf.Writer(open(out_file, 'w'))
    except Exception:
        sys.exit("Your MAF file appears to be malformed.")

    blocks_seen = 0
    blocks_kept = 0
    try:
        # Save script file for debugging/verification info later
        os.mkdir(additional_files_path)
        shutil.copy(script_file, os.path.join(additional_files_path, 'debug.txt'))

        # Compile the filter once instead of re-reading it for every block.
        # The trailing newline keeps compile() happy on old interpreters when
        # the script does not end with one.
        with open(script_file) as script_handle:
            filter_code = compile(script_handle.read() + "\n", script_file, 'exec')

        # Loop through blocks, running filter on each
        # 'maf_block' and 'ret_val' are used/shared in the provided code file
        # 'ret_val' should be set to True if the block is to be kept
        for maf_block in maf_reader:
            blocks_seen += 1
            if min_size <= maf_block.text_size <= max_size:
                local = {'maf_block': maf_block, 'ret_val': False}
                # NOTE(security): this runs arbitrary code from script_file;
                # only use scripts that come from a trusted source.
                exec(filter_code, {}, local)
                if local['ret_val']:
                    # Species limiting must be done after filters as filters could be run on non-requested output species
                    if species:
                        maf_block = maf_block.limit_to_species(species)
                    if len(maf_block.components) >= min_species_per_block and (
                            not exclude_incomplete_blocks
                            or len(maf_block.components) >= num_species):
                        maf_writer.write(maf_block)
                        blocks_kept += 1
    finally:
        maf_writer.close()
        maf_reader.close()
    # Count blocks explicitly: the last enumerate() index is 0 both for an
    # empty file and for a file holding exactly one block.
    if not blocks_seen:
        print("Your file contains no valid maf_blocks.")
    else:
        print('Kept %s of %s blocks (%.2f%%).' % (
            blocks_kept, blocks_seen, float(blocks_kept) / float(blocks_seen) * 100.0))
Exemplo n.º 6
0
def main():
    """Filter the blocks of a MAF file with a user-supplied Python script.

    Command-line arguments, consumed in order from ``sys.argv``:
        1. filter script, executed once per candidate block
        2. input MAF file
        3. output MAF file
        4. directory to create; receives a copy of the script as ``debug.txt``
        5. species option, parsed by ``maf_utilities.parse_species_option``
        6. minimum block ``text_size``
        7. maximum block ``text_size``; values below 1 mean no upper limit
        8. minimum number of components a kept block must have
        9. integer flag; non-zero drops blocks with fewer components than
           the number of species
        10. comma-separated species list, read only when argument 5 yields
            no species

    The script runs with ``maf_block`` and ``ret_val`` as its local names and
    must set ``ret_val`` to a true value for the block to be kept.

    Exits with a non-zero status and a message on stderr when arguments are
    missing or invalid, or when the MAF files cannot be opened.
    """
    # Read command line arguments
    try:
        script_file = sys.argv.pop(1)
        maf_file = sys.argv.pop(1)
        out_file = sys.argv.pop(1)
        additional_files_path = sys.argv.pop(1)
        species = maf_utilities.parse_species_option(sys.argv.pop(1))
        min_size = int(sys.argv.pop(1))
        max_size = int(sys.argv.pop(1))
        if max_size < 1:
            # sys.maxint is gone in Python 3; sys.maxsize works on 2.6+ and 3.
            max_size = sys.maxsize
        min_species_per_block = int(sys.argv.pop(1))
        exclude_incomplete_blocks = int(sys.argv.pop(1))
        if species:
            num_species = len(species)
        else:
            num_species = len(sys.argv.pop(1).split(","))
    except Exception:
        # sys.exit(<str>) writes the message to stderr and exits with status 1.
        sys.exit("One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep")

    # Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader(open(maf_file, "r"))
        maf_writer = bx.align.maf.Writer(open(out_file, "w"))
    except Exception:
        sys.exit("Your MAF file appears to be malformed.")

    total_blocks = 0
    blocks_kept = 0
    try:
        # Save script file for debugging/verification info later
        os.mkdir(additional_files_path)
        shutil.copy(script_file, os.path.join(additional_files_path, "debug.txt"))

        # Read and compile the filter a single time; the appended newline
        # guards against scripts that lack a final line break.
        with open(script_file) as script_handle:
            filter_code = compile(script_handle.read() + "\n", script_file, "exec")

        # Loop through blocks, running filter on each
        # 'maf_block' and 'ret_val' are used/shared in the provided code file
        # 'ret_val' should be set to True if the block is to be kept
        for maf_block in maf_reader:
            total_blocks += 1
            if not min_size <= maf_block.text_size <= max_size:
                continue
            local = {"maf_block": maf_block, "ret_val": False}
            # NOTE(security): executes arbitrary code from script_file; the
            # script must come from a trusted source.
            exec(filter_code, {}, local)
            if not local["ret_val"]:
                continue
            # Species limiting must be done after filters as filters could be run on non-requested output species
            if species:
                maf_block = maf_block.limit_to_species(species)
            component_count = len(maf_block.components)
            if component_count >= min_species_per_block and (
                not exclude_incomplete_blocks or component_count >= num_species
            ):
                maf_writer.write(maf_block)
                blocks_kept += 1
    finally:
        maf_writer.close()
        maf_reader.close()
    # An explicit counter tells "no blocks" apart from "exactly one block";
    # the last enumerate() index is 0 in both cases.
    if total_blocks == 0:
        print("Your file contains no valid maf_blocks.")
    else:
        print("Kept %s of %s blocks (%.2f%%)." % (blocks_kept, total_blocks, float(blocks_kept) / float(total_blocks) * 100.0))
def __main__():
    """Write every component of every MAF block as a FASTA-style record.

    Positional arguments taken from ``sys.argv``: input MAF path, output
    path, species option (parsed by ``maf_utilities.parse_species_option``)
    and a flag string. When species are given and the flag equals
    ``"partial_disallowed"``, blocks containing fewer species than requested
    are skipped. Each block's records are followed by an empty line.
    """
    try:
        reader = maf.Reader(open(sys.argv[1]))
    except Exception as err:
        maf_utilities.tool_fail("Error opening input MAF: %s" % err)
    try:
        fasta_out = open(sys.argv[2], 'w')
    except Exception as err:
        maf_utilities.tool_fail("Error opening file for output: %s" % err)
    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        num_species = len(species) if species else 0
    except Exception as err:
        maf_utilities.tool_fail("Error determining species value: %s" % err)
    try:
        partial = sys.argv[4]
    except Exception as err:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % err)

    if not species:
        print("Not restricted to species.")
    else:
        print("Restricted to species: %s" % ', '.join(species))

    for block_index, block in enumerate(reader):
        if species:
            block = block.limit_to_species(species)
            species_present = len(maf_utilities.get_species_in_block(block))
            if species_present < num_species and partial == "partial_disallowed":
                continue
        # Per-block running index of components seen for each species (0-based).
        occurrences = {}
        for comp in block.components:
            spec, _chrom = maf_utilities.src_split(comp.src)
            occurrences[spec] = occurrences.get(spec, -1) + 1
            seq_index = occurrences[spec]
            header_fields = {
                'block_index': block_index,
                'species': spec,
                'sequence_index': seq_index,
            }
            header = maf_utilities.get_fasta_header(
                comp, header_fields,
                suffix="%s_%i_%i" % (spec, block_index, seq_index))
            fasta_out.write("%s\n" % header)
            fasta_out.write("%s\n" % comp.text)
        fasta_out.write("\n")
    fasta_out.close()
def __main__():
    """Write one FASTA record per species, concatenated over all MAF blocks.

    Positional arguments taken from ``sys.argv``: species option (parsed by
    ``maf_utilities.parse_species_option``), input MAF path, output path.
    When no species are given, the species are taken from the MAF file via
    ``maf_utilities.get_species_in_maf``.

    For each species the MAF file is read once from the start; every
    species-split block contributes the component text of that species, or
    a run of ``-`` of the block's ``text_size`` when the species is absent.
    """
    try:
        species = maf_utilities.parse_species_option(sys.argv[1])
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        input_filename = sys.argv[2]
    except Exception as e:
        maf_utilities.tool_fail("Error reading MAF filename: %s" % e)
    try:
        file_out = open(sys.argv[3], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")
        try:
            species = maf_utilities.get_species_in_maf(input_filename)
        except Exception as e:
            maf_utilities.tool_fail(
                "Error determining species in input MAF: %s" % e)

    try:
        for spec in species:
            file_out.write(">" + spec + "\n")
            try:
                # One pass over the MAF per species; close the handle after
                # each pass instead of leaking one per species.
                with open(input_filename, 'r') as maf_file:
                    for start_block in maf.Reader(maf_file):
                        for block in maf_utilities.iter_blocks_split_by_species(
                                start_block):
                            block.remove_all_gap_columns()  # remove extra gaps
                            # blocks only have one occurrence of a particular species, so this is safe
                            component = block.get_component_by_src_start(spec)
                            if component:
                                file_out.write(component.text)
                            else:
                                file_out.write("-" * block.text_size)
            except Exception as e:
                maf_utilities.tool_fail(
                    "Your MAF file appears to be malformed: %s" % e)
            file_out.write("\n")
    finally:
        # Close (and flush) the output on failure paths as well.
        file_out.close()
def __main__():
    """Dump each component of each MAF block as a FASTA-style record.

    ``sys.argv`` supplies, in order: the input MAF path, the output path, the
    species option (parsed by ``maf_utilities.parse_species_option``) and a
    flag string. With species given and the flag set to
    ``"partial_disallowed"``, blocks holding fewer species than requested are
    skipped. An empty line is written after the records of each block.
    """
    try:
        blocks_in = maf.Reader(open(sys.argv[1]))
    except Exception as exc:
        maf_utilities.tool_fail("Error opening input MAF: %s" % exc)
    try:
        records_out = open(sys.argv[2], 'w')
    except Exception as exc:
        maf_utilities.tool_fail("Error opening file for output: %s" % exc)
    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        num_species = len(species) if species else 0
    except Exception as exc:
        maf_utilities.tool_fail("Error determining species value: %s" % exc)
    try:
        partial = sys.argv[4]
    except Exception as exc:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % exc)

    if not species:
        print("Not restricted to species.")
    else:
        print("Restricted to species: %s" % ', '.join(species))

    for block_idx, block in enumerate(blocks_in):
        if species:
            block = block.limit_to_species(species)
            found = len(maf_utilities.get_species_in_block(block))
            if found < num_species and partial == "partial_disallowed":
                continue
        # 0-based count of components already seen per species in this block.
        per_species_index = {}
        for comp in block.components:
            spec, _chrom = maf_utilities.src_split(comp.src)
            per_species_index[spec] = per_species_index.get(spec, -1) + 1
            header_fields = OrderedDict()
            header_fields['block_index'] = block_idx
            header_fields['species'] = spec
            header_fields['sequence_index'] = per_species_index[spec]
            tail = "%s_%i_%i" % (spec, block_idx, per_species_index[spec])
            header = maf_utilities.get_fasta_header(comp, header_fields, suffix=tail)
            records_out.write("%s\n" % header)
            records_out.write("%s\n" % comp.text)
        records_out.write("\n")
    records_out.close()
Exemplo n.º 10
0
def __main__():
    """Write one FASTA record per species, concatenated over all MAF blocks.

    Positional arguments taken from ``sys.argv``: species option (parsed by
    ``maf_utilities.parse_species_option``), input MAF path, output path.
    When no species are given, the species are taken from the MAF file via
    ``maf_utilities.get_species_in_maf``.

    For each species the MAF file is read once from the start; every
    species-split block contributes the component text of that species, or
    a run of ``-`` of the block's ``text_size`` when the species is absent.
    """
    try:
        species = maf_utilities.parse_species_option(sys.argv[1])
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        input_filename = sys.argv[2]
    except Exception as e:
        maf_utilities.tool_fail("Error reading MAF filename: %s" % e)
    try:
        file_out = open(sys.argv[3], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)

    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")
        try:
            species = maf_utilities.get_species_in_maf(input_filename)
        except Exception as e:
            maf_utilities.tool_fail("Error determining species in input MAF: %s" % e)

    # The with-block closes the output on every path, including failures.
    with file_out:
        for spec in species:
            file_out.write(">" + spec + "\n")
            try:
                # The MAF is re-read for each species; close each handle
                # when its pass is done rather than leaking it.
                with open(input_filename, 'r') as maf_file:
                    for start_block in maf.Reader(maf_file):
                        for block in maf_utilities.iter_blocks_split_by_species(start_block):
                            block.remove_all_gap_columns()  # remove extra gaps
                            # blocks only have one occurrence of a particular species, so this is safe
                            component = block.get_component_by_src_start(spec)
                            if component:
                                file_out.write(component.text)
                            else:
                                file_out.write("-" * block.text_size)
            except Exception as e:
                maf_utilities.tool_fail("Your MAF file appears to be malformed: %s" % e)
            file_out.write("\n")
Exemplo n.º 11
0
def __main__():
    """Reverse complement every block of a MAF file.

    Arguments are consumed in order from ``sys.argv``: input MAF path, output
    MAF path, species option (parsed by
    ``maf_utilities.parse_species_option``). When species are given, each
    reverse-complemented block is limited to them before being written.

    Prints the number of blocks written to stdout. Exits with a non-zero
    status and a message on stderr when the output cannot be opened or the
    input cannot be processed.
    """
    # Parse Command Line
    input_file = sys.argv.pop( 1 )
    output_file = sys.argv.pop( 1 )
    species = maf_utilities.parse_species_option( sys.argv.pop( 1 ) )

    try:
        maf_writer = bx.align.maf.Writer( open( output_file, 'w' ) )
    except Exception:
        # sys.exit(<str>) writes the message to stderr and exits with status 1.
        sys.exit( "Unable to open output file" )
    # Count written blocks directly; the last enumerate() index is one short.
    count = 0
    try:
        maf_in = open( input_file )
        try:
            for block in bx.align.maf.Reader( maf_in ):
                block = block.reverse_complement()
                if species:
                    block = block.limit_to_species( species )
                maf_writer.write( block )
                count += 1
        finally:
            maf_in.close()
            maf_writer.close()
    except Exception:
        sys.exit( "Your MAF file appears to be malformed." )
    print( "%i regions were reverse complemented." % count )
Exemplo n.º 12
0
def __main__():
    """Reverse complement every block of a MAF file.

    Arguments are consumed in order from ``sys.argv``: input MAF path, output
    MAF path, species option (parsed by
    ``maf_utilities.parse_species_option``). When species are given, each
    reverse-complemented block is limited to them before being written.

    Prints the number of blocks written to stdout. Exits with a non-zero
    status and a message on stderr when the output cannot be opened or the
    input cannot be processed.
    """
    # Parse Command Line
    input_file = sys.argv.pop(1)
    output_file = sys.argv.pop(1)
    species = maf_utilities.parse_species_option(sys.argv.pop(1))

    try:
        maf_writer = bx.align.maf.Writer(open(output_file, 'w'))
    except Exception:
        # Send the message to stderr; passing sys.stderr as a positional
        # argument would only print the stream object on stdout.
        print("Unable to open output file", file=sys.stderr)
        sys.exit(1)
    # Count written blocks directly; the last enumerate() index is one short.
    count = 0
    try:
        with open(input_file) as maf_in:
            for block in bx.align.maf.Reader(maf_in):
                block = block.reverse_complement()
                if species:
                    block = block.limit_to_species(species)
                maf_writer.write(block)
                count += 1
    except Exception:
        print("Your MAF file appears to be malformed.", file=sys.stderr)
        sys.exit(1)
    finally:
        # Runs on success and on the failure exit above.
        maf_writer.close()
    print("%i regions were reverse complemented." % count)
Exemplo n.º 13
0
def __main__():
    """Parse the species option given as the first command-line argument.

    Any error raised while parsing is reported through
    ``maf_utilities.tool_fail``.
    """
    try:
        species = maf_utilities.parse_species_option(sys.argv[1])
    except Exception as e:  # "as" form parses on Python 2.6+ and Python 3
        maf_utilities.tool_fail("Error determining species value: %s" % e)
from bx.align import maf
from galaxy.tools.util import maf_utilities

assert sys.version_info[:2] >= ( 2, 4 )

def __main__():
    """Open the MAF input and the output file and report the species restriction.

    Positional arguments taken from ``sys.argv``: input MAF path, output
    path, species option (parsed by ``maf_utilities.parse_species_option``)
    and a "keep partial" value. Every failure is reported through
    ``maf_utilities.tool_fail``.

    Errors are fetched with ``sys.exc_info()`` rather than ``except X, e`` or
    ``except X as e``: it is the one spelling that parses on every
    interpreter from Python 2.4 (the minimum this file asserts) to Python 3.
    """
    try:
        maf_reader = maf.Reader( open( sys.argv[1] ) )
    except Exception:
        maf_utilities.tool_fail( "Error opening input MAF: %s" % sys.exc_info()[1] )
    try:
        file_out = open( sys.argv[2], 'w' )
    except Exception:
        maf_utilities.tool_fail( "Error opening file for output: %s" % sys.exc_info()[1] )
    try:
        species = maf_utilities.parse_species_option( sys.argv[3] )
        if species:
            num_species = len( species )
        else:
            num_species = 0
    except Exception:
        maf_utilities.tool_fail( "Error determining species value: %s" % sys.exc_info()[1] )
    try:
        partial = sys.argv[4]
    except Exception:
        maf_utilities.tool_fail( "Error determining keep partial value: %s" % sys.exc_info()[1] )

    # Single-argument print( ... ) gives the same output on Python 2 and 3.
    if species:
        print( "Restricted to species: %s" % ', '.join( species ) )
    else:
        print( "Not restricted to species." )
Exemplo n.º 15
0
def __main__():
    """Extract the MAF blocks that overlap a set of genomic intervals.

    Options come from ``doc_optparse.parse(__doc__)``: the genome build
    (``dbkey``), the species option, the 1-based chromosome/start/end/strand
    columns, the interval and output file names, the block splitting and
    gap-column options, and the MAF source (``mafType`` with an index
    location, or ``mafFile`` with ``mafIndex``).

    For every interval, each block returned by the index is optionally split
    by species, chopped and oriented to the interval, optionally limited to
    the requested species and written to the output MAF. A summary line is
    printed to stdout.

    Missing or invalid options are reported through
    ``maf_utilities.tool_fail``; the code after each call relies on names
    bound only on the success path, so ``tool_fail`` presumably terminates
    the program (its definition is not visible here).
    """
    index = index_filename = None

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    if options.dbkey:
        dbkey = options.dbkey
    else:
        dbkey = None
    if dbkey in [None, "?"]:
        maf_utilities.tool_fail("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    species = maf_utilities.parse_species_option(options.species)

    if options.chromCol:
        chromCol = int(options.chromCol) - 1
    else:
        maf_utilities.tool_fail("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.startCol:
        startCol = int(options.startCol) - 1
    else:
        maf_utilities.tool_fail("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.endCol:
        endCol = int(options.endCol) - 1
    else:
        maf_utilities.tool_fail("End column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.strandCol:
        strandCol = int(options.strandCol) - 1
    else:
        strandCol = -1

    if options.interval_file:
        interval_file = options.interval_file
    else:
        maf_utilities.tool_fail("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        maf_utilities.tool_fail("Output file has not been specified.")

    # All-gap columns are always removed unless blocks are split by species,
    # in which case removal has to be requested explicitly.
    split_blocks_by_species = remove_all_gap_columns = False
    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
        split_blocks_by_species = True
        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
            remove_all_gap_columns = True
    else:
        remove_all_gap_columns = True
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        if options.indexLocation:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation)
        else:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile)
        if index is None:
            maf_utilities.tool_fail("The MAF source specified (%s) appears to be invalid." % (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            maf_utilities.tool_fail("Your MAF file appears to be malformed.")
    else:
        maf_utilities.tool_fail("Desired source MAF type has not been specified.")

    num_blocks = 0
    num_regions = None
    try:
        # Create MAF writer
        out = bx.align.maf.Writer(open(output_file, "w"))
        try:
            # Iterate over input regions
            with open(interval_file, 'r') as interval_handle:
                regions = bx.intervals.io.NiceReaderWrapper(
                    interval_handle, chrom_col=chromCol, start_col=startCol,
                    end_col=endCol, strand_col=strandCol, fix_strand=True,
                    return_header=False, return_comments=False)
                for num_regions, region in enumerate(regions):
                    src = maf_utilities.src_merge(dbkey, region.chrom)
                    for block in index.get_as_iterator(src, region.start, region.end):
                        if split_blocks_by_species:
                            candidates = [new_block for new_block in maf_utilities.iter_blocks_split_by_species(block) if maf_utilities.component_overlaps_region(new_block.get_component_by_src_start(dbkey), region)]
                        else:
                            candidates = [block]
                        # A separate name keeps the outer loop variable intact.
                        for candidate in candidates:
                            chopped = maf_utilities.chop_block_by_region(candidate, src, region)
                            if chopped is None:
                                continue
                            if species is not None:
                                chopped = chopped.limit_to_species(species)
                            chopped = maf_utilities.orient_block_by_region(chopped, src, region)
                            if remove_all_gap_columns:
                                chopped.remove_all_gap_columns()
                            out.write(chopped)
                            num_blocks += 1
        finally:
            # Close output MAF, even if extraction failed part-way
            out.close()
    finally:
        # remove index file if created during run, on success and on failure
        maf_utilities.remove_temp_index_file(index_filename)

    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1)))
    elif num_regions is not None:
        print("No MAF blocks could be extracted for %i regions." % (num_regions + 1))
    else:
        print("No valid regions have been provided.")
def __main__():
    """Extract alignments for intervals (or BED gene lines) and write them as FASTA-style records.

    Options come from ``doc_optparse.parse(__doc__)``: the genome build
    (``dbkey``, used as the primary species), the species option, the
    interval and output file names, the 1-based column numbers (only read
    when ``geneBED`` is not set), the MAF source and its type ("cached" or
    "user"), the directory holding ``maf_index.loc`` and the
    ``overwrite_with_gaps`` switch.

    For every input line an alignment object is built from the MAF index and
    one ``>`` header plus sequence is written per species, primary species
    first when it is requested. Lines that raise are reported on stdout and
    skipped. A summary is printed to stdout at the end.

    Missing or invalid options are reported through ``stop_err`` (defined
    elsewhere in the file); the code after each call relies on names bound
    only on the success path, so ``stop_err`` presumably terminates the
    program -- TODO confirm.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    mincols = 0
    strand_col = -1  # default when no strand column option is given

    if options.dbkey:
        primary_species = options.dbkey
    else:
        primary_species = None
    if primary_species in [None, "?", "None"]:
        stop_err("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    # 'species' keeps the full requested list (handed to the alignment
    # helpers); 'secondary_species' is the same list without the primary
    # species, which is written separately and first.
    include_primary = True
    secondary_species = maf_utilities.parse_species_option(options.species)
    if secondary_species:
        species = list(secondary_species)  # make copy of species list
        if primary_species in secondary_species:
            secondary_species.remove(primary_species)
        else:
            # the primary species was not requested, so it is not written
            include_primary = False
    else:
        species = None

    if options.interval_file:
        interval_file = options.interval_file
    else:
        stop_err("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        stop_err("Output file has not been specified.")

    # Column numbers are only needed (and only bound) for plain interval input
    if not options.geneBED:
        if options.chromCol:
            chr_col = int(options.chromCol) - 1
        else:
            stop_err("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.startCol:
            start_col = int(options.startCol) - 1
        else:
            stop_err("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.endCol:
            end_col = int(options.endCol) - 1
        else:
            stop_err("End column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.strandCol:
            strand_col = int(options.strandCol) - 1

    mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir

    # Defaults to True; only the literal string 'false' (any case) disables it
    overwrite_with_gaps = True
    if options.overwrite_with_gaps and options.overwrite_with_gaps.lower() == 'false':
        overwrite_with_gaps = False

    # Finish parsing command line

    # get index for mafs based on type
    index = index_filename = None
    # using specified uid for locally cached
    if options.mafSourceType.lower() in ["cached"]:
        index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile)
        if index is None:
            stop_err("The MAF source specified (%s) appears to be invalid." % (options.mafSource))
    elif options.mafSourceType.lower() in ["user"]:
        # index maf for use here, need to remove index_file when finished
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafSource, options.mafIndex, species=[primary_species])
        if index is None:
            stop_err("Your MAF file appears to be malformed.")
    else:
        stop_err("Invalid MAF source type specified.")

    # open output file
    output = open(output_file, "w")

    # Both enumerators yield (line number, item) pairs: raw text lines for
    # gene BED input, parsed interval objects otherwise.
    if options.geneBED:
        region_enumerator = maf_utilities.line_enumerator(open(interval_file, "r").readlines())
    else:
        region_enumerator = enumerate(bx.intervals.io.NiceReaderWrapper(
            open(interval_file, 'r'), chrom_col=chr_col, start_col=start_col,
            end_col=end_col, strand_col=strand_col, fix_strand=True,
            return_header=False, return_comments=False))

    # Step through intervals
    regions_extracted = 0
    line_count = 0
    for line_count, line in region_enumerator:
        try:
            if options.geneBED:  # Process as Gene BED
                try:
                    starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(line)
                    # create spliced alignment object
                    alignment = maf_utilities.get_spliced_region_alignment(
                        index, primary_species, fields[0], starts, ends,
                        strand='+', species=species, mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    # BED field 4 names every record of this line, field 6 is its strand
                    primary_name = secondary_name = fields[3]
                    alignment_strand = fields[5]
                except Exception as e:
                    print("Error loading exon positions from input line %i: %s" % (line_count, e))
                    continue
            else:  # Process as standard intervals
                try:
                    # create region alignment object
                    alignment = maf_utilities.get_region_alignment(
                        index, primary_species, line.chrom, line.start,
                        line.end, strand='+', species=species, mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand, line.start, line.end)
                    # empty name: secondary species headers carry the species only
                    secondary_name = ""
                    alignment_strand = line.strand
                except Exception as e:
                    print("Error loading region positions from input line %i: %s" % (line_count, e))
                    continue

            # Write alignment to output file
            # Output primary species first, if requested
            if include_primary:
                output.write(">%s.%s\n" % (primary_species, primary_name))
                # the alignment is always built on '+', so reverse complement here for '-' input
                if alignment_strand == "-":
                    output.write(alignment.get_sequence_reverse_complement(primary_species))
                else:
                    output.write(alignment.get_sequence(primary_species))
                output.write("\n")
            # Output all remaining species
            # (falls back to every species in the alignment except the primary
            # one when no secondary species are listed)
            for spec in secondary_species or alignment.get_species_names(skip=primary_species):
                if secondary_name:
                    output.write(">%s.%s\n" % (spec, secondary_name))
                else:
                    output.write(">%s\n" % (spec))
                if alignment_strand == "-":
                    output.write(alignment.get_sequence_reverse_complement(spec))
                else:
                    output.write(alignment.get_sequence(spec))
                output.write("\n")

            # blank line separates the records of consecutive input lines
            output.write("\n")
            regions_extracted += 1
        except Exception as e:
            # best effort: report the line and carry on with the next one
            print("Unexpected error from input line %i: %s" % (line_count, e))
            continue

    # close output file
    output.close()

    # remove index file if created during run
    maf_utilities.remove_temp_index_file(index_filename)

    # Print message about success for user
    if regions_extracted > 0:
        print("%i regions were processed successfully." % (regions_extracted))
    else:
        print("No regions were processed successfully.")
        if line_count > 0 and options.geneBED:
            print("This tool requires your input file to conform to the 12 column BED standard.")
Exemplo n.º 17
0
def __main__():
    """Parse the species option given as the first command-line argument.

    Any error raised while parsing is reported through
    ``maf_utilities.tool_fail``.
    """
    try:
        species = maf_utilities.parse_species_option(sys.argv[1])
    except Exception as e:  # "as" form parses on Python 2.6+ and Python 3
        maf_utilities.tool_fail("Error determining species value: %s" % e)
def __main__():
    # Entry point of a command-line tool: for every region of an interval file
    # (or every line of a gene BED file) build an alignment object from an
    # indexed MAF source.
    # NOTE(review): this listing stops inside the "Step through intervals"
    # loop -- the outer ``try`` below has no visible handler, and the code that
    # writes to / closes ``output`` and removes a temporary index is not shown.

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    mincols = 0
    # -1 is kept when no strand column is supplied (see options.strandCol below)
    strand_col = -1

    # The genome build (dbkey) doubles as the primary species of the alignment
    if options.dbkey:
        primary_species = options.dbkey
    else:
        primary_species = None
    # NOTE(review): stop_err is defined elsewhere; presumably it reports the
    # message and aborts the run -- confirm.
    if primary_species in [None, "?", "None"]:
        stop_err(
            "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file."
        )

    # ``species`` keeps the full requested list (passed to the alignment
    # builders), while ``secondary_species`` has the primary species removed.
    # The primary species is only flagged for inclusion when it was requested
    # explicitly or when no species list was given at all.
    include_primary = True
    secondary_species = maf_utilities.parse_species_option(options.species)
    if secondary_species:
        species = list(secondary_species)  # make copy of species list
        if primary_species in secondary_species:
            secondary_species.remove(primary_species)
        else:
            include_primary = False
    else:
        species = None

    if options.interval_file:
        interval_file = options.interval_file
    else:
        stop_err("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        stop_err("Output file has not been specified.")

    # Column options are only read for plain interval input; each value is
    # shifted down by one (presumably 1-based on the command line -- confirm).
    if not options.geneBED:
        if options.chromCol:
            chr_col = int(options.chromCol) - 1
        else:
            stop_err(
                "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes."
            )

        if options.startCol:
            start_col = int(options.startCol) - 1
        else:
            stop_err("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.endCol:
            end_col = int(options.endCol) - 1
        else:
            stop_err("End column not set, click the pencil icon in the history item to set the metadata attributes.")

        # The strand column is optional; strand_col stays -1 when it is absent
        if options.strandCol:
            strand_col = int(options.strandCol) - 1

    mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir
    # Finish parsing command line

    # get index for mafs based on type
    index = index_filename = None
    # using specified uid for locally cached
    if options.mafSourceType.lower() in ["cached"]:
        index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile)
        if index is None:
            stop_err("The MAF source specified (%s) appears to be invalid." % (options.mafSource))
    elif options.mafSourceType.lower() in ["user"]:
        # index maf for use here, need to remove index_file when finished
        index, index_filename = maf_utilities.open_or_build_maf_index(
            options.mafSource, options.mafIndex, species=[primary_species]
        )
        if index is None:
            stop_err("Your MAF file appears to be malformed.")
    else:
        stop_err("Invalid MAF source type specified.")

    # open output file
    output = open(output_file, "w")

    # Both branches provide (line_count, line) pairs for the loop below: gene
    # BED input is read fully into memory and handed to
    # maf_utilities.line_enumerator, interval input is wrapped in
    # NiceReaderWrapper and enumerated.
    if options.geneBED:
        region_enumerator = maf_utilities.line_enumerator(open(interval_file, "r").readlines())
    else:
        region_enumerator = enumerate(
            bx.intervals.io.NiceReaderWrapper(
                open(interval_file, "r"),
                chrom_col=chr_col,
                start_col=start_col,
                end_col=end_col,
                strand_col=strand_col,
                fix_strand=True,
                return_header=False,
                return_comments=False,
            )
        )

    # Step through intervals
    regions_extracted = 0
    line_count = 0
    for line_count, line in region_enumerator:
        try:
            if options.geneBED:  # Process as Gene BED
                try:
                    starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(line)
                    # create spliced alignment object
                    # (always requested on the "+" strand; the strand taken
                    # from the input is stored separately in alignment_strand)
                    alignment = maf_utilities.get_spliced_region_alignment(
                        index, primary_species, fields[0], starts, ends, strand="+", species=species, mincols=mincols
                    )
                    # fields[3] is used as the name for every species,
                    # fields[5] as the strand
                    primary_name = secondary_name = fields[3]
                    alignment_strand = fields[5]
                except Exception, e:
                    # report the problem and move on to the next input line
                    print "Error loading exon positions from input line %i: %s" % (line_count, e)
                    continue
            else:  # Process as standard intervals
                try:
                    # create region alignment object
                    alignment = maf_utilities.get_region_alignment(
                        index,
                        primary_species,
                        line.chrom,
                        line.start,
                        line.end,
                        strand="+",
                        species=species,
                        mincols=mincols,
                    )
                    # only the primary species gets a descriptive name here
                    primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand, line.start, line.end)
                    secondary_name = ""
                    alignment_strand = line.strand
                except Exception, e:
                    # report the problem and move on to the next input line
                    print "Error loading region positions from input line %i: %s" % (line_count, e)
                    continue
Exemplo n.º 19
0
def __main__():
    """Extract the MAF blocks overlapping each region of an interval file.

    Options are read with ``doc_optparse.parse(__doc__)``. The genome build
    (``dbkey``), the chromosome/start/end columns, the interval file and the
    output file are mandatory; the strand column is optional. The MAF index
    is looked up by uid when ``mafType`` is given, otherwise it is opened or
    built from ``mafFile``.

    For every region, the blocks returned by
    ``maf_utilities.get_chopped_blocks_for_region`` are written to the output
    MAF, and a one-line summary is printed at the end.

    Any missing or invalid option terminates the process through
    ``sys.exit(message)``: the message goes to stderr and the exit status is
    non-zero, so callers can detect the failure.
    """
    index = index_filename = None
    mincols = 0

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    dbkey = options.dbkey if options.dbkey else None
    if dbkey in [None, "?"]:
        sys.exit("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    species = maf_utilities.parse_species_option(options.species)

    # Column options are shifted down by one before being handed to the reader
    if not options.chromCol:
        sys.exit("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")
    chromCol = int(options.chromCol) - 1

    if not options.startCol:
        sys.exit("Start column not set, click the pencil icon in the history item to set the metadata attributes.")
    startCol = int(options.startCol) - 1

    if not options.endCol:
        sys.exit("End column not set, click the pencil icon in the history item to set the metadata attributes.")
    endCol = int(options.endCol) - 1

    # The strand column is optional; -1 is passed on when it is absent
    strandCol = int(options.strandCol) - 1 if options.strandCol else -1

    if not options.interval_file:
        sys.exit("Input interval file has not been specified.")
    interval_file = options.interval_file

    if not options.output_file:
        sys.exit("Output file has not been specified.")
    output_file = options.output_file
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        if options.indexLocation:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation)
        else:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile)
        if index is None:
            sys.exit("The MAF source specified (%s) appears to be invalid." % (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            sys.exit("Your MAF file appears to be malformed.")
    else:
        sys.exit("Desired source MAF type has not been specified.")

    num_blocks = 0
    num_regions = None
    try:
        # Create MAF writer
        out = bx.align.maf.Writer(open(output_file, "w"))
        try:
            # Iterate over input regions; the interval file is closed as soon
            # as the iteration ends, whether it succeeds or raises
            with open(interval_file, "r") as interval_handle:
                regions = bx.intervals.io.NiceReaderWrapper(
                    interval_handle,
                    chrom_col=chromCol,
                    start_col=startCol,
                    end_col=endCol,
                    strand_col=strandCol,
                    fix_strand=True,
                    return_header=False,
                    return_comments=False,
                )
                for num_regions, region in enumerate(regions):
                    src = "%s.%s" % (dbkey, region.chrom)
                    for block in maf_utilities.get_chopped_blocks_for_region(index, src, region, species, mincols):
                        out.write(block)
                        num_blocks += 1
        finally:
            # Close output MAF even when extraction fails part-way
            out.close()
    finally:
        # remove index file if created during run, also on failure
        maf_utilities.remove_temp_index_file(index_filename)

    # Single-argument print(...) behaves the same on Python 2 and Python 3
    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1)))
    elif num_regions is not None:
        print("No MAF blocks could be extracted for %i regions." % (num_regions + 1))
    else:
        print("No valid regions have been provided.")
Exemplo n.º 20
0
def __main__():
    # Entry point of a command-line tool: for every region of an interval file
    # (or every line of a gene BED file) build an alignment object from an
    # indexed MAF source, forwarding the overwrite_with_gaps flag.
    # NOTE(review): this listing stops inside the "Step through intervals"
    # loop -- the outer ``try`` below has no visible handler, and the code that
    # writes to / closes ``output`` and removes a temporary index is not shown.
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    mincols = 0
    # -1 is kept when no strand column is supplied (see options.strandCol below)
    strand_col = -1

    # The genome build (dbkey) doubles as the primary species of the alignment
    if options.dbkey:
        primary_species = options.dbkey
    else:
        primary_species = None
    # NOTE(review): stop_err is defined elsewhere; presumably it reports the
    # message and aborts the run -- confirm.
    if primary_species in [None, "?", "None"]:
        stop_err(
            "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file."
        )

    # ``species`` keeps the full requested list (passed to the alignment
    # builders), while ``secondary_species`` has the primary species removed.
    # The primary species is only flagged for inclusion when it was requested
    # explicitly or when no species list was given at all.
    include_primary = True
    secondary_species = maf_utilities.parse_species_option(options.species)
    if secondary_species:
        species = list(secondary_species)  # make copy of species list
        if primary_species in secondary_species:
            secondary_species.remove(primary_species)
        else:
            include_primary = False
    else:
        species = None

    if options.interval_file:
        interval_file = options.interval_file
    else:
        stop_err("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        stop_err("Output file has not been specified.")

    # Column options are only read for plain interval input; each value is
    # shifted down by one (presumably 1-based on the command line -- confirm).
    if not options.geneBED:
        if options.chromCol:
            chr_col = int(options.chromCol) - 1
        else:
            stop_err(
                "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes."
            )

        if options.startCol:
            start_col = int(options.startCol) - 1
        else:
            stop_err(
                "Start column not set, click the pencil icon in the history item to set the metadata attributes."
            )

        if options.endCol:
            end_col = int(options.endCol) - 1
        else:
            stop_err(
                "End column not set, click the pencil icon in the history item to set the metadata attributes."
            )

        # The strand column is optional; strand_col stays -1 when it is absent
        if options.strandCol:
            strand_col = int(options.strandCol) - 1

    mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir

    # overwrite_with_gaps defaults to True and is only switched off by an
    # explicit (case-insensitive) 'false' option value
    overwrite_with_gaps = True
    if options.overwrite_with_gaps and options.overwrite_with_gaps.lower(
    ) == 'false':
        overwrite_with_gaps = False

    # Finish parsing command line

    # get index for mafs based on type
    index = index_filename = None
    # using specified uid for locally cached
    if options.mafSourceType.lower() in ["cached"]:
        index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile)
        if index is None:
            stop_err("The MAF source specified (%s) appears to be invalid." %
                     (options.mafSource))
    elif options.mafSourceType.lower() in ["user"]:
        # index maf for use here, need to remove index_file when finished
        index, index_filename = maf_utilities.open_or_build_maf_index(
            options.mafSource, options.mafIndex, species=[primary_species])
        if index is None:
            stop_err("Your MAF file appears to be malformed.")
    else:
        stop_err("Invalid MAF source type specified.")

    # open output file
    output = open(output_file, "w")

    # Both branches provide (line_count, line) pairs for the loop below: gene
    # BED input is read fully into memory and handed to
    # maf_utilities.line_enumerator, interval input is wrapped in
    # NiceReaderWrapper and enumerated.
    if options.geneBED:
        region_enumerator = maf_utilities.line_enumerator(
            open(interval_file, "r").readlines())
    else:
        region_enumerator = enumerate(
            bx.intervals.io.NiceReaderWrapper(open(interval_file, 'r'),
                                              chrom_col=chr_col,
                                              start_col=start_col,
                                              end_col=end_col,
                                              strand_col=strand_col,
                                              fix_strand=True,
                                              return_header=False,
                                              return_comments=False))

    # Step through intervals
    regions_extracted = 0
    line_count = 0
    for line_count, line in region_enumerator:
        try:
            if options.geneBED:  # Process as Gene BED
                try:
                    starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(
                        line)
                    # create spliced alignment object
                    # (always requested on the '+' strand; the strand taken
                    # from the input is stored separately in alignment_strand)
                    alignment = maf_utilities.get_spliced_region_alignment(
                        index,
                        primary_species,
                        fields[0],
                        starts,
                        ends,
                        strand='+',
                        species=species,
                        mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    # fields[3] is used as the name for every species,
                    # fields[5] as the strand
                    primary_name = secondary_name = fields[3]
                    alignment_strand = fields[5]
                except Exception, e:
                    # report the problem and move on to the next input line
                    print "Error loading exon positions from input line %i: %s" % (
                        line_count, e)
                    continue
            else:  # Process as standard intervals
                try:
                    # create region alignment object
                    alignment = maf_utilities.get_region_alignment(
                        index,
                        primary_species,
                        line.chrom,
                        line.start,
                        line.end,
                        strand='+',
                        species=species,
                        mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    # only the primary species gets a descriptive name here
                    primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand,
                                                     line.start, line.end)
                    secondary_name = ""
                    alignment_strand = line.strand
                except Exception, e:
                    # report the problem and move on to the next input line
                    print "Error loading region positions from input line %i: %s" % (
                        line_count, e)
                    continue
Exemplo n.º 21
0
def _blocks_for_region(index, dbkey, region, species, split_blocks_by_species,
                       remove_all_gap_columns):
    """Yield the output-ready MAF blocks of ``index`` for a single region.

    Each block returned by ``index.get_as_iterator`` is optionally split by
    species (keeping only the pieces whose ``dbkey`` component overlaps the
    region), chopped to the region, optionally limited to ``species``,
    oriented by the region and optionally stripped of all-gap columns.
    Blocks for which ``chop_block_by_region`` returns ``None`` are skipped.
    """
    src = maf_utilities.src_merge(dbkey, region.chrom)
    for block in index.get_as_iterator(src, region.start, region.end):
        if split_blocks_by_species:
            candidates = [
                new_block for new_block in
                maf_utilities.iter_blocks_split_by_species(block)
                if maf_utilities.component_overlaps_region(
                    new_block.get_component_by_src_start(dbkey), region)
            ]
        else:
            candidates = [block]
        # A separate name is used here so the outer loop variable is not
        # rebound while its iteration is still in progress.
        for candidate in candidates:
            chopped = maf_utilities.chop_block_by_region(candidate, src, region)
            if chopped is None:
                continue
            if species is not None:
                chopped = chopped.limit_to_species(species)
            chopped = maf_utilities.orient_block_by_region(chopped, src, region)
            if remove_all_gap_columns:
                chopped.remove_all_gap_columns()
            yield chopped


def __main__():
    """Extract MAF blocks for every region of an interval file.

    Options are read with ``doc_optparse.parse(__doc__)``. The genome build
    (``dbkey``), the chromosome/start/end columns, the interval file and the
    output file are required and reported through ``maf_utilities.tool_fail``
    when missing; the strand column is optional. The MAF index is looked up
    by uid when ``mafType`` is given, otherwise it is opened or built from
    ``mafFile``.

    The processed blocks of each region are written to the output MAF and a
    one-line summary is printed. The interval file, the MAF writer and any
    temporary index file are released even when extraction raises.
    """
    index = index_filename = None

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    if options.dbkey:
        dbkey = options.dbkey
    else:
        dbkey = None
    if dbkey in [None, "?"]:
        maf_utilities.tool_fail(
            "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file."
        )

    species = maf_utilities.parse_species_option(options.species)

    # Column options are shifted down by one before being handed to the reader
    if options.chromCol:
        chromCol = int(options.chromCol) - 1
    else:
        maf_utilities.tool_fail(
            "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes."
        )

    if options.startCol:
        startCol = int(options.startCol) - 1
    else:
        maf_utilities.tool_fail(
            "Start column not set, click the pencil icon in the history item to set the metadata attributes."
        )

    if options.endCol:
        endCol = int(options.endCol) - 1
    else:
        maf_utilities.tool_fail(
            "End column not set, click the pencil icon in the history item to set the metadata attributes."
        )

    # The strand column is optional; -1 is passed on when it is absent
    if options.strandCol:
        strandCol = int(options.strandCol) - 1
    else:
        strandCol = -1

    if options.interval_file:
        interval_file = options.interval_file
    else:
        maf_utilities.tool_fail("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        maf_utilities.tool_fail("Output file has not been specified.")

    # All-gap columns are always removed when blocks are not split by species;
    # when they are split, removal has to be requested explicitly.
    split_blocks_by_species = remove_all_gap_columns = False
    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
        split_blocks_by_species = True
        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
            remove_all_gap_columns = True
    else:
        remove_all_gap_columns = True
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        if options.indexLocation:
            index = maf_utilities.maf_index_by_uid(options.mafType,
                                                   options.indexLocation)
        else:
            index = maf_utilities.maf_index_by_uid(options.mafType,
                                                   options.mafIndexFile)
        if index is None:
            maf_utilities.tool_fail(
                "The MAF source specified (%s) appears to be invalid." %
                (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(
            options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            maf_utilities.tool_fail("Your MAF file appears to be malformed.")
    else:
        maf_utilities.tool_fail(
            "Desired source MAF type has not been specified.")

    num_blocks = 0
    num_regions = None
    try:
        # Create MAF writer
        out = bx.align.maf.Writer(open(output_file, "w"))
        try:
            # Iterate over input regions; the interval file is closed as soon
            # as the iteration ends, whether it succeeds or raises
            with open(interval_file) as interval_handle:
                regions = bx.intervals.io.NiceReaderWrapper(
                    interval_handle,
                    chrom_col=chromCol,
                    start_col=startCol,
                    end_col=endCol,
                    strand_col=strandCol,
                    fix_strand=True,
                    return_header=False,
                    return_comments=False)
                for num_regions, region in enumerate(regions):  # noqa: B007
                    for block in _blocks_for_region(
                            index, dbkey, region, species,
                            split_blocks_by_species, remove_all_gap_columns):
                        out.write(block)
                        num_blocks += 1
        finally:
            # Close output MAF even when extraction fails part-way
            out.close()
    finally:
        # remove index file if created during run, also on failure
        maf_utilities.remove_temp_index_file(index_filename)

    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks,
                                                           (num_regions + 1)))
    elif num_regions is not None:
        print("No MAF blocks could be extracted for %i regions." %
              (num_regions + 1))
    else:
        print("No valid regions have been provided.")