Пример #1
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv == None: argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: codemls2tsv.py 2781 2009-09-10 11:33:14Z andreas $"
    )

    parser.add_option("--methods",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=("summary-numbers", "jalview",
                               "positive-site-table", "positive-site-list",
                               "count-positive-sites"),
                      help="methods for analysis.")

    parser.add_option("--selection-mode",
                      dest="selection_mode",
                      type="choice",
                      choices=("all", "consistent", "emes"),
                      help="how to select positive sites.")

    parser.add_option("--prefix",
                      dest="prefix",
                      type="string",
                      help="prefix for rows.")

    parser.add_option("--pattern-input-filenames",
                      dest="pattern_input_filenames",
                      type="string",
                      help="input pattern.")

    parser.add_option(
        "--filter-probability",
        dest="filter_probability",
        type="float",
        help=
        "threshold for probability above which to include positive sites [default=%default]."
    )

    parser.add_option(
        "--filter-omega",
        dest="filter_omega",
        type="float",
        help=
        "threshold for omega above which to include positive sites [default=%default]."
    )

    parser.add_option("--models",
                      dest="models",
                      type="string",
                      help="restrict output to set of site specific models.")

    parser.add_option("--analysis",
                      dest="analysis",
                      type="string",
                      help="restrict output to set of analysis [beb|neb].")

    parser.add_option("--significance-threshold",
                      dest="significance_threshold",
                      type="float",
                      help="significance threshold for log-likelihood test.")

    parser.add_option("--filter-mali",
                      dest="filter_mali",
                      type="choice",
                      choices=("none", "gaps"),
                      help="filter by mali to remove gapped positions.")

    parser.add_option(
        "--filename-mali",
        dest="filename_mali",
        type="string",
        help=
        "filename with multiple alignment used for calculating sites - used for filtering"
    )

    parser.add_option(
        "--filename-map-mali",
        dest="filename_map_mali",
        type="string",
        help="filename with multiple alignment to map sites onto.")

    parser.add_option(
        "--jalview-titles",
        dest="jalview_titles",
        type="string",
        help="comma separated list of jalview annotation titles.")

    parser.add_option("--jalview-symbol",
                      dest="jalview_symbol",
                      type="string",
                      help="symbol to use in jalview.")

    parser.set_defaults(
        methods=[],
        prefix=None,
        filter_probability=0,
        filter_omega=0,
        models="",
        analysis="",
        significance_threshold=0.05,
        selection_mode="consistent",
        filename_mali=None,
        filename_map_mali=None,
        jalview_symbol="*",
        jalview_titles="",
        filter_mali=None,
    )

    (options, args) = E.Start(parser)

    if options.jalview_titles:
        options.jalview_titles = options.jalview_titles.split(",")
    else:
        options.jalview_titles = args

    options.models = options.models.split(",")
    options.analysis = options.analysis.split(",")

    for a in options.analysis:
        if a not in ("beb", "neb"):
            raise "unknown analysis section: '%s', possible values are 'beb' and/or 'neb'" % a

    for a in options.models:
        if a not in ("8", "2", "3"):
            raise "unknown model: '%s', possible values are 2, 3, 8" % a

    codeml = WrapperCodeML.CodeMLSites()

    ## filter and extract functions
    filter_f = lambda x: x.mProbability >= options.filter_probability and x.mOmega >= options.filter_omega
    extract_f = lambda x: x.mResidue

    ## read multiple results
    results = []
    ninput, noutput, nskipped = 0, 0, 0

    headers = []
    for f in args:
        ninput += 1
        try:
            results.append(codeml.parseOutput(open(f, "r").readlines()))
        except WrapperCodeML.UsageError:
            if options.loglevel >= 1:
                options.stdlog.write("# no input from %s\n" % f)
            nskipped += 1
            continue
        noutput += 1
        headers.append(f)

    ## map of nested model (key) to more general model
    map_nested_models = {'8': '7', '2': '1', '3': '0'}

    if options.filename_mali:
        mali = Mali.Mali()
        mali.readFromFile(open(options.filename_mali, "r"))
    else:
        mali = None

    ###############################################################
    ###############################################################
    ###############################################################
    ## use multiple alignment to map residues to a reference mali
    ## or a sequence.
    ###############################################################
    if options.filename_map_mali:

        if not mali:
            raise "please supply the input multiple alignment, if residues are to be mapped."

        ## translate the alignments
        def translate(s):
            sequence = s.mString
            seq = []
            for codon in [
                    sequence[x:x + 3] for x in range(0, len(sequence), 3)
            ]:
                aa = Genomics.MapCodon2AA(codon)
                seq.append(aa)

            s.mString = "".join(seq)

        tmali = Mali.Mali()
        tmali.readFromFile(open(options.filename_mali, "r"))
        tmali.apply(translate)

        tmap_mali = Mali.Mali()
        tmap_mali.readFromFile(open(options.filename_map_mali, "r"))

        if tmap_mali.getAlphabet() == "na":
            tmap_mali.apply(translate)

        map_old2new = alignlib_lite.py_makeAlignmentVector()

        mali1 = alignlib_lite.py_makeProfileFromMali(convertMali2Mali(tmali))

        if tmap_mali.getLength() == 1:

            s = tmap_mali.values()[0].mString
            mali2 = alignlib_lite.py_makeSequence(s)
            ## see if you can find an identical subsequence and then align to thisD
            for x in tmali.values():
                if s in re.sub("[- .]+", "", x.mString):
                    mali1 = alignlib_lite.py_makeSequence(x.mString)
                    break
        else:
            mali2 = alignlib_lite.py_makeProfileFromMali(
                convertMali2Mali(tmap_mali))

        alignator = alignlib_lite.py_makeAlignatorDPFull(
            alignlib_lite.py_ALIGNMENT_LOCAL, -10.0, -2.0)
        alignator.align(map_old2new, mali1, mali2)

        consensus = tmap_mali.getConsensus()

        if options.loglevel >= 4:
            options.stdlog.write("# alphabet: %s\n" % tmap_mali.getAlphabet())
            options.stdlog.write("# orig  : %s\n" % tmali.getConsensus())
            options.stdlog.write("# mapped: %s\n" % consensus)
            options.stdlog.write("# alignment: %s\n" % map_old2new.Write())
    else:
        map_old2new = None

    for method in options.methods:

        if method == "summary-numbers":

            options.stdlog.write( \
"""# Numbers of positive sites.
#
# The consistent row/column contains positive sites that are significant
# (above thresholds for probability and omega) for all models/analysis
# that have been selected (label: cons).
#
# The log-likelihood ratio test is performed for model pairs, depending
# on the output chosen.
# Significance threshold: %6.4f
# The pairs are 8 versus 7 and 2 versus 1 and 3 versus 0.
#
""" % options.significance_threshold )

            ## write header
            if options.prefix: options.stdout.write("prefix\t")

            options.stdout.write("method\tnseq\t")
            h = []
            for model in options.models:
                for analysis in options.analysis:
                    h.append("%s%s" % (analysis, model))
                h.append("p%s" % (model))
                h.append("df%s" % (model))
                h.append("chi%s" % (model))
                h.append("lrt%s" % (model))

            options.stdout.write("\t".join(h))
            options.stdout.write("\tcons\tpassed\tfilename\n")

            nmethod = 0

            consistent_cols = [None for x in range(len(options.analysis))]
            passed_tests = {}
            for m in options.models:
                passed_tests[m] = 0

            for result in results:

                row_consistent = None

                if options.prefix:
                    options.stdout.write("%s" % (options.prefix))

                options.stdout.write("%i" % nmethod)
                options.stdout.write("\t%i" % (result.mNumSequences))

                npassed = 0

                for model in options.models:

                    sites = result.mSites[model]

                    ## do significance test
                    full_model, null_model = model, map_nested_models[model]

                    lrt = Stats.doLogLikelihoodTest(
                        result.mSites[full_model].mLogLikelihood,
                        result.mSites[full_model].mNumParameters,
                        result.mSites[null_model].mLogLikelihood,
                        result.mSites[null_model].mNumParameters,
                        options.significance_threshold)

                    x = 0
                    for analysis in options.analysis:

                        if analysis == "neb":
                            s = set(
                                map(
                                    extract_f,
                                    filter(filter_f,
                                           sites.mNEB.mPositiveSites)))

                        elif analysis == "beb":
                            s = set(
                                map(
                                    extract_f,
                                    filter(filter_f,
                                           sites.mBEB.mPositiveSites)))

                        options.stdout.write("\t%i" % (len(s)))

                        if not lrt.mPassed:
                            s = set()

                        if row_consistent == None:
                            row_consistent = s
                        else:
                            row_consistent = row_consistent.intersection(s)

                        if consistent_cols[x] == None:
                            consistent_cols[x] = s
                        else:
                            consistent_cols[x] = consistent_cols[
                                x].intersection(s)

                        x += 1

                    if lrt.mPassed:
                        c = "passed"
                        passed_tests[model] += 1
                        npassed += 1
                    else:
                        c = "failed"

                    options.stdout.write("\t%5.2e\t%i\t%5.2f\t%s" %\
                                         (lrt.mProbability,
                                          lrt.mDegreesFreedom,
                                          lrt.mChiSquaredValue,
                                          c))

                options.stdout.write(
                    "\t%i\t%i\t%s\n" %
                    (len(row_consistent), npassed, headers[nmethod]))

                nmethod += 1

            if options.prefix:
                options.stdout.write("%s\t" % options.prefix)

            options.stdout.write("cons")

            row_consistent = None
            total_passed = 0
            for model in options.models:

                x = 0

                for analysis in options.analysis:

                    s = consistent_cols[x]
                    if s == None:
                        s = set()

                    options.stdout.write("\t%i" % (len(s)))

                    if row_consistent == None:
                        row_consistent = s
                    else:
                        row_consistent = row_consistent.intersection(s)

                    x += 1

                options.stdout.write("\tna\t%i" % passed_tests[model])
                total_passed += passed_tests[model]

            options.stdout.write("\t%i\t%i\n" %
                                 (len(row_consistent), total_passed))

        elif method == "jalview":

            options.stdout.write("JALVIEW_ANNOTATION\n")
            options.stdout.write("# Created: %s\n\n" %
                                 (time.asctime(time.localtime(time.time()))))

            l = 1
            x = 0
            for result in results:

                sites, significance = selectPositiveSites(
                    [result], options.selection_mode, options, mali)

                codes = [""] * result.mLength

                if len(sites) == 0: continue

                for site in sites:
                    codes[site - 1] = options.jalview_symbol

                options.stdout.write(
                    "NO_GRAPH\t%s\t%s\n" %
                    (options.jalview_titles[x], "|".join(codes)))
                x += 1

        elif method == "count-positive-sites":

            sites, significance = selectPositiveSites(results,
                                                      options.selection_mode,
                                                      options, mali)

            options.stdout.write("%i\n" % (len(sites)))

        elif method in ("positive-site-table", ):

            sites, significance = selectPositiveSites(results,
                                                      options.selection_mode,
                                                      options, mali)

            headers = ["site", "P"]
            if map_old2new:
                headers.append("mapped")
                headers.append("Pm")

            options.stdout.write("\t".join(headers) + "\n")

            sites = list(sites)
            sites.sort()
            nmapped, nunmapped = 0, 0
            for site in sites:
                values = [site, "%6.4f" % significance[site]]

                if map_old2new:
                    r = map_old2new.mapRowToCol(site)
                    if r == 0:
                        values.append("na")
                        values.append("")
                        nunmapped += 1
                        if options.loglevel >= 2:
                            options.stdlog.write("# unmapped residue: %i\n" %
                                                 site)
                    else:
                        values.append(r)
                        values.append(consensus[r - 1])
                        nmapped += 1

                options.stdout.write("\t".join(map(str, (values))) + "\n")

            if options.loglevel >= 1:
                options.stdlog.write(
                    "# sites: ninput=%i, noutput=%i, nskipped=%i\n" %
                    (len(sites), nmapped, nunmapped))

    E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped))

    E.Stop()
Пример #2
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $", usage=globals()["__doc__"])

    parser.add_option("-i", "--input-format", dest="input_format", type="choice",
                      choices=(
                          "plain", "fasta", "clustal", "stockholm", "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o", "--output-format", dest="output_format", type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("add",),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a", "--alignment-method", dest="alignment_method", type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        mali = Mali.Mali()

        mali.readFromFile(
            open(options.parameters[0], "r"), format=options.input_format)
        del options.parameters[0]

        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while 1:
            cur_record = iterator.next()
            if cur_record is None:
                break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            # add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            a.thisown = 0

            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            id = cur_record.title
            mali.mIdentifiers.append(id)
            mali.mMali[id] = Mali.AlignedString(id, 0, len(
                cur_record.sequence), new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = new_mali.getRow(
                x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()
Пример #3
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: codemls2tsv.py 2781 2009-09-10 11:33:14Z andreas $")

    parser.add_option("--methods", dest="methods", type="choice", action="append",
                      choices=("summary-numbers", "jalview",
                               "positive-site-table", "positive-site-list",
                               "count-positive-sites"),
                      help="methods for analysis.")

    parser.add_option("--selection-mode", dest="selection_mode", type="choice",
                      choices=("all", "consistent", "emes"),
                      help="how to select positive sites.")

    parser.add_option("--prefix", dest="prefix", type="string",
                      help="prefix for rows.")

    parser.add_option("--pattern-input-filenames", dest="pattern_input_filenames", type="string",
                      help="input pattern.")

    parser.add_option("--filter-probability", dest="filter_probability", type="float",
                      help="threshold for probability above which to include positive sites [default=%default].")

    parser.add_option("--filter-omega", dest="filter_omega", type="float",
                      help="threshold for omega above which to include positive sites [default=%default].")

    parser.add_option("--models", dest="models", type="string",
                      help="restrict output to set of site specific models.")

    parser.add_option("--analysis", dest="analysis", type="string",
                      help="restrict output to set of analysis [beb|neb].")

    parser.add_option("--significance-threshold", dest="significance_threshold", type="float",
                      help="significance threshold for log-likelihood test.")

    parser.add_option("--filter-mali", dest="filter_mali", type="choice",
                      choices=("none", "gaps"),
                      help="filter by mali to remove gapped positions.")

    parser.add_option("--filename-mali", dest="filename_mali", type="string",
                      help="filename with multiple alignment used for calculating sites - used for filtering")

    parser.add_option("--filename-map-mali", dest="filename_map_mali", type="string",
                      help="filename with multiple alignment to map sites onto.")

    parser.add_option("--jalview-titles", dest="jalview_titles", type="string",
                      help="comma separated list of jalview annotation titles.")

    parser.add_option("--jalview-symbol", dest="jalview_symbol", type="string",
                      help="symbol to use in jalview.")

    parser.set_defaults(
        methods=[],
        prefix=None,
        filter_probability=0,
        filter_omega=0,
        models="",
        analysis="",
        significance_threshold=0.05,
        selection_mode="consistent",
        filename_mali=None,
        filename_map_mali=None,
        jalview_symbol="*",
        jalview_titles="",
        filter_mali=None,
    )

    (options, args) = E.Start(parser)

    if options.jalview_titles:
        options.jalview_titles = options.jalview_titles.split(",")
    else:
        options.jalview_titles = args

    options.models = options.models.split(",")
    options.analysis = options.analysis.split(",")

    for a in options.analysis:
        if a not in ("beb", "neb"):
            raise "unknown analysis section: '%s', possible values are 'beb' and/or 'neb'" % a

    for a in options.models:
        if a not in ("8", "2", "3"):
            raise "unknown model: '%s', possible values are 2, 3, 8" % a

    codeml = WrapperCodeML.CodeMLSites()

    # filter and extract functions
    filter_f = lambda x: x.mProbability >= options.filter_probability and x.mOmega >= options.filter_omega
    extract_f = lambda x: x.mResidue

    # read multiple results
    results = []
    ninput, noutput, nskipped = 0, 0, 0

    headers = []
    for f in args:
        ninput += 1
        try:
            results.append(codeml.parseOutput(open(f, "r").readlines()))
        except WrapperCodeML.UsageError:
            if options.loglevel >= 1:
                options.stdlog.write("# no input from %s\n" % f)
            nskipped += 1
            continue
        noutput += 1
        headers.append(f)

    # map of nested model (key) to more general model
    map_nested_models = {'8': '7',
                         '2': '1',
                         '3': '0'}

    if options.filename_mali:
        mali = Mali.Mali()
        mali.readFromFile(open(options.filename_mali, "r"))
    else:
        mali = None

    ###############################################################
    ###############################################################
    ###############################################################
    # use multiple alignment to map residues to a reference mali
    # or a sequence.
    ###############################################################
    if options.filename_map_mali:

        if not mali:
            raise "please supply the input multiple alignment, if residues are to be mapped."

        # translate the alignments
        def translate(s):
            sequence = s.mString
            seq = []
            for codon in [sequence[x:x + 3] for x in range(0, len(sequence), 3)]:
                aa = Genomics.MapCodon2AA(codon)
                seq.append(aa)

            s.mString = "".join(seq)

        tmali = Mali.Mali()
        tmali.readFromFile(open(options.filename_mali, "r"))
        tmali.apply(translate)

        tmap_mali = Mali.Mali()
        tmap_mali.readFromFile(open(options.filename_map_mali, "r"))

        if tmap_mali.getAlphabet() == "na":
            tmap_mali.apply(translate)

        map_old2new = alignlib_lite.py_makeAlignmentVector()

        mali1 = alignlib_lite.py_makeProfileFromMali(convertMali2Mali(tmali))

        if tmap_mali.getLength() == 1:

            s = tmap_mali.values()[0].mString
            mali2 = alignlib_lite.py_makeSequence(s)
            # see if you can find an identical subsequence and then align to
            # thisD
            for x in tmali.values():
                if s in re.sub("[- .]+", "", x.mString):
                    mali1 = alignlib_lite.py_makeSequence(x.mString)
                    break
        else:
            mali2 = alignlib_lite.py_makeProfileFromMali(
                convertMali2Mali(tmap_mali))

        alignator = alignlib_lite.py_makeAlignatorDPFull(
            alignlib_lite.py_ALIGNMENT_LOCAL, -10.0, -2.0)
        alignator.align(map_old2new, mali1, mali2)

        consensus = tmap_mali.getConsensus()

        if options.loglevel >= 4:
            options.stdlog.write("# alphabet: %s\n" % tmap_mali.getAlphabet())
            options.stdlog.write("# orig  : %s\n" % tmali.getConsensus())
            options.stdlog.write("# mapped: %s\n" % consensus)
            options.stdlog.write("# alignment: %s\n" % map_old2new.Write())
    else:
        map_old2new = None

    for method in options.methods:

        if method == "summary-numbers":

            options.stdlog.write(
                """# Numbers of positive sites.
#
# The consistent row/column contains positive sites that are significant
# (above thresholds for probability and omega) for all models/analysis
# that have been selected (label: cons).
#
# The log-likelihood ratio test is performed for model pairs, depending
# on the output chosen.
# Significance threshold: %6.4f
# The pairs are 8 versus 7 and 2 versus 1 and 3 versus 0.
#
""" % options.significance_threshold )

            # write header
            if options.prefix:
                options.stdout.write("prefix\t")

            options.stdout.write("method\tnseq\t")
            h = []
            for model in options.models:
                for analysis in options.analysis:
                    h.append("%s%s" % (analysis, model))
                h.append("p%s" % (model))
                h.append("df%s" % (model))
                h.append("chi%s" % (model))
                h.append("lrt%s" % (model))

            options.stdout.write("\t".join(h))
            options.stdout.write("\tcons\tpassed\tfilename\n")

            nmethod = 0

            consistent_cols = [None for x in range(len(options.analysis))]
            passed_tests = {}
            for m in options.models:
                passed_tests[m] = 0

            for result in results:

                row_consistent = None

                if options.prefix:
                    options.stdout.write("%s" % (options.prefix))

                options.stdout.write("%i" % nmethod)
                options.stdout.write("\t%i" % (result.mNumSequences))

                npassed = 0

                for model in options.models:

                    sites = result.mSites[model]

                    # do significance test
                    full_model, null_model = model, map_nested_models[model]

                    lrt = Stats.doLogLikelihoodTest(
                        result.mSites[full_model].mLogLikelihood,
                        result.mSites[full_model].mNumParameters,
                        result.mSites[null_model].mLogLikelihood,
                        result.mSites[null_model].mNumParameters,
                        options.significance_threshold)

                    x = 0
                    for analysis in options.analysis:

                        if analysis == "neb":
                            s = set(
                                map(extract_f, filter(filter_f, sites.mNEB.mPositiveSites)))

                        elif analysis == "beb":
                            s = set(
                                map(extract_f, filter(filter_f, sites.mBEB.mPositiveSites)))

                        options.stdout.write("\t%i" % (len(s)))

                        if not lrt.mPassed:
                            s = set()

                        if row_consistent is None:
                            row_consistent = s
                        else:
                            row_consistent = row_consistent.intersection(s)

                        if consistent_cols[x] is None:
                            consistent_cols[x] = s
                        else:
                            consistent_cols[x] = consistent_cols[
                                x].intersection(s)

                        x += 1

                    if lrt.mPassed:
                        c = "passed"
                        passed_tests[model] += 1
                        npassed += 1
                    else:
                        c = "failed"

                    options.stdout.write("\t%5.2e\t%i\t%5.2f\t%s" %
                                         (lrt.mProbability,
                                          lrt.mDegreesFreedom,
                                          lrt.mChiSquaredValue,
                                          c))

                options.stdout.write(
                    "\t%i\t%i\t%s\n" % (len(row_consistent), npassed, headers[nmethod]))

                nmethod += 1

            if options.prefix:
                options.stdout.write("%s\t" % options.prefix)

            options.stdout.write("cons")

            row_consistent = None
            total_passed = 0
            for model in options.models:

                x = 0

                for analysis in options.analysis:

                    s = consistent_cols[x]
                    if s is None:
                        s = set()

                    options.stdout.write("\t%i" % (len(s)))

                    if row_consistent is None:
                        row_consistent = s
                    else:
                        row_consistent = row_consistent.intersection(s)

                    x += 1

                options.stdout.write("\tna\t%i" % passed_tests[model])
                total_passed += passed_tests[model]

            options.stdout.write(
                "\t%i\t%i\n" % (len(row_consistent), total_passed))

        elif method == "jalview":

            options.stdout.write("JALVIEW_ANNOTATION\n")
            options.stdout.write("# Created: %s\n\n" %
                                 (time.asctime(time.localtime(time.time()))))

            l = 1
            x = 0
            for result in results:

                sites, significance = selectPositiveSites(
                    [result], options.selection_mode, options, mali)

                codes = [""] * result.mLength

                if len(sites) == 0:
                    continue

                for site in sites:
                    codes[site - 1] = options.jalview_symbol

                options.stdout.write(
                    "NO_GRAPH\t%s\t%s\n" % (options.jalview_titles[x], "|".join(codes)))
                x += 1

        elif method == "count-positive-sites":

            sites, significance = selectPositiveSites(
                results, options.selection_mode, options, mali)

            options.stdout.write("%i\n" % (len(sites)))

        elif method in ("positive-site-table", ):

            sites, significance = selectPositiveSites(
                results, options.selection_mode, options, mali)

            headers = ["site", "P"]
            if map_old2new:
                headers.append("mapped")
                headers.append("Pm")

            options.stdout.write("\t".join(headers) + "\n")

            sites = list(sites)
            sites.sort()
            nmapped, nunmapped = 0, 0
            for site in sites:
                values = [site, "%6.4f" % significance[site]]

                if map_old2new:
                    r = map_old2new.mapRowToCol(site)
                    if r == 0:
                        values.append("na")
                        values.append("")
                        nunmapped += 1
                        if options.loglevel >= 2:
                            options.stdlog.write(
                                "# unmapped residue: %i\n" % site)
                    else:
                        values.append(r)
                        values.append(consensus[r - 1])
                        nmapped += 1

                options.stdout.write("\t".join(map(str, (values))) + "\n")

            if options.loglevel >= 1:
                options.stdlog.write("# sites: ninput=%i, noutput=%i, nskipped=%i\n" % (
                    len(sites), nmapped, nunmapped))

    E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped))

    E.Stop()
Пример #4
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv == None: argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-i",
                      "--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("plain", "fasta", "clustal", "stockholm",
                               "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o",
                      "--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("add", ),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a",
                      "--alignment-method",
                      dest="alignment_method",
                      type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        mali = Mali.Mali()

        mali.readFromFile(open(options.parameters[0], "r"),
                          format=options.input_format)
        del options.parameters[0]

        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while 1:
            cur_record = iterator.next()
            if cur_record is None: break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            ## add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            a.thisown = 0

            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            id = cur_record.title
            mali.mIdentifiers.append(id)
            mali.mMali[id] = Mali.AlignedString(
                id, 0, len(cur_record.sequence),
                new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = new_mali.getRow(
                x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()