Пример #1
0
def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Print a table with the ASCII topology of every pair of differing nodes.

    Each row of ``difftable`` is unpacked as
    ``(dist, side1, side2, diff, n1, n2)``.  Rows are processed in
    descending sort order.  For each row both nodes are rebuilt from their
    own newick output, ladderized and drawn as ASCII art; leaves whose label
    is found in ``diff`` are tagged with " ***".

    :param difftable: sequence of 6-item rows; nothing is printed or logged
        when it is empty.
    :param attr1: leaf attribute used as label for the leaves under ``n1``.
    :param attr2: leaf attribute used as label for the leaves under ``n2``.
    :param usecolor: when true, tagged labels are passed through
        ``color(label, "red")``.
    :returns: None.
    """
    if not difftable:
        return

    maxcolwidth = 80

    def draw(node, attr, changed):
        # Work on a tree re-read from the node's newick text, so the
        # renaming below is applied to that new object only.
        clone = Tree(node.write(features=[attr]))
        clone.ladderize()
        for tip in clone.iter_leaves():
            tip.name = getattr(tip, attr)
            if tip.name not in changed:
                continue
            tip.name += " ***"
            if usecolor:
                tip.name = color(tip.name, "red")

        art = clone.get_ascii(show_internal=False, compact=False)

        # Drawings wider than the column are cut on the left, as if the
        # view had been scrolled to the right margin.
        rows = art.split("\n")
        widest = max(len(row) for row in rows)
        if widest > maxcolwidth:
            first_kept = widest - maxcolwidth + 1
            art = '\n'.join(row[first_kept:] for row in rows)
        return art

    showtable = []
    total_dist = 0
    for dist, side1, side2, diff, n1, n2 in sorted(difftable, reverse=True):
        total_dist += dist
        left = draw(n1, attr1, diff)
        right = draw(n2, attr2, diff)
        dist_cell = "%0.2g" % dist
        count_cell = "%d vs %d tips\n(%d diffs)" % (
            len(side1), len(side2), len(diff))
        showtable.append([dist_cell, count_cell, left, right])

    print_table(showtable,
                header=["Dist", "#diffs", "Tree1", "Tree2"],
                max_col_width=maxcolwidth,
                wrap_style="wrap",
                row_line=True)

    summary = "Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d" % (
        total_dist, len(difftable))
    log.info(summary)
Пример #2
0
def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    """Show, side by side, the topology of each pair of mismatching nodes.

    Every entry of ``difftable`` is unpacked as
    ``(dist, side1, side2, diff, n1, n2)``.  Entries are visited in
    descending sort order; the two nodes of an entry are re-read from their
    newick text, ladderized and rendered with ``get_ascii``.  A leaf whose
    label belongs to ``diff`` gets the suffix " ***".

    :param difftable: sequence of 6-item entries.  An empty value makes the
        function return immediately without printing anything.
    :param attr1: name of the leaf attribute shown for the first node.
    :param attr2: name of the leaf attribute shown for the second node.
    :param usecolor: if true, suffixed labels are wrapped with
        ``color(label, "red")``.
    :returns: None.
    """
    if not difftable:
        return

    maxcolwidth = 80
    table_rows = []
    total_dist = 0

    for dist, side1, side2, diff, n1, n2 in sorted(difftable, reverse=True):
        total_dist += dist

        drawings = []
        for node, attr in ((n1, attr1), (n2, attr2)):
            # Only the tree rebuilt from the newick string is renamed and
            # ladderized here.
            rebuilt = Tree(node.write(features=[attr]))
            rebuilt.ladderize()
            for leaf in rebuilt.iter_leaves():
                leaf.name = getattr(leaf, attr)
                if leaf.name in diff:
                    leaf.name += " ***"
                    if usecolor:
                        leaf.name = color(leaf.name, "red")

            text = rebuilt.get_ascii(show_internal=False, compact=False)

            # Too-wide drawings lose their left part, which imitates a view
            # scrolled all the way to the right margin.
            text_lines = text.split("\n")
            overflow = max(len(text_line) for text_line in text_lines) - maxcolwidth
            if overflow > 0:
                text = "\n".join(text_line[overflow + 1:] for text_line in text_lines)
            drawings.append(text)

        dist_label = "%0.2g" % dist
        tips_label = "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2), len(diff))
        table_rows.append([dist_label, tips_label] + drawings)

    print_table(
        table_rows,
        header=["Dist", "#diffs", "Tree1", "Tree2"],
        max_col_width=maxcolwidth,
        wrap_style="wrap",
        row_line=True,
    )

    n_mismatches = len(difftable)
    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d" % (total_dist, n_mismatches))
Пример #3
0
def get_output_filename():
    """Ask the user where the soundcard output should be saved.

    The name typed by the user gets the ".ogg" extension; an empty answer
    selects a "record_<timestamp>.ogg" name instead.  In both cases the
    file is placed under TMP_DIR.  If that path already exists an error is
    written to stderr and the program exits with status -2.

    :returns: the full path of the (not yet existing) output file.
    """
    def green(text):
        return color(text, 'green')

    # A parenthesised single argument prints the same text as the bare
    # print statement.
    print("Specify a %s where to save the soundcard output %s." % (green('filename'), green('(without .ogg)')))
    print("You can also press %s to save the output %s." % (green('ENTER'), green('to a temp. file')))

    answer = raw_input(color("Filename: ", 'yellow', ['bold']))
    if answer:
        basename = answer + ".ogg"
    else:
        basename = "record_%s.ogg" % common.get_timestamp()
    filename = os.path.join(TMP_DIR, basename)

    if not os.path.exists(filename):
        return filename

    # Never overwrite an existing recording.
    sys.stderr.write("%s: Error: the file %s already exists.\n" % (sys.argv[0], filename))
    sys.exit(-2)
Пример #4
0
def visualize(queries, by):
    """Plot the 'value' column of several query results on one line chart.

    All frames are first restricted to the range of years they have in
    common (from the latest first year to the earliest last year).  When
    ``by == 'indicator'`` each frame's values are standardised by
    subtracting their mean and dividing by their standard deviation
    (presumably so that different indicators can share one axis).

    As in the original implementation, ``queries`` is updated in place:
    every entry is replaced by its trimmed (and possibly standardised)
    frame.

    :param queries: non-empty list of DataFrames with 'year' and 'value'
        columns plus a column named ``by``.
    :param by: name of the column whose first value labels each trace.
    :returns: a plotly ``go.Figure`` with one line trace per query.
    """
    # The common window starts at the largest "first year" and ends at the
    # smallest "last year" found among the queries.
    earliest = queries[0]['year'].min()
    latest = queries[0]['year'].max()
    categories = []
    for query in queries:
        earliest = max(earliest, query['year'].min())
        latest = min(latest, query['year'].max())
        # Positional access: the first row supplies the label even when the
        # frame's index does not contain the label 0.
        categories.append(query[by].iloc[0])

    for i, query in enumerate(queries):
        in_window = (query['year'] >= earliest) & (query['year'] <= latest)
        # Take an explicit copy so the assignment below writes to an
        # independent frame and does not trigger SettingWithCopyWarning.
        trimmed = query[in_window].copy()
        if by == 'indicator':
            vals = trimmed['value']
            trimmed['value'] = (vals - vals.mean()) / vals.std()
        queries[i] = trimmed

    fig = go.Figure()
    for category, query in zip(categories, queries):
        fig.add_trace(go.Scatter(x=query['year'], y=query['value'], name=category,
                                 mode='lines',
                                 line={'width': 2, 'color': color()},
                                 fill='none'))
    fig.update_layout(template='plotly_dark',
                      plot_bgcolor='#23272c',
                      paper_bgcolor='#23272c',
                      yaxis_title='Value',
                      xaxis_title='Year')
    return fig
Пример #5
0
def get_output_filename():
    """Prompt for the name of the .ogg file that will receive the recording.

    Pressing ENTER without typing a name selects
    "record_<timestamp>.ogg"; otherwise ".ogg" is appended to the typed
    name.  The file always lives under TMP_DIR.  When the resulting path
    already exists, an error message goes to stderr and the process exits
    with status -2.

    :returns: the path of the output file.
    """
    instructions = (
        ("Specify a %s where to save the soundcard output %s.",
         'filename', '(without .ogg)'),
        ("You can also press %s to save the output %s.",
         'ENTER', 'to a temp. file'),
    )
    for template, first, second in instructions:
        # Printing one parenthesised value is equivalent to the print
        # statement form.
        print(template % (color(first, 'green'), color(second, 'green')))

    typed = raw_input(color("Filename: ", 'yellow', ['bold']))
    if typed:
        basename = typed + ".ogg"
    else:
        basename = "record_%s.ogg" % common.get_timestamp()
    filename = os.path.join(TMP_DIR, basename)

    if os.path.exists(filename):
        # Refuse to overwrite something that is already there.
        message = "%s: Error: the file %s already exists.\n" % (sys.argv[0],
                                                               filename)
        sys.stderr.write(message)
        sys.exit(-2)

    return filename
Пример #6
0
def main(argv):
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
    # action - The basic type of action to be taken when this argument is encountered at the command line. (store, store_const, store_true, store_false, append, append_const, version)
    # nargs - The number of command-line arguments that should be consumed. (N, ? (one or default), * (all 1 or more), + (more than 1) )
    # const - A constant value required by some action and nargs selections.
    # default - The value produced if the argument is absent from the command line.
    # type - The type to which the command-line argument should be converted.
    # choices - A container of the allowable values for the argument.
    # required - Whether or not the command-line option may be omitted (optionals only).
    # help - A brief description of what the argument does.
    # metavar - A name for the argument in usage messages.
    # dest - The name of the attribute to be added to the object returned by parse_args().

    parser.add_argument("--show",
                        dest="show_tree",
                        action="store_true",
                        help="""Display tree after the analysis.""")

    parser.add_argument("--render",
                        dest="render",
                        action="store_true",
                        help="""Render tree.""")

    parser.add_argument("--dump",
                        dest="dump",
                        action="store_true",
                        help="""Dump analysis""")

    parser.add_argument(
        "--explore",
        dest="explore",
        type=str,
        help="""Reads a previously analyzed tree and visualize it""")

    input_args = parser.add_mutually_exclusive_group()
    input_args.required = True
    input_args.add_argument("-t",
                            "--tree",
                            dest="target_tree",
                            nargs="+",
                            type=str,
                            help="""Tree file in newick format""")

    input_args.add_argument("-tf",
                            dest="tree_list_file",
                            type=str,
                            help="File with the list of tree files")

    parser.add_argument("--tax",
                        dest="tax_info",
                        type=str,
                        help="If the taxid attribute is not set in the"
                        " newick file for all leaf nodes, a tab file file"
                        " with the translation of name and taxid can be"
                        " provided with this option.")

    parser.add_argument(
        "--sp_delimiter",
        dest="sp_delimiter",
        type=str,
        help=
        "If taxid is part of the leaf name, delimiter used to split the string"
    )

    parser.add_argument(
        "--sp_field",
        dest="sp_field",
        type=int,
        default=0,
        help="field position for taxid after splitting leaf names")

    parser.add_argument("--ref",
                        dest="ref_tree",
                        type=str,
                        help="Uses ref tree to compute robinson foulds"
                        " distances of the different subtrees")

    parser.add_argument("--rf-only",
                        dest="rf_only",
                        action="store_true",
                        help="Skip ncbi consensus analysis")

    parser.add_argument(
        "--outgroup",
        dest="outgroup",
        type=str,
        nargs="+",
        help="A list of node names defining the trees outgroup")

    parser.add_argument("--is_sptree",
                        dest="is_sptree",
                        action="store_true",
                        help="Assumes no duplication nodes in the tree")

    parser.add_argument("-o",
                        dest="output",
                        type=str,
                        help="Writes result into a file")

    parser.add_argument("--tax2name", dest="tax2name", type=str, help="")

    parser.add_argument("--tax2track", dest="tax2track", type=str, help="")

    parser.add_argument("--dump_tax_info",
                        dest="dump_tax_info",
                        action="store_true",
                        help="")

    args = parser.parse_args(argv)

    if args.sp_delimiter:
        GET_TAXID = lambda x: x.split(args.sp_delimiter)[args.sp_field]
    else:
        GET_TAXID = None

    reftree_name = os.path.basename(args.ref_tree) if args.ref_tree else ""
    if args.explore:
        print >> sys.stderr, "Reading tree from file:", args.explore
        t = cPickle.load(open(args.explore))
        ts = TreeStyle()
        ts.force_topology = True
        ts.show_leaf_name = False
        ts.layout_fn = ncbi_layout
        ts.mode = "r"
        t.show(tree_style=ts)
        print >> sys.stderr, "dumping color config"
        cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))
        sys.exit()

    if args.output:
        OUT = open(args.output, "w")
    else:
        OUT = sys.stdout

    print >> sys.stderr, "Dumping results into", OUT
    target_trees = []
    if args.tree_list_file:
        target_trees = [line.strip() for line in open(args.tree_list_file)]
    if args.target_tree:
        target_trees += args.target_tree
    prev_tree = None
    if args.tax2name:
        tax2name = cPickle.load(open(args.tax2name))
    else:
        tax2name = {}

    if args.tax2track:
        tax2track = cPickle.load(open(args.tax2track))
    else:
        tax2track = {}
    print len(tax2track), len(tax2name)
    header = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees",
              "Broken clades", "Clade sizes", "RF (avg)", "RF (med)",
              "RF (std)", "RF (max)", "Shared tips")
    print >> OUT, '|'.join([h.ljust(15) for h in header])
    if args.ref_tree:
        print >> sys.stderr, "Reading ref tree from", args.ref_tree
        reft = Tree(args.ref_tree, format=1)
    else:
        reft = None

    SHOW_TREE = False
    if args.show_tree or args.render:
        SHOW_TREE = True

    prev_broken = set()
    ENTRIES = []
    ncbi.connect_database()
    for tfile in target_trees:
        #print tfile
        t = PhyloTree(tfile, sp_naming_function=None)
        if GET_TAXID:
            for n in t.iter_leaves():
                n.name = GET_TAXID(n.name)

        if args.outgroup:
            if len(args.outgroup) == 1:
                out = t & args.outgroup[0]
            else:
                out = t.get_common_ancestor(args.outgroup)
                if set(out.get_leaf_names()) ^ set(args.outgroup):
                    raise ValueError("Outgroup is not monophyletic")

            t.set_outgroup(out)
        t.ladderize()

        if prev_tree:
            tree_compare(t, prev_tree)
        prev_tree = t

        if args.tax_info:
            tax2name, tax2track = annotate_tree_with_taxa(
                t, args.tax_info, tax2name, tax2track)
            if args.dump_tax_info:
                cPickle.dump(tax2track, open("tax2track.pkl", "w"))
                cPickle.dump(tax2name, open("tax2name.pkl", "w"))
                print "Tax info written into pickle files"
        else:
            for n in t.iter_leaves():
                spcode = n.name
                n.add_features(taxid=spcode)
                n.add_features(species=spcode)
            tax2name, tax2track = annotate_tree_with_taxa(
                t, None, tax2name, tax2track)

        # Split tree into species trees
        #subtrees =  t.get_speciation_trees()
        if not args.rf_only:
            #print "Calculating tree subparts..."
            t1 = time.time()
            if not args.is_sptree:
                subtrees = t.split_by_dups()
                #print "Subparts:", len(subtrees), time.time()-t1
            else:
                subtrees = [t]

            valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = analyze_subtrees(
                t, subtrees, show_tree=SHOW_TREE)

            #print valid_subtrees, broken_subtrees, ncbi_mistakes, total_rf
        else:
            subtrees = []
            valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = 0, 0, 0, 0, 0, 0

        ndups = 0
        nsubtrees = len(subtrees)

        rf = 0
        rf_max = 0
        rf_std = 0
        rf_med = 0
        common_names = 0
        max_size = 0
        if reft and len(subtrees) == 1:
            rf = t.robinson_foulds(reft, attr_t1="realname")
            rf_max = rf[1]
            rf = rf[0]
            rf_med = rf

        elif reft:
            #print "Calculating avg RF..."
            nsubtrees, ndups, subtrees = t.get_speciation_trees(
                map_features=["taxid"])
            #print len(subtrees), "Sub-Species-trees found"
            avg_rf = []
            rf_max = 0.0  # reft.robinson_foulds(reft)[1]
            sum_size = 0.0
            print nsubtrees, "subtrees", ndups, "duplications"

            for ii, subt in enumerate(subtrees):
                print "\r%d" % ii,
                sys.stdout.flush()
                try:
                    partial_rf = subt.robinson_foulds(reft, attr_t1="taxid")
                except ValueError:
                    pass
                else:
                    sptree_size = len(
                        set([n.taxid for n in subt.iter_leaves()]))
                    sum_size += sptree_size
                    avg_rf.append(
                        (partial_rf[0] / float(partial_rf[1])) * sptree_size)
                    common_names = len(partial_rf[3])
                    max_size = max(max_size, sptree_size)
                    rf_max = max(rf_max, partial_rf[1])
                #print  partial_rf[:2]
            rf = numpy.sum(avg_rf) / float(sum_size)  # Treeko dist
            rf_std = numpy.std(avg_rf)
            rf_med = numpy.median(avg_rf)

        sizes_info = "%0.1f/%0.1f +- %0.1f" % (numpy.mean(broken_sizes),
                                               numpy.median(broken_sizes),
                                               numpy.std(broken_sizes))
        iter_values = [
            os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
            ncbi_mistakes, broken_branches, sizes_info, rf, rf_med, rf_std,
            rf_max, common_names
        ]
        print >> OUT, '|'.join(
            map(lambda x: str(x).strip().ljust(15), iter_values))
        fixed = sorted([n for n in prev_broken if n not in broken_clades])
        new_problems = sorted(broken_clades - prev_broken)
        fixed_string = color(', '.join(fixed), "green") if fixed else ""
        problems_string = color(', '.join(new_problems),
                                "red") if new_problems else ""
        OUT.write("    Fixed clades: %s\n" % fixed_string) if fixed else None
        OUT.write("    New broken:   %s\n" %
                  problems_string) if new_problems else None
        prev_broken = broken_clades
        ENTRIES.append([
            os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
            ncbi_mistakes, broken_branches, sizes_info, fixed_string,
            problems_string
        ])
        OUT.flush()
        if args.show_tree or args.render:
            ts = TreeStyle()
            ts.force_topology = True
            #ts.tree_width = 500
            ts.show_leaf_name = False
            ts.layout_fn = ncbi_layout
            ts.mode = "r"
            t.dist = 0
            if args.show_tree:
                #if args.hide_monophyletic:
                #    tax2monophyletic = {}
                #    n2content = t.get_node2content()
                #    for node in t.traverse():
                #        term2count = defaultdict(int)
                #        for leaf in n2content[node]:
                #            if leaf.lineage:
                #                for term in leaf.lineage:
                #                    term2count[term] += 1
                #        expected_size = len(n2content)
                #        for term, count in term2count.iteritems():
                #            if count > 1

                print "Showing tree..."
                t.show(tree_style=ts)
            else:
                t.render("img.svg", tree_style=ts, dpi=300)
            print "dumping color config"
            cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))

        if args.dump:
            cPickle.dump(t, open("ncbi_analysis.pkl", "w"))

    print
    print
    HEADER = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees",
              "Broken clades", "Broken branches", "Clade sizes",
              "Fixed Groups", "New Broken Clades")
    print_table(ENTRIES, max_col_width=50, row_line=True, header=HEADER)

    if args.output:
        OUT.close()
Пример #7
0
    def __init__(self):
        """Read the command-line options and set up everything the plot needs.

        The options are parsed with ``OptionParser`` and converted into
        Python values (id list, offsets dictionary, boolean flags).  They are
        then used to create the ``dataBaseBackend`` object and to compose
        ``self.name``, a string that is unique for this plot.
        """
        # Create the parser; every option simply stores a string value.
        parser = OptionParser()
        option_flags = (
            ("-a", "--type"),
            ("-b", "--idlist"),
            ("-c", "--from_d"),
            ("-d", "--to_d"),
            ("-e", "--xmin"),
            ("-f", "--xmax"),
            ("-g", "--ymin"),
            ("-i", "--ymax"),
            ("-j", "--offset"),
            ("-k", "--as_function_of_t"),
            ("-l", "--logscale"),
            ("-m", "--shift_temp_unit"),
            ("-n", "--flip_x"),
            ("-o", "--shift_be_ke"),
            ("-p", "--size"),
        )
        for short_flag, long_flag in option_flags:
            parser.add_option(short_flag, long_flag)

        options, _ = parser.parse_args()
        # Kept for use in other methods
        self.options = options

        ### Process options
        # The id list is comma separated; its first field is always skipped.
        id_fields = options.idlist.split(',')[1:]
        self.idlist = [int(field) for field in id_fields]

        # The offsets arrive as "key:value" pairs, again comma separated with
        # the first field skipped; keys become integers, values stay strings.
        offsets = {}
        for pair in options.offset.split(',')[1:]:
            pieces = pair.split(':')
            key = int(pieces[0])
            offsets[key] = pieces[1]
        self.offsets = offsets

        # A flag is on exactly when its option holds the string 'checked'.
        self.as_function_of_t = options.as_function_of_t == 'checked'
        self.shift_temp_unit = options.shift_temp_unit == 'checked'
        self.logscale = options.logscale == 'checked'
        self.flip_x = options.flip_x == 'checked'
        self.shift_be_ke = options.shift_be_ke == 'checked'

        ### Create db object # ADD MORE OPTIONS
        self.from_to = {'from': options.from_d, 'to': options.to_d}
        self.db = dataBaseBackend(
            typed=options.type,
            from_to=self.from_to,
            id_list=self.idlist,
            offsets=self.offsets,
            as_function_of_t=self.as_function_of_t,
            shift_temp_unit=self.shift_temp_unit,
            shift_be_ke=self.shift_be_ke,
        )

        self.standard_sizes = {
            'small': '450x300',
            'large': '4500x3000',
            'def_size': '900x600',
        }

        # The name starts with the chamber and plot type; every setting that
        # was handed to the db object is then appended to it.
        name = self.db.global_settings['chamber_name'] + '_' + options.type

        if not (options.from_d == '' and options.to_d == ''):
            name += '_' + options.from_d + '_' + options.to_d

        for flag in ('as_function_of_t', 'shift_temp_unit', 'logscale',
                     'flip_x', 'shift_be_ke'):
            if getattr(self, flag):
                name += '_' + flag

        if self.idlist:
            name += '_' + str(self.idlist)
        self.name = name

        # object to give first good color, and then random colors
        self.c = color()
Пример #8
0
    def __init__(self):
        """Parse the script options and prepare the plot's state.

        Builds an ``OptionParser``, turns the raw option strings into the
        id list, the offsets dictionary and the boolean settings, creates the
        ``dataBaseBackend`` instance and derives ``self.name``, a string
        meant to be unique for the plot described by the options.
        """
        flag_names = ('as_function_of_t', 'shift_temp_unit', 'logscale',
                      'flip_x', 'shift_be_ke')

        # Create and parse the options
        parser = OptionParser()
        # Short flag letter -> long option name
        parser.add_option("-a", "--type")
        parser.add_option("-b", "--idlist")
        parser.add_option("-c", "--from_d")
        parser.add_option("-d", "--to_d")
        for letter, bound in zip("efgi", ("xmin", "xmax", "ymin", "ymax")):
            parser.add_option("-" + letter, "--" + bound)
        parser.add_option("-j", "--offset")
        parser.add_option("-k", "--as_function_of_t")
        parser.add_option("-l", "--logscale")
        parser.add_option("-m", "--shift_temp_unit")
        parser.add_option("-n", "--flip_x")
        parser.add_option("-o", "--shift_be_ke")
        parser.add_option("-p", "--size")

        parsed = parser.parse_args()
        options = parsed[0]
        # For use in other methods
        self.options = options

        ### Process options
        # Integer ids: comma separated, the leading field is dropped
        self.idlist = list(map(int, options.idlist.split(',')[1:]))

        # "key:value" pairs (comma separated, leading field dropped) become
        # a dictionary with integer keys and string values
        def split_pair(text):
            pieces = text.split(':')
            return int(pieces[0]), pieces[1]

        self.offsets = dict(
            [split_pair(text) for text in options.offset.split(',')[1:]])

        # The boolean settings are true only for the value 'checked'
        for flag in flag_names:
            setattr(self, flag, getattr(options, flag) == 'checked')

        ### Create db object # ADD MORE OPTIONS
        self.from_to = {'from': options.from_d, 'to': options.to_d}
        backend_settings = {
            'typed': options.type,
            'from_to': self.from_to,
            'id_list': self.idlist,
            'offsets': self.offsets,
            'as_function_of_t': self.as_function_of_t,
            'shift_temp_unit': self.shift_temp_unit,
            'shift_be_ke': self.shift_be_ke,
        }
        self.db = dataBaseBackend(**backend_settings)

        self.standard_sizes = {'small': '450x300', 'large': '4500x3000',
                               'def_size': '900x600'}

        # The 'name' is a string that is unique for this plot: chamber and
        # type first, then everything that was passed to the db object
        self.name = self.db.global_settings['chamber_name'] + '_' + options.type

        if options.from_d != '' or options.to_d != '':
            self.name += '_' + options.from_d + '_' + options.to_d

        # One "_<flag>" suffix per enabled setting, in declaration order
        self.name += ''.join(
            ['_' + flag for flag in flag_names if getattr(self, flag)])

        if len(self.idlist) > 0:
            self.name += '_' + str(self.idlist)

        # object to give first good color, and then random colors
        self.c = color()
Пример #9
0
def main(argv):
    parser = argparse.ArgumentParser(description=__DESCRIPTION__, 
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    # name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
    # action - The basic type of action to be taken when this argument is encountered at the command line. (store, store_const, store_true, store_false, append, append_const, version)
    # nargs - The number of command-line arguments that should be consumed. (N, ? (one or default), * (all 1 or more), + (more than 1) )
    # const - A constant value required by some action and nargs selections. 
    # default - The value produced if the argument is absent from the command line.
    # type - The type to which the command-line argument should be converted.
    # choices - A container of the allowable values for the argument.
    # required - Whether or not the command-line option may be omitted (optionals only).
    # help - A brief description of what the argument does.
    # metavar - A name for the argument in usage messages.
    # dest - The name of the attribute to be added to the object returned by parse_args().
    
    parser.add_argument("--show", dest="show_tree",
                        action="store_true", 
                        help="""Display tree after the analysis.""")
    
    parser.add_argument("--render", dest="render",
                        action="store_true", 
                        help="""Render tree.""")

    parser.add_argument("--dump", dest="dump",
                        action="store_true", 
                        help="""Dump analysis""")

    parser.add_argument("--explore", dest="explore",
                        type=str,
                        help="""Reads a previously analyzed tree and visualize it""")
    
    input_args = parser.add_mutually_exclusive_group()
    input_args.required=True
    input_args.add_argument("-t", "--tree", dest="target_tree",  nargs="+",
                        type=str,
                        help="""Tree file in newick format""")

    input_args.add_argument("-tf", dest="tree_list_file",
                        type=str, 
                        help="File with the list of tree files")
    
    parser.add_argument("--tax", dest="tax_info", type=str,
                        help="If the taxid attribute is not set in the"
                        " newick file for all leaf nodes, a tab file file"
                        " with the translation of name and taxid can be"
                        " provided with this option.")

    parser.add_argument("--sp_delimiter", dest="sp_delimiter", type=str,
                        help="If taxid is part of the leaf name, delimiter used to split the string")

    parser.add_argument("--sp_field", dest="sp_field", type=int, default=0,
                        help="field position for taxid after splitting leaf names")
    
    parser.add_argument("--ref", dest="ref_tree", type=str,
                        help="Uses ref tree to compute robinson foulds"
                        " distances of the different subtrees")

    parser.add_argument("--rf-only", dest="rf_only",
                        action = "store_true",
                        help="Skip ncbi consensus analysis")

    parser.add_argument("--outgroup", dest="outgroup",
                        type=str, nargs="+",
                        help="A list of node names defining the trees outgroup")
    
    parser.add_argument("--is_sptree", dest="is_sptree",
                        action = "store_true",
                        help="Assumes no duplication nodes in the tree")
    
    parser.add_argument("-o", dest="output", type=str,
                        help="Writes result into a file")

    parser.add_argument("--tax2name", dest="tax2name", type=str,
                        help="")
    
    parser.add_argument("--tax2track", dest="tax2track", type=str,
                        help="")
    
    parser.add_argument("--dump_tax_info", dest="dump_tax_info", action="store_true",
                        help="")
    
    args = parser.parse_args(argv)

    if args.sp_delimiter:
        GET_TAXID = lambda x: x.split(args.sp_delimiter)[args.sp_field]
    else:
        GET_TAXID = None
    
    reftree_name = os.path.basename(args.ref_tree) if args.ref_tree else ""
    if args.explore:
        print >>sys.stderr, "Reading tree from file:", args.explore
        t = cPickle.load(open(args.explore))
        ts = TreeStyle()
        ts.force_topology = True
        ts.show_leaf_name = False
        ts.layout_fn = ncbi_layout 
        ts.mode = "r"
        t.show(tree_style=ts)
        print >>sys.stderr, "dumping color config"
        cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))
        sys.exit()
    
    if args.output:
        OUT = open(args.output, "w")
    else:
        OUT = sys.stdout

    print >>sys.stderr, "Dumping results into", OUT
    target_trees = []
    if args.tree_list_file:
        target_trees = [line.strip() for line in open(args.tree_list_file)]
    if args.target_tree:
        target_trees += args.target_tree
    prev_tree = None
    if args.tax2name:
        tax2name = cPickle.load(open(args.tax2name))
    else:
        tax2name = {}

    if args.tax2track:
        tax2track = cPickle.load(open(args.tax2track))
    else:
        tax2track = {}
    print len(tax2track), len(tax2name)
    header = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees", "Broken clades", "Clade sizes", "RF (avg)", "RF (med)", "RF (std)", "RF (max)", "Shared tips")
    print >>OUT, '|'.join([h.ljust(15) for h in header])
    if args.ref_tree:
        print >>sys.stderr, "Reading ref tree from", args.ref_tree
        reft = Tree(args.ref_tree, format=1)
    else:
        reft = None

    SHOW_TREE = False
    if args.show_tree or args.render:
        SHOW_TREE = True

        
    prev_broken = set()
    ENTRIES = []
    ncbi.connect_database()
    for tfile in target_trees:
        #print tfile
        t = PhyloTree(tfile, sp_naming_function=None)
        if GET_TAXID:
            for n in t.iter_leaves():
                n.name = GET_TAXID(n.name)
        
        if args.outgroup:
            if len(args.outgroup) == 1:
                out = t & args.outgroup[0]
            else:
                out = t.get_common_ancestor(args.outgroup)
                if set(out.get_leaf_names()) ^ set(args.outgroup):
                    raise ValueError("Outgroup is not monophyletic")
                
            t.set_outgroup(out)
        t.ladderize()

        if prev_tree:
            tree_compare(t, prev_tree)
        prev_tree = t
       
        
        if args.tax_info:
            tax2name, tax2track = annotate_tree_with_taxa(t, args.tax_info, tax2name, tax2track)
            if args.dump_tax_info:
                cPickle.dump(tax2track, open("tax2track.pkl", "w"))
                cPickle.dump(tax2name, open("tax2name.pkl", "w"))
                print "Tax info written into pickle files"
        else:
            for n in t.iter_leaves():
                spcode = n.name
                n.add_features(taxid=spcode)
                n.add_features(species=spcode)
            tax2name, tax2track = annotate_tree_with_taxa(t, None, tax2name, tax2track)
            
        # Split tree into species trees
        #subtrees =  t.get_speciation_trees()
        if not args.rf_only:
            #print "Calculating tree subparts..."
            t1 = time.time()
            if not args.is_sptree:
                subtrees =  t.split_by_dups()
                #print "Subparts:", len(subtrees), time.time()-t1
            else:
                subtrees = [t]

          
            valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = analyze_subtrees(t, subtrees, show_tree=SHOW_TREE)
            
            #print valid_subtrees, broken_subtrees, ncbi_mistakes, total_rf
        else:
            subtrees = []
            valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = 0, 0, 0, 0, 0, 0
            
        ndups = 0
        nsubtrees = len(subtrees)
           
        rf = 0
        rf_max = 0
        rf_std = 0
        rf_med = 0
        common_names = 0
        max_size = 0
        if reft and len(subtrees) == 1:
            rf = t.robinson_foulds(reft, attr_t1="realname")
            rf_max = rf[1]
            rf = rf[0]
            rf_med = rf
            
        elif reft:
            #print "Calculating avg RF..."
            nsubtrees, ndups, subtrees = t.get_speciation_trees(map_features=["taxid"])
            #print len(subtrees), "Sub-Species-trees found"
            avg_rf = []
            rf_max = 0.0 # reft.robinson_foulds(reft)[1]
            sum_size = 0.0
            print nsubtrees, "subtrees", ndups, "duplications"

            for ii, subt in enumerate(subtrees):
                print "\r%d" %ii,
                sys.stdout.flush()
                try:
                    partial_rf = subt.robinson_foulds(reft, attr_t1="taxid")
                except ValueError:
                    pass
                else:
                    sptree_size = len(set([n.taxid for n in subt.iter_leaves()]))
                    sum_size += sptree_size
                    avg_rf.append((partial_rf[0]/float(partial_rf[1])) * sptree_size)
                    common_names = len(partial_rf[3])
                    max_size = max(max_size, sptree_size)
                    rf_max = max(rf_max, partial_rf[1])
                #print  partial_rf[:2]
            rf = numpy.sum(avg_rf) / float(sum_size) # Treeko dist
            rf_std = numpy.std(avg_rf)
            rf_med = numpy.median(avg_rf)

        sizes_info = "%0.1f/%0.1f +- %0.1f" %( numpy.mean(broken_sizes), numpy.median(broken_sizes), numpy.std(broken_sizes))
        iter_values = [os.path.basename(tfile), nsubtrees, ndups,
                        broken_subtrees, ncbi_mistakes, broken_branches, sizes_info, rf, rf_med,
                       rf_std, rf_max, common_names] 
        print >>OUT, '|'.join(map(lambda x: str(x).strip().ljust(15), iter_values)) 
        fixed = sorted([n for n in prev_broken if n not in broken_clades])
        new_problems =  sorted(broken_clades - prev_broken)
        fixed_string = color(', '.join(fixed), "green") if fixed else ""
        problems_string = color(', '.join(new_problems), "red") if new_problems else ""
        OUT.write("    Fixed clades: %s\n" %fixed_string) if fixed else None
        OUT.write("    New broken:   %s\n" %problems_string) if new_problems else None
        prev_broken = broken_clades
        ENTRIES.append([os.path.basename(tfile), nsubtrees, ndups,
                        broken_subtrees, ncbi_mistakes, broken_branches, sizes_info, fixed_string, problems_string])
        OUT.flush()
        if args.show_tree or args.render:
            ts = TreeStyle()
            ts.force_topology = True
            #ts.tree_width = 500
            ts.show_leaf_name = False
            ts.layout_fn = ncbi_layout 
            ts.mode = "r"
            t.dist = 0
            if args.show_tree:
                #if args.hide_monophyletic:
                #    tax2monophyletic = {}
                #    n2content = t.get_node2content()
                #    for node in t.traverse():
                #        term2count = defaultdict(int)
                #        for leaf in n2content[node]:
                #            if leaf.lineage:
                #                for term in leaf.lineage:
                #                    term2count[term] += 1
                #        expected_size = len(n2content)
                #        for term, count in term2count.iteritems():
                #            if count > 1
                    
                print "Showing tree..."
                t.show(tree_style=ts)
            else:
                t.render("img.svg", tree_style=ts, dpi=300)
            print "dumping color config"
            cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))

        if args.dump:
            cPickle.dump(t, open("ncbi_analysis.pkl", "w"))
            
    print
    print
    HEADER = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees", "Broken clades", "Broken branches", "Clade sizes", "Fixed Groups", "New Broken Clades")
    print_table(ENTRIES, max_col_width = 50, row_line=True, header=HEADER)
            
    if args.output:
        OUT.close()
Пример #10
0
def ncbi_consensus(self, ):
    nsubtrees, ndups, subtrees = self.get_speciation_trees(map_features=["taxid"])

    valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = analyze_subtrees(t, subtrees, show_tree=SHOW_TREE)


    avg_rf = []
    rf_max = 0.0 # reft.robinson_foulds(reft)[1]
    sum_size = 0.0

    #reftree = 


    for tn, subt in enumerate(subtrees):
        partial_rf = subt.robinson_foulds(reft, attr_t1="taxid")

        sptree_size = len(set([n.taxid for n in subt.iter_leaves()]))
        sum_size += sptree_size
        avg_rf.append((partial_rf[0]/float(partial_rf[1])) * sptree_size)
        common_names = len(partial_rf[3])
        max_size = max(max_size, sptree_size)
        rf_max = max(rf_max, partial_rf[1])


        rf = numpy.sum(avg_rf) / float(sum_size) # Treeko dist
        rf_std = numpy.std(avg_rf)
        rf_med = numpy.median(avg_rf)

        sizes_info = "%0.1f/%0.1f +- %0.1f" %( numpy.mean(broken_sizes), numpy.median(broken_sizes), numpy.std(broken_sizes))
        iter_values = [os.path.basename(tfile), nsubtrees, ndups,
                        broken_subtrees, ncbi_mistakes, broken_branches, sizes_info, rf, rf_med,
                       rf_std, rf_max, common_names] 
        print >>OUT, '|'.join(map(lambda x: str(x).strip().ljust(15), iter_values)) 
        fixed = sorted([n for n in prev_broken if n not in broken_clades])
        new_problems =  sorted(broken_clades - prev_broken)
        fixed_string = color(', '.join(fixed), "green") if fixed else ""
        problems_string = color(', '.join(new_problems), "red") if new_problems else ""
        OUT.write("    Fixed clades: %s\n" %fixed_string) if fixed else None
        OUT.write("    New broken:   %s\n" %problems_string) if new_problems else None
        prev_broken = broken_clades
        ENTRIES.append([os.path.basename(tfile), nsubtrees, ndups,
                        broken_subtrees, ncbi_mistakes, broken_branches, sizes_info, fixed_string, problems_string])
        OUT.flush()
        if args.show_tree or args.render:
            ts = TreeStyle()
            ts.force_topology = True
            #ts.tree_width = 500
            ts.show_leaf_name = False
            ts.layout_fn = ncbi_layout 
            ts.mode = "r"
            t.dist = 0
            if args.show_tree:
                #if args.hide_monophyletic:
                #    tax2monophyletic = {}
                #    n2content = t.get_node2content()
                #    for node in t.traverse():
                #        term2count = defaultdict(int)
                #        for leaf in n2content[node]:
                #            if leaf.lineage:
                #                for term in leaf.lineage:
                #                    term2count[term] += 1
                #        expected_size = len(n2content)
                #        for term, count in term2count.iteritems():
                #            if count > 1
                    
                print "Showing tree..."
                t.show(tree_style=ts)
            else:
                t.render("img.svg", tree_style=ts, dpi=300)
            print "dumping color config"
            cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))

        if args.dump:
            cPickle.dump(t, open("ncbi_analysis.pkl", "w"))
Пример #11
0
def main():
    common.verify_output_dir(TMP_DIR)
    out = get_output_filename()
    command = "arecord -d 0 -c 2 -f S16_LE -r 44100 -t wav -D copy | oggenc -o %s -" % out
    print color(command, 'cyan')
    print color("Press CTRL+C to stop the recording process.", 'green')
    start = time.time()
    os.system(command)
    end = time.time()
    print color(common.elapsed_time(end, start), 'yellow')
    print color(
        "Size of the output file: %s bytes." %
        common.numberToPrettyString(os.path.getsize(out)), 'yellow')
    print color(
        "If you want to listen to the recorded file, execute the following command:",
        'green')
    print color("mplayer %s" % out, 'cyan')
Пример #12
0
def main():
    common.verify_output_dir(TMP_DIR)
    out = get_output_filename()
    command = "arecord -d 0 -c 2 -f S16_LE -r 44100 -t wav -D copy | oggenc -o %s -" % out
    print color(command, 'cyan')
    print color("Press CTRL+C to stop the recording process.", 'green')
    start = time.time()
    os.system(command)
    end = time.time()
    print color( common.elapsed_time(end, start), 'yellow' )
    print color("Size of the output file: %s bytes." % common.numberToPrettyString(os.path.getsize(out)), 'yellow')
    print color("If you want to listen to the recorded file, execute the following command:", 'green')
    print color("mplayer %s" % out, 'cyan')
Пример #13
0
plt.subplot(111)

# Pick the plotting function for the y axis. A log axis is used either when
# explicitly requested (logscale) or when the database settings default to
# it; the chosen scale is also appended to the plot name.
gs = db.global_settings
if logscale or gs['default_yscale'] == 'log':
    name += '_semilog'
    myplot = plt.semilogy
else:
    name += '_linear'
    myplot = plt.plot

# Colour provider: first a set of good colours, then random ones
c = color()

# Draw one curve per data set: column 0 against column 1
for data in db.get_data():
    xy = data['data']
    myplot(xy[:, 0], xy[:, 1], color=c.get_color())

# Plotting is done; adjust the axis limits where the options ask for it.
# Start from the current limits, which are indexed below as
# [0:2] for the x range and [2:4] for the y range.
axis = plt.axis()
# Equal min and max means "no explicit range given" for that axis.
if options.xmin != options.xmax:
    axis = (float(options.xmin), float(options.xmax)) + axis[2:]
if options.ymin != options.ymax:
    axis = axis[:2] + (float(options.ymin), float(options.ymax))
if flip_x:
    # Swap the two x limits to reverse the direction of the x axis.
    axis = axis[1::-1] + axis[2:]
plt.axis(axis)
Пример #14
0
def ncbi_consensus(self, ):
    nsubtrees, ndups, subtrees = self.get_speciation_trees(
        map_features=["taxid"])

    valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches, total_rf, broken_clades, broken_sizes = analyze_subtrees(
        t, subtrees, show_tree=SHOW_TREE)

    avg_rf = []
    rf_max = 0.0  # reft.robinson_foulds(reft)[1]
    sum_size = 0.0

    #reftree =

    for tn, subt in enumerate(subtrees):
        partial_rf = subt.robinson_foulds(reft, attr_t1="taxid")

        sptree_size = len(set([n.taxid for n in subt.iter_leaves()]))
        sum_size += sptree_size
        avg_rf.append((partial_rf[0] / float(partial_rf[1])) * sptree_size)
        common_names = len(partial_rf[3])
        max_size = max(max_size, sptree_size)
        rf_max = max(rf_max, partial_rf[1])

        rf = numpy.sum(avg_rf) / float(sum_size)  # Treeko dist
        rf_std = numpy.std(avg_rf)
        rf_med = numpy.median(avg_rf)

        sizes_info = "%0.1f/%0.1f +- %0.1f" % (numpy.mean(broken_sizes),
                                               numpy.median(broken_sizes),
                                               numpy.std(broken_sizes))
        iter_values = [
            os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
            ncbi_mistakes, broken_branches, sizes_info, rf, rf_med, rf_std,
            rf_max, common_names
        ]
        print >> OUT, '|'.join(
            map(lambda x: str(x).strip().ljust(15), iter_values))
        fixed = sorted([n for n in prev_broken if n not in broken_clades])
        new_problems = sorted(broken_clades - prev_broken)
        fixed_string = color(', '.join(fixed), "green") if fixed else ""
        problems_string = color(', '.join(new_problems),
                                "red") if new_problems else ""
        OUT.write("    Fixed clades: %s\n" % fixed_string) if fixed else None
        OUT.write("    New broken:   %s\n" %
                  problems_string) if new_problems else None
        prev_broken = broken_clades
        ENTRIES.append([
            os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
            ncbi_mistakes, broken_branches, sizes_info, fixed_string,
            problems_string
        ])
        OUT.flush()
        if args.show_tree or args.render:
            ts = TreeStyle()
            ts.force_topology = True
            #ts.tree_width = 500
            ts.show_leaf_name = False
            ts.layout_fn = ncbi_layout
            ts.mode = "r"
            t.dist = 0
            if args.show_tree:
                #if args.hide_monophyletic:
                #    tax2monophyletic = {}
                #    n2content = t.get_node2content()
                #    for node in t.traverse():
                #        term2count = defaultdict(int)
                #        for leaf in n2content[node]:
                #            if leaf.lineage:
                #                for term in leaf.lineage:
                #                    term2count[term] += 1
                #        expected_size = len(n2content)
                #        for term, count in term2count.iteritems():
                #            if count > 1

                print "Showing tree..."
                t.show(tree_style=ts)
            else:
                t.render("img.svg", tree_style=ts, dpi=300)
            print "dumping color config"
            cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))

        if args.dump:
            cPickle.dump(t, open("ncbi_analysis.pkl", "w"))
Пример #15
0
plt.subplot(111)

# Select the y axis type: semilog when requested through logscale or when
# it is the default in the database settings, linear otherwise. The plot
# name gets a matching suffix.
gs = db.global_settings
if logscale or gs['default_yscale'] == 'log':
    name += '_semilog'
    myplot = plt.semilogy
else:
    name += '_linear'
    myplot = plt.plot

# Hands out a good first colour, random colours after that
c = color()

# One curve per data set, column 0 on x and column 1 on y
for data in db.get_data():
    xy = data['data']
    myplot(xy[:, 0], xy[:, 1], color=c.get_color())

# With the curves drawn, override the axis limits if the options say so.
# The current limits are the starting point; they are indexed below as
# [0:2] for the x range and [2:4] for the y range.
axis = plt.axis()
# A range whose min equals its max is treated as "not set".
if options.xmin != options.xmax:
    axis = (float(options.xmin), float(options.xmax)) + axis[2:]
if options.ymin != options.ymax:
    axis = axis[:2] + (float(options.ymin), float(options.ymax))
if flip_x:
    # Reverse the x axis by exchanging its two limits.
    axis = axis[1::-1] + axis[2:]
plt.axis(axis)