def show_difftable_topo(difftable, attr1, attr2, usecolor=False):
    if not difftable:
        return
    showtable = []
    maxcolwidth = 80
    total_dist = 0
    for dist, side1, side2, diff, n1, n2 in sorted(difftable, reverse=True):
        total_dist += dist
        n1 = Tree(n1.write(features=[attr1]))
        n2 = Tree(n2.write(features=[attr2]))
        n1.ladderize()
        n2.ladderize()
        for leaf in n1.iter_leaves():
            leaf.name = getattr(leaf, attr1)
            if leaf.name in diff:
                leaf.name += " ***"
                if usecolor:
                    leaf.name = color(leaf.name, "red")
        for leaf in n2.iter_leaves():
            leaf.name = getattr(leaf, attr2)
            if leaf.name in diff:
                leaf.name += " ***"
                if usecolor:
                    leaf.name = color(leaf.name, "red")

        topo1 = n1.get_ascii(show_internal=False, compact=False)
        topo2 = n2.get_ascii(show_internal=False, compact=False)

        # Truncate overly wide topology strings, as if they were scrolled
        # to the right margin
        topo1_lines = topo1.split("\n")
        topowidth1 = max([len(l) for l in topo1_lines])
        if topowidth1 > maxcolwidth:
            start = topowidth1 - maxcolwidth
            topo1 = '\n'.join([line[start + 1:] for line in topo1_lines])

        topo2_lines = topo2.split("\n")
        topowidth2 = max([len(l) for l in topo2_lines])
        if topowidth2 > maxcolwidth:
            start = topowidth2 - maxcolwidth
            topo2 = '\n'.join([line[start + 1:] for line in topo2_lines])

        showtable.append([
            "%0.2g" % dist,
            "%d vs %d tips\n(%d diffs)" % (len(side1), len(side2), len(diff)),
            topo1, topo2
        ])

    print_table(showtable, header=["Dist", "#diffs", "Tree1", "Tree2"],
                max_col_width=maxcolwidth, wrap_style="wrap", row_line=True)

    log.info("Total euclidean distance:\t%0.4f\tMismatching nodes:\t%d" %
             (total_dist, len(difftable)))
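# Hedged usage sketch (not part of the original module): shows the six-field
# row layout this viewer expects -- (distance, side1, side2, diff, node1,
# node2) -- using two tiny hand-built trees. The helper name and the example
# data are made up for illustration only; real difftables come from the
# surrounding tree-diff machinery.
def _demo_show_difftable_topo():
    t1 = Tree("((A,B),C);")
    t2 = Tree("((A,D),C);")
    row = (0.33, {"A", "B", "C"}, {"A", "D", "C"}, {"B", "D"}, t1, t2)
    show_difftable_topo([row], "name", "name", usecolor=False)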
def get_output_filename():
    print "Specify a %s where to save the soundcard output %s." % (
        color('filename', 'green'), color('(without .ogg)', 'green'))
    print "You can also press %s to save the output %s." % (
        color('ENTER', 'green'), color('to a temp. file', 'green'))

    filename = raw_input(color("Filename: ", 'yellow', ['bold']))
    if len(filename) == 0:
        filename = os.path.join(TMP_DIR, "record_%s.ogg" % common.get_timestamp())
    else:
        filename = os.path.join(TMP_DIR, filename + ".ogg")

    if os.path.exists(filename):
        sys.stderr.write("%s: Error: the file %s already exists.\n" %
                         (sys.argv[0], filename))
        sys.exit(-2)

    return filename
def visualize(queries, by):
    categories = []
    earliest = queries[0]['year'].min()
    latest = queries[0]['year'].max()

    # Find the range of years shared by all queries and remember each
    # query's category label
    for query in queries:
        first_year = query['year'].min()
        last_year = query['year'].max()
        if earliest < first_year:
            earliest = first_year
        if latest > last_year:
            latest = last_year
        categories.append(query[by][0])

    for i in range(len(queries)):
        queries[i] = queries[i][(queries[i].year >= earliest) & (queries[i].year <= latest)]
        if by == 'indicator':
            # Standardize values so differently scaled indicators are comparable
            vals = queries[i]['value']
            queries[i]['value'] = (vals - vals.mean()) / vals.std()

    fig = go.Figure()
    for i in range(len(queries)):
        query = queries[i]
        fig.add_trace(go.Scatter(x=query['year'], y=query['value'],
                                 name=categories[i], mode='lines',
                                 line={'width': 2, 'color': color()},
                                 fill='none'))
    fig.update_layout(template='plotly_dark', plot_bgcolor='#23272c',
                      paper_bgcolor='#23272c', yaxis_title='Value',
                      xaxis_title='Year')
    return fig
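# Hedged usage sketch (illustrative only, not from the original source): the
# function expects a list of pandas DataFrames, each carrying 'year' and
# 'value' columns plus a category column named by `by`. The helper name, the
# 'country' column and the numbers below are fabricated to show the shape;
# `go` and `color` are assumed to come from the surrounding module.
def _demo_visualize():
    import pandas as pd
    q1 = pd.DataFrame({"year": [2000, 2001, 2002],
                       "value": [1.0, 1.5, 2.0],
                       "country": ["Chile"] * 3})
    q2 = pd.DataFrame({"year": [2001, 2002, 2003],
                       "value": [3.0, 2.5, 2.0],
                       "country": ["Peru"] * 3})
    return visualize([q1, q2], by="country")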
def main(argv):
    parser = argparse.ArgumentParser(description=__DESCRIPTION__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    # name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
    # action - The basic type of action to be taken when this argument is encountered at the command line. (store, store_const, store_true, store_false, append, append_const, version)
    # nargs - The number of command-line arguments that should be consumed. (N, ? (one or default), * (all 1 or more), + (more than 1) )
    # const - A constant value required by some action and nargs selections.
    # default - The value produced if the argument is absent from the command line.
    # type - The type to which the command-line argument should be converted.
    # choices - A container of the allowable values for the argument.
    # required - Whether or not the command-line option may be omitted (optionals only).
    # help - A brief description of what the argument does.
    # metavar - A name for the argument in usage messages.
    # dest - The name of the attribute to be added to the object returned by parse_args().

    parser.add_argument("--show", dest="show_tree", action="store_true",
                        help="""Display tree after the analysis.""")
    parser.add_argument("--render", dest="render", action="store_true",
                        help="""Render tree.""")
    parser.add_argument("--dump", dest="dump", action="store_true",
                        help="""Dump analysis""")
    parser.add_argument("--explore", dest="explore", type=str,
                        help="""Reads a previously analyzed tree and visualize it""")

    input_args = parser.add_mutually_exclusive_group()
    input_args.required = True
    input_args.add_argument("-t", "--tree", dest="target_tree", nargs="+", type=str,
                            help="""Tree file in newick format""")
    input_args.add_argument("-tf", dest="tree_list_file", type=str,
                            help="File with the list of tree files")

    parser.add_argument("--tax", dest="tax_info", type=str,
                        help="If the taxid attribute is not set in the"
                        " newick file for all leaf nodes, a tab file"
                        " with the translation of name and taxid can be"
                        " provided with this option.")
    parser.add_argument("--sp_delimiter", dest="sp_delimiter", type=str,
                        help="If taxid is part of the leaf name, delimiter used to split the string")
    parser.add_argument("--sp_field", dest="sp_field", type=int, default=0,
                        help="field position for taxid after splitting leaf names")
    parser.add_argument("--ref", dest="ref_tree", type=str,
                        help="Uses ref tree to compute robinson foulds"
                        " distances of the different subtrees")
    parser.add_argument("--rf-only", dest="rf_only", action="store_true",
                        help="Skip ncbi consensus analysis")
    parser.add_argument("--outgroup", dest="outgroup", type=str, nargs="+",
                        help="A list of node names defining the trees outgroup")
    parser.add_argument("--is_sptree", dest="is_sptree", action="store_true",
                        help="Assumes no duplication nodes in the tree")
    parser.add_argument("-o", dest="output", type=str,
                        help="Writes result into a file")
    parser.add_argument("--tax2name", dest="tax2name", type=str, help="")
    parser.add_argument("--tax2track", dest="tax2track", type=str, help="")
    parser.add_argument("--dump_tax_info", dest="dump_tax_info", action="store_true", help="")

    args = parser.parse_args(argv)

    if args.sp_delimiter:
        GET_TAXID = lambda x: x.split(args.sp_delimiter)[args.sp_field]
    else:
        GET_TAXID = None

    reftree_name = os.path.basename(args.ref_tree) if args.ref_tree else ""

    if args.explore:
        print >>sys.stderr, "Reading tree from file:", args.explore
        t = cPickle.load(open(args.explore))
        ts = TreeStyle()
        ts.force_topology = True
        ts.show_leaf_name = False
        ts.layout_fn = ncbi_layout
        ts.mode = "r"
        t.show(tree_style=ts)
        print >>sys.stderr, "dumping color config"
        cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))
        sys.exit()

    if args.output:
        OUT = open(args.output, "w")
    else:
        OUT = sys.stdout

    print >>sys.stderr, "Dumping results into", OUT

    target_trees = []
    if args.tree_list_file:
        target_trees = [line.strip() for line in open(args.tree_list_file)]
    if args.target_tree:
        target_trees += args.target_tree

    prev_tree = None

    if args.tax2name:
        tax2name = cPickle.load(open(args.tax2name))
    else:
        tax2name = {}

    if args.tax2track:
        tax2track = cPickle.load(open(args.tax2track))
    else:
        tax2track = {}

    print len(tax2track), len(tax2name)

    header = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees", "Broken clades",
              "Broken branches", "Clade sizes", "RF (avg)", "RF (med)", "RF (std)",
              "RF (max)", "Shared tips")
    print >>OUT, '|'.join([h.ljust(15) for h in header])

    if args.ref_tree:
        print >>sys.stderr, "Reading ref tree from", args.ref_tree
        reft = Tree(args.ref_tree, format=1)
    else:
        reft = None

    SHOW_TREE = False
    if args.show_tree or args.render:
        SHOW_TREE = True

    prev_broken = set()
    ENTRIES = []
    ncbi.connect_database()

    for tfile in target_trees:
        #print tfile
        t = PhyloTree(tfile, sp_naming_function=None)

        if GET_TAXID:
            for n in t.iter_leaves():
                n.name = GET_TAXID(n.name)

        if args.outgroup:
            if len(args.outgroup) == 1:
                out = t & args.outgroup[0]
            else:
                out = t.get_common_ancestor(args.outgroup)
                if set(out.get_leaf_names()) ^ set(args.outgroup):
                    raise ValueError("Outgroup is not monophyletic")
            t.set_outgroup(out)
        t.ladderize()

        if prev_tree:
            tree_compare(t, prev_tree)
        prev_tree = t

        if args.tax_info:
            tax2name, tax2track = annotate_tree_with_taxa(t, args.tax_info, tax2name, tax2track)
            if args.dump_tax_info:
                cPickle.dump(tax2track, open("tax2track.pkl", "w"))
                cPickle.dump(tax2name, open("tax2name.pkl", "w"))
                print "Tax info written into pickle files"
        else:
            for n in t.iter_leaves():
                spcode = n.name
                n.add_features(taxid=spcode)
                n.add_features(species=spcode)
            tax2name, tax2track = annotate_tree_with_taxa(t, None, tax2name, tax2track)

        # Split tree into species trees
        #subtrees = t.get_speciation_trees()
        if not args.rf_only:
            #print "Calculating tree subparts..."
            t1 = time.time()
            if not args.is_sptree:
                subtrees = t.split_by_dups()
                #print "Subparts:", len(subtrees), time.time()-t1
            else:
                subtrees = [t]

            (valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches,
             total_rf, broken_clades, broken_sizes) = analyze_subtrees(t, subtrees, show_tree=SHOW_TREE)
            #print valid_subtrees, broken_subtrees, ncbi_mistakes, total_rf
        else:
            subtrees = []
            (valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches,
             total_rf, broken_clades, broken_sizes) = 0, 0, 0, 0, 0, 0, 0

        ndups = 0
        nsubtrees = len(subtrees)

        rf = 0
        rf_max = 0
        rf_std = 0
        rf_med = 0
        common_names = 0
        max_size = 0

        if reft and len(subtrees) == 1:
            rf = t.robinson_foulds(reft, attr_t1="realname")
            rf_max = rf[1]
            rf = rf[0]
            rf_med = rf
        elif reft:
            #print "Calculating avg RF..."
            nsubtrees, ndups, subtrees = t.get_speciation_trees(map_features=["taxid"])
            #print len(subtrees), "Sub-Species-trees found"
            avg_rf = []
            rf_max = 0.0  # reft.robinson_foulds(reft)[1]
            sum_size = 0.0
            print nsubtrees, "subtrees", ndups, "duplications"
            for ii, subt in enumerate(subtrees):
                print "\r%d" % ii,
                sys.stdout.flush()
                try:
                    partial_rf = subt.robinson_foulds(reft, attr_t1="taxid")
                except ValueError:
                    pass
                else:
                    sptree_size = len(set([n.taxid for n in subt.iter_leaves()]))
                    sum_size += sptree_size
                    avg_rf.append((partial_rf[0] / float(partial_rf[1])) * sptree_size)
                    common_names = len(partial_rf[3])
                    max_size = max(max_size, sptree_size)
                    rf_max = max(rf_max, partial_rf[1])
                    #print partial_rf[:2]
            rf = numpy.sum(avg_rf) / float(sum_size)  # Treeko dist
            rf_std = numpy.std(avg_rf)
            rf_med = numpy.median(avg_rf)

        sizes_info = "%0.1f/%0.1f +- %0.1f" % (numpy.mean(broken_sizes),
                                               numpy.median(broken_sizes),
                                               numpy.std(broken_sizes))

        iter_values = [os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
                       ncbi_mistakes, broken_branches, sizes_info, rf, rf_med,
                       rf_std, rf_max, common_names]
        print >>OUT, '|'.join(map(lambda x: str(x).strip().ljust(15), iter_values))

        fixed = sorted([n for n in prev_broken if n not in broken_clades])
        new_problems = sorted(broken_clades - prev_broken)
        fixed_string = color(', '.join(fixed), "green") if fixed else ""
        problems_string = color(', '.join(new_problems), "red") if new_problems else ""
        OUT.write(" Fixed clades: %s\n" % fixed_string) if fixed else None
        OUT.write(" New broken: %s\n" % problems_string) if new_problems else None
        prev_broken = broken_clades
        ENTRIES.append([os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
                        ncbi_mistakes, broken_branches, sizes_info,
                        fixed_string, problems_string])
        OUT.flush()

        if args.show_tree or args.render:
            ts = TreeStyle()
            ts.force_topology = True
            #ts.tree_width = 500
            ts.show_leaf_name = False
            ts.layout_fn = ncbi_layout
            ts.mode = "r"
            t.dist = 0
            if args.show_tree:
                #if args.hide_monophyletic:
                #    tax2monophyletic = {}
                #    n2content = t.get_node2content()
                #    for node in t.traverse():
                #        term2count = defaultdict(int)
                #        for leaf in n2content[node]:
                #            if leaf.lineage:
                #                for term in leaf.lineage:
                #                    term2count[term] += 1
                #        expected_size = len(n2content)
                #        for term, count in term2count.iteritems():
                #            if count > 1
                print "Showing tree..."
                t.show(tree_style=ts)
            else:
                t.render("img.svg", tree_style=ts, dpi=300)
            print "dumping color config"
            cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))

        if args.dump:
            cPickle.dump(t, open("ncbi_analysis.pkl", "w"))

    print
    print
    HEADER = ("TargetTree", "Subtrees", "Ndups", "Broken subtrees", "Broken clades",
              "Broken branches", "Clade sizes", "Fixed Groups", "New Broken Clades")
    print_table(ENTRIES, max_col_width=50, row_line=True, header=HEADER)

    if args.output:
        OUT.close()
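# Hedged example invocation (the script filename below is a placeholder, not
# from the source; the flags are the ones defined in main() above):
#
#   python <this_script>.py -t tree1.nw tree2.nw --ref ncbi_ref.nw \
#       --sp_delimiter _ --sp_field 1 -o report.txt --show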
def __init__(self):
    """ Description of init """
    # Create and parse the options
    parser = OptionParser()
    # Add the option names
    parser.add_option("-a", "--type")
    parser.add_option("-b", "--idlist")
    parser.add_option("-c", "--from_d")
    parser.add_option("-d", "--to_d")
    parser.add_option("-e", "--xmin")
    parser.add_option("-f", "--xmax")
    parser.add_option("-g", "--ymin")
    parser.add_option("-i", "--ymax")
    parser.add_option("-j", "--offset")
    parser.add_option("-k", "--as_function_of_t")
    parser.add_option("-l", "--logscale")
    parser.add_option("-m", "--shift_temp_unit")
    parser.add_option("-n", "--flip_x")
    parser.add_option("-o", "--shift_be_ke")
    parser.add_option("-p", "--size")
    (options, args) = parser.parse_args()
    # For use in other methods
    self.options = options

    ### Process options
    # Fetch idlist
    self.idlist = [int(element) for element in options.idlist.split(',')[1:]]
    # Turn the offset "key:value," pair string into a dictionary
    self.offsets = dict([[int(offset.split(':')[0]), offset.split(':')[1]]
                         for offset in options.offset.split(',')[1:]])
    # Turn as_function_of_t into boolean
    self.as_function_of_t = True if options.as_function_of_t == 'checked' else False
    self.shift_temp_unit = True if options.shift_temp_unit == 'checked' else False
    self.logscale = True if options.logscale == 'checked' else False
    self.flip_x = True if options.flip_x == 'checked' else False
    self.shift_be_ke = True if options.shift_be_ke == 'checked' else False

    ### Create db object
    # ADD MORE OPTIONS
    self.from_to = {'from': options.from_d, 'to': options.to_d}
    self.db = dataBaseBackend(typed=options.type, from_to=self.from_to,
                              id_list=self.idlist, offsets=self.offsets,
                              as_function_of_t=self.as_function_of_t,
                              shift_temp_unit=self.shift_temp_unit,
                              shift_be_ke=self.shift_be_ke)
    self.standard_sizes = {'small': '450x300',
                           'large': '4500x3000',
                           'def_size': '900x600'}

    # The 'name' is a string that is unique for this plot
    # Here we add all the information that is entered into the db object
    self.name = self.db.global_settings['chamber_name'] + '_' + options.type
    if options.from_d != '' or options.to_d != '':
        self.name += '_' + options.from_d + '_' + options.to_d
    self.name += ('_' + 'as_function_of_t') if self.as_function_of_t else ''
    self.name += ('_' + 'shift_temp_unit') if self.shift_temp_unit else ''
    self.name += ('_' + 'logscale') if self.logscale else ''
    self.name += ('_' + 'flip_x') if self.flip_x else ''
    self.name += ('_' + 'shift_be_ke') if self.shift_be_ke else ''
    if len(self.idlist) > 0:
        self.name += '_' + str(self.idlist)

    # object to give first good color, and then random colors
    self.c = color()
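# Note on the option formats above (inferred from the parsing code, so treat
# it as an assumption rather than documented behavior): --idlist and --offset
# are split on ',' and the first element is discarded, so their values are
# expected to start with a leading comma, e.g.
#   -b ",7,12,15"        -> idlist  [7, 12, 15]
#   -j ",7:0.1,12:-2"    -> offsets {7: '0.1', 12: '-2'}
# The flag-like options (-k through -o) are true only when passed the literal
# string 'checked'.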
def ncbi_consensus(self):
    # NOTE: several names used below (t, reft, tfile, OUT, args, SHOW_TREE,
    # prev_broken, ENTRIES, ...) are not defined in this method and are
    # expected to exist in the enclosing module/script scope.
    nsubtrees, ndups, subtrees = self.get_speciation_trees(map_features=["taxid"])
    (valid_subtrees, broken_subtrees, ncbi_mistakes, broken_branches,
     total_rf, broken_clades, broken_sizes) = analyze_subtrees(t, subtrees, show_tree=SHOW_TREE)

    avg_rf = []
    rf_max = 0.0  # reft.robinson_foulds(reft)[1]
    sum_size = 0.0
    max_size = 0
    #reftree =
    for tn, subt in enumerate(subtrees):
        partial_rf = subt.robinson_foulds(reft, attr_t1="taxid")
        sptree_size = len(set([n.taxid for n in subt.iter_leaves()]))
        sum_size += sptree_size
        avg_rf.append((partial_rf[0] / float(partial_rf[1])) * sptree_size)
        common_names = len(partial_rf[3])
        max_size = max(max_size, sptree_size)
        rf_max = max(rf_max, partial_rf[1])

    rf = numpy.sum(avg_rf) / float(sum_size)  # Treeko dist
    rf_std = numpy.std(avg_rf)
    rf_med = numpy.median(avg_rf)

    sizes_info = "%0.1f/%0.1f +- %0.1f" % (numpy.mean(broken_sizes),
                                           numpy.median(broken_sizes),
                                           numpy.std(broken_sizes))

    iter_values = [os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
                   ncbi_mistakes, broken_branches, sizes_info, rf, rf_med,
                   rf_std, rf_max, common_names]
    print >>OUT, '|'.join(map(lambda x: str(x).strip().ljust(15), iter_values))

    fixed = sorted([n for n in prev_broken if n not in broken_clades])
    new_problems = sorted(broken_clades - prev_broken)
    fixed_string = color(', '.join(fixed), "green") if fixed else ""
    problems_string = color(', '.join(new_problems), "red") if new_problems else ""
    OUT.write(" Fixed clades: %s\n" % fixed_string) if fixed else None
    OUT.write(" New broken: %s\n" % problems_string) if new_problems else None
    prev_broken = broken_clades
    ENTRIES.append([os.path.basename(tfile), nsubtrees, ndups, broken_subtrees,
                    ncbi_mistakes, broken_branches, sizes_info,
                    fixed_string, problems_string])
    OUT.flush()

    if args.show_tree or args.render:
        ts = TreeStyle()
        ts.force_topology = True
        #ts.tree_width = 500
        ts.show_leaf_name = False
        ts.layout_fn = ncbi_layout
        ts.mode = "r"
        t.dist = 0
        if args.show_tree:
            #if args.hide_monophyletic:
            #    tax2monophyletic = {}
            #    n2content = t.get_node2content()
            #    for node in t.traverse():
            #        term2count = defaultdict(int)
            #        for leaf in n2content[node]:
            #            if leaf.lineage:
            #                for term in leaf.lineage:
            #                    term2count[term] += 1
            #        expected_size = len(n2content)
            #        for term, count in term2count.iteritems():
            #            if count > 1
            print "Showing tree..."
            t.show(tree_style=ts)
        else:
            t.render("img.svg", tree_style=ts, dpi=300)
        print "dumping color config"
        cPickle.dump(name2color, open("ncbi_colors.pkl", "w"))

    if args.dump:
        cPickle.dump(t, open("ncbi_analysis.pkl", "w"))
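# Hedged reading of the distance computed above (an interpretation of the
# code, not an official definition from the source): every speciation subtree
# contributes its normalized Robinson-Foulds distance, RF_i / maxRF_i,
# weighted by the number of species it contains, n_i, and the reported value
# is
#     rf = sum_i (RF_i / maxRF_i) * n_i / sum_i n_i
# i.e. a size-weighted mean of normalized RF distances over subtrees; the code
# itself labels this a "Treeko dist".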
def main():
    common.verify_output_dir(TMP_DIR)
    out = get_output_filename()

    # Record indefinitely (-d 0) from the ALSA device "copy" as stereo
    # 16-bit / 44.1 kHz WAV and pipe it straight into oggenc
    command = "arecord -d 0 -c 2 -f S16_LE -r 44100 -t wav -D copy | oggenc -o %s -" % out
    print color(command, 'cyan')
    print color("Press CTRL+C to stop the recording process.", 'green')

    start = time.time()
    os.system(command)
    end = time.time()

    print color(common.elapsed_time(end, start), 'yellow')
    print color("Size of the output file: %s bytes." %
                common.numberToPrettyString(os.path.getsize(out)), 'yellow')
    print color("If you want to listen to the recorded file, execute the following command:", 'green')
    print color("mplayer %s" % out, 'cyan')
plt.subplot(111)

# Decide on the y axis type
gs = db.global_settings
if logscale:
    myplot = plt.semilogy
    name += '_semilog'
elif gs['default_yscale'] == 'log':
    myplot = plt.semilogy
    name += '_semilog'
else:
    myplot = plt.plot
    name += '_linear'

# object to give first good color, and then random colors
c = color()

# Make plot
for data in db.get_data():
    myplot(data['data'][:, 0], data['data'][:, 1], color=c.get_color())

# Now we are done with the plotting, change axis if necessary
# Get current axis limits
axis = plt.axis()
# Apply the user-supplied limits only when min and max differ
if options.xmin != options.xmax:
    axis = (float(options.xmin), float(options.xmax)) + axis[2:4]
if options.ymin != options.ymax:
    axis = axis[0:2] + (float(options.ymin), float(options.ymax))
if flip_x:
    axis = (axis[1], axis[0]) + axis[2:4]
plt.axis(axis)