def setup_logging(log_level=None, output_collection=None, db_host=None,
                  log_name=None, **kw):
    """Attach a MongoDB handler to the root logger.

    :param log_level: level name (e.g. 'INFO'); resolved via
        ``getattr(logging, log_level.upper())``
    :param output_collection: '<database>.<collection>' target for records
    :param db_host: MongoDB host passed to ``MongoHandler``
    :param log_name: appended to the collection name for this run
    :returns: the handler's client object (``handler.getClient()``)

    On any failure the traceback is written to stderr and the process
    exits via ``masterslave.exit(1)``.
    """
    try:
        # Split on the first '.' only, so collection names may contain dots.
        database, collection = output_collection.split('.', 1)
        handler = MongoHandler(db_host, database,
                               '.'.join((collection, log_name)))
        log_level = getattr(logging, log_level.upper())
        handler.setLevel(log_level)
        # Map output document field names -> LogRecord attribute names.
        fields = {
            'datetime': 'created',
            'process_id': 'process',
            'level': 'levelname',
            'message': 'message'
        }
        info = {'host': gethostname()}
        formatter = MongoFormatter(fields, info)
        handler.setFormatter(formatter)
        logging.root.addHandler(handler)
        logging.root.setLevel(log_level)
        return handler.getClient()
    except Exception:  # was bare except: don't swallow SystemExit/KeyboardInterrupt
        sys.stderr.write(' Unable to set up logging:\n' + format_exc())
        masterslave.exit(1)
def istats(output_directory=None, mode='s', **kw):
    """Yield (gene, f_stats, g_stats) triples from per-gene JSON files.

    Reads every ``*.json`` file in *output_directory* (skipping the
    ``*_g.json`` bootstrap companions), groups consecutive rows that share
    the same unordered ``tip_names`` triad, and pairs each group with the
    matching bootstrap record from ``<gene>_g.json`` (or a fresh dict when
    no bootstrap data exists).

    :param output_directory: directory containing ``<gene>.json`` files
    :param mode: 's' to skip genes whose ``_g.json`` output already exists,
        'a' to load and reuse the existing bootstrap records
    """
    try:
        filenames = glob.iglob(os.path.join(output_directory, '*.json'))
    except Exception:
        logging.critical(' Unable to open ' + output_directory)
        masterslave.exit(1)
    for filename in filenames:
        try:
            # Bootstrap companions are read alongside their gene file below.
            if filename.endswith('_g.json'):
                continue
            gene = os.path.basename(filename[:-5])
            with open(filename) as f_file:
                f_stats = json.load(f_file)
            g_stats = []
            g_filepath = os.path.join(output_directory, gene + '_g.json')
            if os.path.exists(g_filepath):
                if mode == 's':
                    logging.info(' Skipping ' + gene + ': output exists')
                    continue
                elif mode == 'a':
                    with open(g_filepath) as g_file:
                        g_stats = json.load(g_file)
            # Group consecutive rows by their (unordered) triad of tip names.
            last_triad = None
            stats = []
            for f_row in f_stats:
                if 'tip_names' not in f_row:
                    continue
                triad = frozenset(f_row['tip_names'])
                if last_triad != triad:
                    last_triad = triad
                    stats.append([])
                stats[-1].append(f_row)
            if g_stats == []:
                # No bootstrap data yet: pair each group with a fresh dict.
                g_stats = [{} for _ in range(len(stats))]
            if len(g_stats) != len(stats):
                logging.error(' Skipping ' + filename + ':\n' + ' found ' +
                              str(len(stats)) + ' triad(s) and ' +
                              str(len(g_stats)) +
                              ' bootstrap records(s). Numbers should match.')
                continue
            # Distinct loop names: the original rebound f_stats/g_stats
            # inside the zip loop, shadowing the sequences being iterated.
            for f_group, g_group in zip(stats, g_stats):
                yield gene, f_group, g_group
        except Exception:  # was bare except: don't swallow SystemExit
            logging.warning(' Skipping ' + filename + ':\n' + format_exc())
def setup_logging(log_level=None, log_file=None, **kw):
    """Attach a file or stream handler to the root logger.

    :param log_level: level name (e.g. 'INFO'); resolved via
        ``getattr(logging, log_level.upper())``
    :param log_file: when given, log to this path (parent directories are
        created and an MPI-aware file handler is used); otherwise log to
        a plain ``StreamHandler``

    On any failure the traceback is written to stderr and the process
    exits via ``masterslave.exit(1)``.
    """
    try:
        if log_file:
            log_dir = os.path.dirname(log_file)
            masterslave.checkmakedirs(log_dir)
            handler = masterslave.MPIFileHandler(log_file)
        else:
            handler = logging.StreamHandler()
        log_level = getattr(logging, log_level.upper())
        handler.setLevel(log_level)
        # Under MPI, tag each record with host:pid so interleaved output
        # from different ranks can be told apart.
        hostpid = ''
        if masterslave.USING_MPI:
            hostpid = gethostname() + ':' + str(os.getpid()) + ':'
        formatter = logging.Formatter('%(asctime)s:' + hostpid +
                                      '%(levelname)s:%(message)s')
        handler.setFormatter(formatter)
        logging.root.addHandler(handler)
        logging.root.setLevel(log_level)
    except Exception:  # was bare except: don't swallow SystemExit/KeyboardInterrupt
        sys.stderr.write(' Unable to set up logging:\n' + format_exc())
        masterslave.exit(1)
def write_files(gene, f_stats, g_stats, output_directory, log_file):
    """Write collected per-gene statistics to JSON output files.

    Appends a ``{'log_file': log_file}`` provenance row to *f_stats*,
    carries over any ``log_file`` rows from a previous ``<gene>.json``,
    then rewrites ``<gene>.json`` and ``<gene>_g.json``.

    Errors for a specific gene are logged and swallowed so the collector
    can continue; a null *gene* is fatal (``masterslave.exit(1)``).
    """
    try:
        f_stats.append({'log_file': log_file})
        filepath = os.path.join(output_directory, gene + '.json')
        # BUG FIX: on the first run for a gene no previous output file
        # exists; unconditionally opening it raised and the results were
        # logged as an error and silently dropped. Only merge history when
        # the file is actually present.
        if os.path.exists(filepath):
            with open(filepath) as infile:
                old_stats = json.load(infile)
            # Preserve the provenance (log_file) rows from earlier runs.
            for row in old_stats:
                if 'log_file' in row:
                    f_stats.append(row)
        with open(filepath, 'w') as outfile:
            json.dump(f_stats, outfile)
        filepath = os.path.join(output_directory, gene + '_g.json')
        with open(filepath, 'w') as outfile:
            json.dump(g_stats, outfile)
    except Exception:  # was bare except: don't swallow SystemExit
        if gene:
            logging.error(' Problem collecting output for ' + gene + ':\n' +
                          format_exc())
        else:
            logging.critical(' Null gene in write_files()')
            masterslave.exit(1)
def itriads(input_directory=None, codon_position=-1, output_directory=None,
            force_recalculation=False, triad_file=None, num_range=None, **kw):
    """Yield (gene, alignment) pairs of three-taxon sub-alignments.

    Scans *input_directory* for ``*.fasta``/``*.fasta.gz`` files, loads
    each alignment, optionally slices out one codon position, and yields
    every valid taxon triad (from *triad_file*, or all 3-combinations of
    sequence names) as a DNA-filtered sub-alignment.

    :param codon_position: 1-based codon position to keep; <= 0 keeps all
    :param output_directory: created if missing; genes whose
        ``<gene>.json`` output already exists are skipped unless
        *force_recalculation* is set
    :param triad_file: optional whitespace-separated file of taxon triads
    :param num_range: optional slice applied to the file list
    """
    try:
        filenames = glob.glob(os.path.join(input_directory, '*.fasta*'))
        if num_range:
            filenames = filenames[num_range]
    except Exception:
        logging.critical(' Unable to open input directory:\n' + format_exc())
        masterslave.exit(1)
    try:
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)  # only executed by master
    except Exception:
        logging.critical(' Unable to open output directory:\n' + format_exc())
        masterslave.exit(1)
    for filename in filenames:
        try:
            if filename.endswith('.fasta'):
                gene = os.path.basename(filename[:-6])
                with open(filename) as fastafile:
                    fastadata = fastafile.read()
            elif filename.endswith('.fasta.gz'):
                gene = os.path.basename(filename[:-9])
                with GzipFile(filename) as fastafile:
                    fastadata = fastafile.read()
            else:
                continue
            out_filepath = os.path.join(output_directory, gene + '.json')
            if not force_recalculation and os.path.exists(out_filepath):
                logging.info(' Skipping ' + gene + ': output exists')
                continue
            sequences = LoadSeqs(data=fastadata)
            if codon_position > 0:
                # Keep only the requested codon position: mark every third
                # column (starting at codon_position - 1) and slice it out.
                c = codon_position
                indices = [(i, i + 1)
                           for i in range(c - 1, len(sequences), 3)]
                pos3 = sequences.addFeature('pos3', 'pos3', indices)
                sequences = pos3.getSlice()
            if triad_file:
                with open(triad_file) as f:
                    triads = [l.split() for l in f]
            else:
                triads = combinations(sequences.getSeqNames(), 3)
            num_triads = 0
            for triad in triads:
                for taxon in triad:
                    if taxon not in sequences.Names:
                        logging.info(' Skipping ' + '/'.join(triad) +
                                     ' in ' + gene + ': ' + taxon +
                                     ' is missing')
                        break
                else:  # no break: all three taxa are present
                    num_triads += 1
                    sa = sequences.takeSeqs(triad)
                    # Drop columns containing any non-DNA symbol.
                    sa = sa.filtered(lambda x: set(''.join(x)) <= set(DNA))
                    if len(sa) == 0:
                        logging.info(' Skipping ' + '/'.join(triad) +
                                     ' in ' + gene +
                                     ': filtered alignment has length zero')
                        continue
                    yield gene, sa
            if num_triads == 0:
                logging.info(' Skipping ' + gene + ': found no valid triads')
        except Exception:  # was bare except: don't swallow SystemExit
            logging.warning(' Skipping ' + filename + ':\n' + format_exc())