def get_kmers(words, k, min_words):
    """
    Group words by the k-mers (substrings of length `k`) they contain.

    Returns a dict mapping each k-mer to the list of words it was found in
    (a word appears once per occurrence of the k-mer), keeping only those
    k-mers whose list holds at least `min_words` entries. Calls `die` when
    `k` is not greater than 1.
    """

    if k <= 1:
        die('-k "{}" must be greater than 1'.format(k))

    by_kmer = defaultdict(list)
    for word in words:
        last_start = len(word) - k
        for start in range(last_start + 1):
            by_kmer[word[start:start + k]].append(word)

    # Build a fresh dict instead of deleting from the one being iterated
    return {
        kmer: members
        for kmer, members in by_kmer.items() if len(members) >= min_words
    }
def main():
    """
    Render a parsed tree as a Graphviz digraph.

    Parses nodes and edges from the input file handle, draws them with
    `Digraph` and renders to --outfile (default: input basename + '.gv').
    """

    args = get_args()
    in_fh = args.file

    if args.outfile:
        out_file = args.outfile
    else:
        out_file = os.path.basename(in_fh.name) + '.gv'

    nodes, edges = parse_tree(in_fh)

    if not (nodes or edges):
        die('No nodes or edges in "{}".'.format(in_fh.name))

    graph = Digraph(comment='Tree')

    # Only the full names are drawn; the keys (initials) are just aliases
    for full_name in nodes.values():
        graph.node(full_name)

    for source, target in edges:
        # Expand an alias to its full name, otherwise use the value as given
        graph.edge(nodes.get(source, source), nodes.get(target, target))

    graph.render(out_file, view=args.view)
    print('Done, see output in "{}".'.format(out_file))
def read_file(file):
    """
    Return the whitespace-separated tokens of a file as a list of strings.

    Calls `die` when `file` is not an existing regular file. The handle is
    closed as soon as the contents have been read.
    """

    if not os.path.isfile(file):
        die('"{}" is not a file'.format(file))

    with open(file) as fh:
        return fh.read().split()
def blast_main_orfs(genome, args):
    """
    Write the ORFs found by find_orfs_with_trans() to a FASTA file and run
    run_blastp() on them.

    :param genome: genome sequence to search (the original note says it is
        linearized based on L1 -- not verifiable from this function)
    :param args: mapping of command line options; reads 'min_prot_len',
        'program_files_dir' and 'data_dir'
    :return: a tuple of two file paths, the first being the blast output and
        the second being the FASTA file of ORFs
    :raises Exception: when no ORFs are found
    """

    orfs = find_orfs_with_trans(genome, 1, args['min_prot_len'])

    if not orfs:
        raise Exception('No ORFs, must stop.')

    orfs_fa = os.path.join(args['program_files_dir'], 'orfs.fa')

    # The context manager guarantees the FASTA file is flushed and closed
    # before blastp reads it, even if a write fails part-way through
    with open(orfs_fa, 'wt') as orfs_fh:
        for orf in orfs:
            # Keys are the sequences, values become the FASTA headers
            orfs_fh.write('\n'.join(['>' + str(orfs[orf]), orf, '']))

    blast_sub = os.path.join(args['data_dir'], 'main_blast.fa')
    blast_out = os.path.join(args['program_files_dir'],
                             'blast_results_main.tab')

    # Remove any stale result from a previous run
    if os.path.isfile(blast_out):
        os.remove(blast_out)

    try:
        run_blastp(orfs_fa, blast_sub, blast_out)
    except Exception as e:
        die('EXCEPTION: {}'.format(e))

    return blast_out, orfs_fa
def main():
    """
    Print a horizontal bar chart of word-character frequencies.

    --text may be literal text or the name of a file to read. Each output
    line shows the character, its count and a bar of --character scaled so
    the longest bar is at most --width wide. Characters seen fewer than
    --minimum times are skipped.
    """

    args = get_args()
    text = args.text
    char = args.character
    width = args.width
    min_val = args.minimum

    if len(char) != 1:
        die('--character "{}" must be one character'.format(char))

    if os.path.isfile(text):
        # Close the handle promptly instead of leaking it
        with open(text) as fh:
            text = fh.read()

    if args.case_insensitive:
        text = text.upper()

    freqs = Counter(c for c in text if re.match(r'\w', c))

    # default=0 keeps text with no word characters from raising ValueError;
    # nothing is printed in that case
    high = max(freqs.values(), default=0)
    scale = high / width if high > width else 1

    if args.frequency_sort:
        # Most frequent first, ties broken by character in descending order
        items = sorted(freqs.items(),
                       key=lambda pair: (pair[1], pair[0]),
                       reverse=True)
    else:
        items = sorted(freqs.items())

    for c, num in items:
        if num < min_val:
            continue
        print('{} {:6} {}'.format(c, num, char * int(num / scale)))
def main():
    """
    Write the verses of "The Twelve Days of Christmas".

    Emits verses 1 through --number_days to --outfile, or to STDOUT when no
    output file is given. Calls `die` when the requested number of days is
    not between 1 and 12.
    """

    args = get_args()
    out_file = args.outfile
    num_days = args.number_days

    days = {
        12: 'Twelve drummers drumming',
        11: 'Eleven pipers piping',
        10: 'Ten lords a leaping',
        9: 'Nine ladies dancing',
        8: 'Eight maids a milking',
        7: 'Seven swans a swimming',
        6: 'Six geese a laying',
        5: 'Five gold rings',
        4: 'Four calling birds',
        3: 'Three French hens',
        2: 'Two turtle doves',
        1: 'a partridge in a pear tree',
    }

    ordinal = {
        12: 'twelfth',
        11: 'eleventh',  # was 'eleven', which produced "On the eleven day"
        10: 'tenth',
        9: 'ninth',
        8: 'eighth',
        7: 'seventh',
        6: 'sixth',
        5: 'fifth',
        4: 'fourth',
        3: 'third',
        2: 'second',
        1: 'first',
    }

    if num_days not in days:
        die('Cannot sing "{}" days'.format(num_days))

    out_fh = open(out_file, 'wt') if out_file else sys.stdout
    first = 'On the {} day of Christmas,\nMy true love gave to me,'

    try:
        for i in range(1, num_days + 1):
            out_fh.write(first.format(ordinal[i]) + '\n')

            # Gifts are recited from the newest down to the partridge
            for j in reversed(range(1, i + 1)):
                if j == 1:
                    if i == 1:
                        out_fh.write('{}.\n'.format(days[j].title()))
                    else:
                        out_fh.write('And {}.\n'.format(days[j]))
                else:
                    out_fh.write('{},\n'.format(days[j]))

            # Blank line after every verse except the twelfth
            if i < max(days.keys()):
                out_fh.write('\n')
    finally:
        # Only close handles we opened ourselves, never STDOUT
        if out_fh is not sys.stdout:
            out_fh.close()
def main():
    """
    Find multi-word anagrams of --text using the words in --wordlist.

    Words are lower-cased and stripped of everything but a-z/0-9, then
    grouped by length. For 1..--num_combos words, every combination of
    lengths that sums to the length of the text is tried, and any word
    combination with the same character counts as the text is reported in
    all of its orderings (except the text itself).
    """

    args = get_args()
    text = args.text
    word_list = args.wordlist

    if not os.path.isfile(word_list):
        die('--wordlist "{}" is not a file'.format(word_list))

    logging.basicConfig(
        filename='.log',
        filemode='w',
        level=logging.DEBUG if args.debug else logging.CRITICAL)

    words = defaultdict(set)
    with open(word_list) as fh:
        for line in fh:
            for word in line.split():
                clean = re.sub('[^a-z0-9]', '', word.lower())
                # Punctuation-only tokens clean down to ''; keeping them
                # would create zero-length "words" and anagrams that differ
                # from the text only by stray spaces
                if not clean:
                    continue
                # "a" and "i" are the only accepted one-letter words
                if len(clean) == 1 and clean not in 'ai':
                    continue
                words[len(clean)].add(clean)

    text_len = len(text)
    counts = Counter(text)
    anagrams = set()
    lengths = list(words.keys())

    for i in range(1, args.num_combos + 1):
        # Lengths are chained twice so one length may be used twice;
        # sorting the tuples removes duplicate orderings
        length_sets = {
            tuple(sorted(t))
            for t in combinations(chain(lengths, lengths), i)
        }
        key_combos = [t for t in length_sets if sum(t) == text_len]

        for keys in key_combos:
            logging.debug('Searching keys {}'.format(keys))

            for t in product(*[words[k] for k in keys]):
                if Counter(''.join(t)) == counts:
                    for p in map(' '.join, permutations(t)):
                        if p != text:
                            anagrams.add(p)

            logging.debug('# anagrams = {}'.format(len(anagrams)))

    logging.debug('Finished searching')

    if anagrams:
        print('{} ='.format(text))
        for i, t in enumerate(sorted(anagrams), 1):
            print('{:4}. {}'.format(i, t))
    else:
        print('No anagrams for "{}".'.format(text))
def main():
    """
    Run run_blastp() with the E5 ORFs as query against the E5 subject file
    and report the number of hits; any failure is reported through `die`.
    """

    query_fa = 'orfs_E5_HPV6.fa'
    subject_fa = 'blast_E5.fa'
    results_tab = 'e5_results.tab'

    try:
        hit_count = run_blastp(query_fa,
                               subject_fa,
                               results_tab,
                               evalue=1e-10)
        print('Got {} hits!'.format(hit_count))
    except Exception as err:
        die('EXCEPTION: {}'.format(err))

    print('All was good!')
def main():
    """
    Inspect a FASTA alignment to locate the start of the 'unknown' sequence.

    Prints the alignment, the number of leading gap characters per record,
    and the column(s) holding the most 'M' residues. The first such column
    is used to split the record with ID 'unknown', and the part before it
    determines which message is printed.
    """

    args = get_args()
    fasta = args.fasta

    if not os.path.isfile(fasta):
        die('"{}" is not a file'.format(fasta))

    alignment = AlignIO.read(fasta, 'fasta')
    print(alignment)

    alignment_length = alignment.get_alignment_length()
    seq_by_id = {rec.id: str(rec.seq) for rec in alignment}

    if 'unknown' not in seq_by_id:
        # Fail with a clear message rather than a KeyError further down
        die('No sequence with ID "unknown" in "{}"'.format(fasta))

    # Length of the leading run of '-' for each record
    dashes = {}
    for rec in alignment:
        match = re.search(r'^([-]+)', str(rec.seq))
        dashes[rec.id] = len(match.group(1)) if match else 0

    pp(dashes)

    # Number of 'M' residues in every alignment column
    conserved = {}
    for i in range(alignment_length):
        conserved[i] = alignment[:, i].count('M')

    max_num_seqs = max(conserved.values())
    print(max_num_seqs)

    seqs_at_max = [t for t in conserved.items() if t[1] == max_num_seqs]

    if len(seqs_at_max) > 1:
        warn('Hey, look at this!')

    best_col = seqs_at_max[0][0]
    print(seqs_at_max)

    prefix = seq_by_id['unknown'][0:best_col]

    if len(prefix) * '-' == prefix:
        print('all dashes, do nothing')
    elif prefix.count('-') == 0:
        print('E6 = ', seq_by_id['unknown'][best_col:])
    else:
        # Count residues in the prefix of 'unknown'; the previous code used
        # `seq`, a leftover loop variable holding the last record's sequence
        print('remove {} from beginning'.format(len(prefix.replace('-',
                                                                   ''))))
def get_words(file):
    """
    Return the set of distinct words found in `file`.

    Each whitespace-separated token is lower-cased and stripped of every
    character that is not a letter or digit; tokens that end up empty are
    ignored. Calls `die` when `file` is not a file or yields no words.
    """

    if not os.path.isfile(file):
        # The message was previously never filled in with the file name
        die('"{}" is not a file'.format(file))

    words = set()
    with open(file) as fh:
        for line in fh:
            for word in line.split():
                clean = re.sub('[^a-zA-Z0-9]', '', word.lower())
                # Skip punctuation-only tokens so that '' is never counted
                # as a usable word
                if clean:
                    words.add(clean)

    if not words:
        die('No usable words in "{}"'.format(file))

    return words
def main():
    """
    Transpose the notes of a tune file by a musical interval.

    --shift is an interval of 2 to 8 (or -2 to -8). Every letter a-g/A-G on
    a music line is moved that far within its own case, wrapping around.
    A 'K:' line is printed as 'K:' plus the transposed key letter; lines
    enclosed in '<' and '>' and other 'X:' header lines are printed
    unchanged.
    """

    args = get_args()
    file = args.file
    shift = args.shift
    ucase = 'ABCDEFG'
    lcase = 'abcdefg'
    num_notes = 7

    if not 1 < abs(shift) <= 8:
        die('--shift "{}" must be between 2 and 8'.format(shift))

    if not os.path.isfile(file):
        die('"{}" is not a file'.format(file))

    # account for interval where a 2nd (-s 2) is a move of one note
    shift = shift - 1 if shift > 0 else shift + 1

    def transpose(note):
        """Shift a single note letter; return any other character as-is"""
        for scale in (lcase, ucase):
            if note in scale:
                return scale[(scale.index(note) + shift) % num_notes]
        return note

    with open(file) as fh:
        for line in fh:
            line = line.rstrip()
            if line.startswith('K:') and len(line) > 2:
                # Only the key letter right after 'K:' is kept
                print('K:' + transpose(line[2]))
            elif (line.startswith('<')
                  and line.endswith('>')) or re.match(r'[A-Z]:\s?', line):
                # Markup and header lines pass through untouched; a bare
                # 'K:' with no key also lands here instead of raising
                # IndexError
                print(line)
            else:
                print(''.join(map(transpose, line)))
def main():
    """
    Print a chain of words where each word is within --max_distance edits
    of the one before it.

    Starts from --start (or a random word), then repeatedly picks a random
    unused word whose length and distance from the current word are both
    within the limit, for up to --iterations words in total.
    """

    args = get_args()
    start = args.start
    fh = args.wordlist
    distance = args.max_distance

    random.seed(args.seed)

    log_level = logging.DEBUG if args.debug else logging.CRITICAL
    logging.basicConfig(filename='.log', filemode='w', level=log_level)
    logging.debug('file = %s', fh.name)

    words = fh.read().splitlines()

    if not start:
        start = random.choice(words)

    if start not in words:
        die('Unknown word "{}"'.format(start))

    links = [start]
    for _ in range(args.iterations - 1):
        current = links[-1]
        shortest = len(current) - distance
        longest = len(current) + distance

        # The cheap length test runs first so dist() is only called on
        # plausible candidates; words already in the chain are excluded
        candidates = [
            w for w in words
            if shortest <= len(w) <= longest and dist(current, w) <= distance
            and w not in links
        ]

        if not candidates:
            warn('Failed to find more words!')
            break

        links.append(random.choice(candidates))

    for position, word in enumerate(links, start=1):
        print('{:3}: {}'.format(position, word))
def main():
    """
    Fill in the <placeholder> blanks of a template file and print the result.

    Answers are taken in order from --inputs; once those run out the user
    is prompted with the placeholder's name. Calls `die` when the file has
    no placeholders.
    """

    args = get_args()
    inputs = args.inputs

    # The '+' must sit inside the inner group so the whole name is
    # captured; '([^>])+' captured only the final character
    regex = re.compile('(<([^>]+)>)')
    text = args.file.read().rstrip()

    if not regex.search(text):
        die('File "{}" has no placeholders'.format(args.file.name))

    def fill(match):
        """Return the next answer for the placeholder that was matched"""
        name = match.group(2)
        return inputs.pop(0) if inputs else input('{}: '.format(name))

    # A callable replacement is inserted literally, so regex metacharacters
    # in a placeholder and backslashes in an answer cannot corrupt the
    # substitution; blanks are filled left to right
    print(regex.sub(fill, text))
def blast_splice_acceptor(virus, args):
    """
    Write the translated Large T ORF to a query FASTA file and blast it
    against 'large_t_blast_half.fa', the first step in finding the splice
    acceptor position for Large T.

    :param virus: dictionary of found proteins; reads 'Large_T_orf_trans'
        and 'accession'
    :param args: mapping of command line options; reads 'program_files_dir'
        and 'data_dir'
    :return: file path to the blast output
    """

    large_t_orf = str(virus['Large_T_orf_trans'])
    ID = virus['accession']
    out_dir = args['program_files_dir']
    data_dir = args['data_dir']

    splice_acceptor_dir = os.path.join(out_dir, 'splice_acceptor')
    os.makedirs(splice_acceptor_dir, exist_ok=True)

    blast_subject = os.path.join(data_dir, 'large_t_blast_half.fa')
    blast_out = os.path.join(splice_acceptor_dir,
                             'blast_result_splice_acceptor.tab')

    # Remove any stale result from a previous run
    if os.path.isfile(blast_out):
        os.remove(blast_out)

    query_file = os.path.join(splice_acceptor_dir, 'query.fa')

    # Overwrite rather than append: appending glued a new '>' header onto
    # the previous sequence whenever the file already existed
    with open(query_file, 'w') as query:
        query.write('>{}\n'.format(ID))
        query.write(large_t_orf + '\n')

    try:
        run_blastp(query_file, blast_subject, blast_out)
    except Exception as e:
        die('EXCEPTION: {}'.format(e))

    return blast_out
def main():
    """
    Play a number guessing game.

    A secret number between --min and --max is chosen at random. Guesses
    come from --inputs first and then from the prompt. The game ends on a
    correct guess, on quitting, or (with exit status 1) once --guesses
    guesses have been used.
    """

    args = get_args()
    low = args.min
    high = args.max
    guesses_allowed = args.guesses
    inputs = args.inputs
    random.seed(args.seed)

    if low < 1:
        die('--min "{}" cannot be lower than 1'.format(low))

    if guesses_allowed < 1:
        die('--guesses "{}" cannot be lower than 1'.format(guesses_allowed))

    if low > high:
        die('--min "{}" is higher than --max "{}"'.format(low, high))

    secret = random.randint(low, high)
    prompt = 'Guess a number between {} and {} (q to quit): '.format(low, high)
    num_guesses = 0

    while True:
        guess = inputs.pop(0) if inputs else input(prompt)
        num_guesses += 1

        if re.match('q(uit)?', guess.lower()):
            print('Now you will never know the answer.')
            sys.exit()

        if not guess.isdigit():
            print('"{}" is not a number.'.format(guess))
            continue

        # isdigit() also accepts characters such as superscripts that int()
        # rejects, so the conversion itself must be guarded
        try:
            num = int(guess)
        except ValueError:
            warn('"{}" is not an integer'.format(guess))
            continue

        if not low <= num <= high:
            print('Number "{}" is not in the allowed range'.format(num))
        elif num == secret:
            print('"{}" is correct. You win!'.format(num))
            break
        else:
            print('"{}" is too {}.'.format(num,
                                           'low' if num < secret else 'high'))

        if num_guesses >= guesses_allowed:
            print(
                'Too many guesses, loser! The number was "{}."'.format(secret))
            sys.exit(1)
def main():
    """
    Create a new executable Python script from a template.

    The program name has dashes replaced by underscores and gets a '.py'
    suffix if it lacks one. An existing file is only overwritten with
    --force or after the user confirms. The file receives the formatted
    PREAMBLE followed by the SIMPLE or ARGPARSE template.
    """

    args = get_args()
    out_file = args.program.strip().replace('-', '_')

    if not out_file:
        # Report what the user typed; out_file is always empty here
        die('Not a usable filename "{}"'.format(args.program))

    if not out_file.endswith('.py'):
        out_file += '.py'

    if os.path.isfile(out_file) and not args.force:
        answer = input('"{}" exists. Overwrite? [yN] '.format(out_file))
        if not answer.lower().startswith('y'):
            print('Will not overwrite. Bye!')
            sys.exit()

    preamble = PREAMBLE.format(os.getenv('USER'), str(date.today()))
    text = SIMPLE if args.simple else ARGPARSE

    # Close (and so flush) the file before changing its mode and telling
    # the user it is ready
    with open(out_file, 'w') as out_fh:
        out_fh.write(preamble)
        out_fh.write(text)

    subprocess.run(['chmod', '+x', out_file])
    print('Done, see new script "{}."'.format(out_file))
def blast_for_known_spliced_donor(virus, args):
    """
    Write the small t translation to a query FASTA file and blast it
    against 'small_t_blast_half.fa', the first step in finding the splice
    donor position for Large T.

    :param virus: dictionary of proteins found so far; reads 'small_t'
        (its second-to-last element is used as the query sequence) and
        'accession'
    :param args: mapping of command line options; reads 'program_files_dir'
        and 'data_dir'
    :return: file path to the blast output
    """

    small_t_trans = str(virus['small_t'][-2])
    ID = virus['accession']
    out_dir = args['program_files_dir']
    data_dir = args['data_dir']

    blast_small_t_dir = os.path.join(out_dir, 'blast_small_t')
    os.makedirs(blast_small_t_dir, exist_ok=True)

    blast_subject = os.path.join(data_dir, 'small_t_blast_half.fa')
    blast_out = os.path.join(blast_small_t_dir, 'blast_result.tab')

    # Remove any stale result from a previous run
    if os.path.isfile(blast_out):
        os.remove(blast_out)

    query_file = os.path.join(blast_small_t_dir, 'query.fa')

    # Overwrite rather than append: appending glued a new '>' header onto
    # the previous sequence whenever the file already existed
    with open(query_file, 'w') as query:
        query.write('>{}\n'.format(ID))
        query.write(small_t_trans + '\n')

    try:
        run_blastp(query_file, blast_subject, blast_out)
    except Exception as e:
        die('EXCEPTION: {}'.format(e))

    return blast_out
def main():
    """
    Filter FASTA records by taxonomy.

    For every record ID in the input FASTA file, the GenBank entry is
    fetched with Entrez and written to --outfile in FASTA format when its
    'taxonomy' annotation shares at least one name with the wanted taxa.
    """

    args = get_args()
    out_file = args.outfile
    Entrez.email = args.email

    logging.basicConfig(
        filename='.log',
        filemode='w',
        level=logging.DEBUG if args.debug else logging.CRITICAL)

    ok_taxa = get_tax_names(args.taxa) or die('No usable taxa')

    logging.debug('OK tax = {}'.format(ok_taxa))
    logging.debug('Writing to "{}"'.format(out_file))

    num_checked, num_taken = 0, 0

    with open(out_file, 'wt') as out_fh:
        for rec in SeqIO.parse(args.file, 'fasta'):
            num_checked += 1
            print('{:4}: {}'.format(num_checked, rec.id))

            handle = Entrez.efetch(db='nucleotide',
                                   id=rec.id,
                                   rettype='gb',
                                   retmode='text')
            try:
                for record in SeqIO.parse(handle, 'genbank'):
                    # A record without a taxonomy annotation simply has
                    # no hits instead of crashing on set(None)
                    tax = set(record.annotations.get('taxonomy', []))
                    tax_hit = ok_taxa.intersection(tax)
                    if tax_hit:
                        logging.debug('Taking {} ({})'.format(
                            rec.id, tax_hit))
                        num_taken += 1
                        # Argument order is (sequences, handle, format)
                        SeqIO.write(record, out_fh, 'fasta')
            finally:
                handle.close()

    print('Done, checked {}, wrote {} to "{}"'.format(num_checked, num_taken,
                                                      out_file))
def main():
    """
    Assemble the book's Markdown and convert it to --outfile with pandoc.

    Writes 'book.md' into --outdir: a table of contents, the top-level
    README.md, then for each directory listed in the chapters file its
    README.md, any solution.py/.sh/.txt (line-numbered by `cat -n`) and
    discussion.md, followed by the README.md of each appendix. Calls `die`
    on a missing chapter directory or when pandoc fails.
    """

    import shlex  # local import: only needed to quote shell arguments

    args = get_args()
    in_dir = args.dir
    out_dir = args.outdir
    chapters = args.chapters
    appendix = args.appendix
    cur_dir = os.path.dirname(sys.argv[0])

    if not chapters:
        chapters = os.path.join(cur_dir, 'chapters.txt')

    if not os.path.isfile(chapters):
        die('--chapters "{}" is not a file'.format(chapters))

    if not appendix:
        appendix = os.path.join(cur_dir, 'appendices.txt')

    if appendix and not os.path.isfile(appendix):
        die('--appendix "{}" is not a file'.format(appendix))

    def slurp(path):
        """Return the whole text of a file, closing it afterwards"""
        with open(path) as in_fh:
            return in_fh.read()

    def read_lines(path):
        """Return the right-stripped lines of a file"""
        with open(path) as in_fh:
            return [line.rstrip() for line in in_fh]

    book_file = os.path.join(out_dir, 'book.md')

    with open(book_file, 'wt') as fh:
        fh.write('\\setcounter{tocdepth}{2}\\tableofcontents\n\\newpage\n\n')

        intro = 'README.md'
        if os.path.isfile(intro):
            fh.write(slurp(intro))
            fh.write('\n\\newpage\n\n')

        # Lines starting with '#' in the chapters file are comments
        chapter_dirs = [
            name for name in read_lines(chapters) if not name.startswith('#')
        ]

        for i, dir_name in enumerate(chapter_dirs, 1):
            dir_path = os.path.join(in_dir, dir_name)
            if not os.path.isdir(dir_path):
                die('Bad dir "{}"'.format(dir_path))

            print('Chapter {}: {}'.format(i, dir_name))

            readme = os.path.join(dir_path, 'README.md')
            if os.path.isfile(readme):
                print('\tREADME')
                chapter = 'Chapter {}: '.format(i)
                # Prefix the document's leading heading with the number
                text = re.sub(r'^#\s+', '# ' + chapter, slurp(readme))
                fh.write(text + '\n\\newpage\n\n')

            for ext in ['py', 'sh', 'txt']:
                solution = os.path.join(in_dir, dir_name, 'solution.' + ext)
                if os.path.isfile(solution):
                    print('\tSOLUTION')
                    fh.write('## Solution\n\n')
                    fh.write('````\n')
                    # Quote the path so spaces or shell metacharacters in
                    # directory names cannot break the command
                    numbered = getoutput('cat -n {}'.format(
                        shlex.quote(solution)))
                    fh.write(numbered)
                    fh.write('\n````\n')
                    fh.write('\n\\newpage\n\n')

            solution_md = os.path.join(in_dir, dir_name, 'discussion.md')
            if os.path.isfile(solution_md):
                print('\tDISCUSSION')
                fh.write('## Discussion\n\n')
                fh.write(slurp(solution_md))
                fh.write('\n\\newpage\n\n')

        if appendix:
            for i, dir_name in enumerate(read_lines(appendix), 1):
                print('Appendix {}: {}'.format(i, dir_name))
                readme = os.path.join(in_dir, 'appendix', dir_name,
                                      'README.md')
                if os.path.isfile(readme):
                    print('\tREADME')
                    header = 'Appendix {}: '.format(i)
                    text = re.sub(r'^#\s+', '# ' + header, slurp(readme))
                    fh.write(text + '\n\\newpage\n\n')

    # book.md is closed (and flushed) before pandoc reads it
    cmd = 'pandoc {} --pdf-engine=xelatex -o {}'
    rv, out = getstatusoutput(
        cmd.format(shlex.quote(book_file), shlex.quote(str(args.outfile))))

    if rv != 0:
        die('Error: {}'.format(out))
def main():
    """
    Draw a bubble (scatter) plot for each delimited input file.

    The x, y and size columns may be given by name or by 1-based position
    and default to the first, second and third columns. Rows matching
    --x_exclude/--y_exclude are dropped, the figure is saved to --outfile
    (default: the input path with the --format extension) and optionally
    shown. With --list_cols the columns of the first file are listed and
    the program exits.
    """

    args = get_args()

    def resolve_axis(requested, position, col_names):
        """
        Turn an axis option into a column name.

        A number that is not itself a column name is treated as a 1-based
        column position; an out-of-range position is returned unchanged so
        the caller reports it rather than hitting an IndexError or silently
        selecting the last column for "0". A missing option falls back to
        the column at `position` when the file has that many columns.
        """
        if requested and requested.isdigit() and requested not in col_names:
            index = int(requested) - 1
            if 0 <= index < len(col_names):
                return col_names[index]
            return requested

        if not requested and len(col_names) > position:
            return col_names[position]

        return requested

    for file in args.file:
        basename = os.path.basename(file)
        title = args.title or basename
        sep = args.sep

        if not sep:
            # Guess the delimiter from the file extension
            _, ext = os.path.splitext(basename)
            sep = ',' if ext == '.csv' else '\t'

        df = pd.read_csv(file, sep=sep)
        col_names = df.columns

        if args.list_cols:
            print('Columns in "{}"\n{}\n'.format(
                basename, '\n'.join(
                    map(lambda t: '{:3}: {}'.format(*t),
                        enumerate(col_names, 1)))))
            sys.exit(0)

        x_axis = resolve_axis(args.x_axis, 0, col_names)
        y_axis = resolve_axis(args.y_axis, 1, col_names)
        s_axis = resolve_axis(args.s_axis, 2, col_names)

        if x_axis not in col_names:
            die('--x_axis "{}" not in {}'.format(x_axis, ', '.join(col_names)))

        if y_axis not in col_names:
            die('--y_axis "{}" not in {}'.format(y_axis, ', '.join(col_names)))

        if s_axis not in col_names:
            die('--s_axis "{}" not in {}'.format(s_axis, ', '.join(col_names)))

        if args.x_exclude:
            for exclude in args.x_exclude:
                df.drop(df[df[x_axis] == exclude].index, inplace=True)

        if args.y_exclude:
            for exclude in args.y_exclude:
                df.drop(df[df[y_axis] == exclude].index, inplace=True)

        x_label = args.x_label or x_axis
        y_label = args.y_label or y_axis

        if args.sort:
            df.sort_values(by=[y_axis, x_axis],
                           ascending=[False, False],
                           inplace=True)

        x = df[x_axis]
        y = df[y_axis]

        # Default figure size grows with the number of distinct values
        img_width = args.image_width or 5 + len(x.unique()) / 5
        img_height = args.image_height or len(y.unique()) / 4

        plt.figure(figsize=(img_width, img_height))
        plt.scatter(x=x, y=y, s=df[s_axis] * args.multiplier, alpha=0.5)
        plt.xticks(rotation=45, ha='right')
        plt.gcf().subplots_adjust(bottom=.4, left=.4)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.title(title)

        out_file = args.outfile
        if not out_file:
            dir_name = os.path.dirname(os.path.abspath(file))
            root, _ = os.path.splitext(os.path.basename(file))
            out_file = os.path.join(dir_name, root + '.' + args.format)

        plt.savefig(out_file)

        if args.open_image:
            plt.show()

        # Release the figure so many input files do not pile up in memory
        plt.close()

        print('Done, see "{}"'.format(out_file))
def main():
    """
    Assemble the book's Markdown and convert it to --outfile with pandoc.

    Calls make_outline() for the chapter list, then writes 'book.md' into
    --outdir: TITLE.md, a table of contents, README.md and OUTLINE.md when
    present, then for each chapter directory its README.md, solution.py
    (line-numbered by `cat -n`) and discussion.md, followed by the
    README.md of each appendix. Calls `die` when pandoc fails.
    """

    import shlex  # local import: only needed to quote shell arguments

    args = get_args()
    in_dir = args.dir
    out_dir = args.outdir
    chapters = args.chapters
    appendix = args.appendix
    cur_dir = os.path.dirname(sys.argv[0])

    if not chapters:
        chapters = os.path.join(cur_dir, 'chapters.txt')

    if not os.path.isfile(chapters):
        die('--chapters "{}" is not a file'.format(chapters))

    if not appendix:
        appendix = os.path.join(cur_dir, 'appendices.txt')

    if appendix and not os.path.isfile(appendix):
        die('--appendix "{}" is not a file'.format(appendix))

    def slurp(path):
        """Return the whole text of a file, closing it afterwards"""
        with open(path) as in_fh:
            return in_fh.read()

    def read_lines(path):
        """Return the right-stripped lines of a file"""
        with open(path) as in_fh:
            return [line.rstrip() for line in in_fh]

    book_file = os.path.join(out_dir, 'book.md')

    # Lines starting with '#' in the chapters file are comments
    chapter_list = [
        name for name in read_lines(chapters) if not name.startswith('#')
    ]

    make_outline(in_dir, chapter_list)

    with open(book_file, 'wt') as fh:
        title = 'TITLE.md'
        if os.path.isfile(title):
            fh.write(slurp(title))
            fh.write('\n\n\\newpage\n\n')

        fh.write('\\setcounter{tocdepth}{2}\\tableofcontents\n\n')
        fh.write('\n\\newpage\n\n')

        top_readme = 'README.md'
        if os.path.isfile(top_readme):
            fh.write(slurp(top_readme))
            fh.write('\n\\newpage\n\n')

        outline = 'OUTLINE.md'
        if os.path.isfile(outline):
            fh.write(slurp(outline))
            fh.write('\n\\newpage\n\n')

        for i, dir_name in enumerate(chapter_list, 1):
            print('Chapter {}: {}'.format(i, dir_name))

            readme = os.path.join(in_dir, dir_name, 'README.md')
            if os.path.isfile(readme):
                print('\tREADME')
                chapter = 'Chapter {}: '.format(i)
                # Prefix the document's leading heading with the number
                text = re.sub(r'^#\s+', '# ' + chapter, slurp(readme))
                fh.write(text + '\n\\newpage\n\n')

            solution_py = os.path.join(in_dir, dir_name, 'solution.py')
            if os.path.isfile(solution_py):
                print('\tSOLUTION')
                fh.write('## Solution\n\n')
                fh.write('````\n')
                # Quote the path so spaces or shell metacharacters in
                # directory names cannot break the command
                numbered = getoutput('cat -n {}'.format(
                    shlex.quote(solution_py)))
                fh.write(numbered)
                fh.write('\n````\n')
                fh.write('\n\\newpage\n\n')
            else:
                print('\t>>>>>>> MISSING SOLUTION <<<<<<<<\n\n')

            solution_md = os.path.join(in_dir, dir_name, 'discussion.md')
            if os.path.isfile(solution_md):
                print('\tDISCUSSION')
                fh.write('## Discussion\n\n')
                fh.write(slurp(solution_md))
                fh.write('\n\\newpage\n\n')

        if appendix:
            for i, dir_name in enumerate(read_lines(appendix), 1):
                print('Appendix {}: {}'.format(i, dir_name))
                readme = os.path.join(in_dir, 'appendix', dir_name,
                                      'README.md')
                if os.path.isfile(readme):
                    print('\tREADME')
                    header = 'Appendix {}: '.format(i)
                    text = re.sub(r'^#\s+', '# ' + header, slurp(readme))
                    fh.write(text + '\n\\newpage\n\n')

    # book.md is closed (and flushed) before pandoc reads it
    cmd = 'pandoc {} --pdf-engine=xelatex -o {}'
    rv, out = getstatusoutput(
        cmd.format(shlex.quote(book_file), shlex.quote(str(args.outfile))))

    if rv != 0:
        die('Error: {}'.format(out))
def main():
    """
    Plot the taxa of the wanted rank found in tab-delimited report files.

    For each file, the 'root' and 'unclassified' rows supply the total
    number of reads; every row of rank --rank whose share of that total is
    at least --min is collected. The collected rows are optionally saved
    to --dataout and drawn as a sample-by-organism scatter plot written to
    --outfile.
    """

    args = get_args()
    rank_wanted = args.rank
    min_pct = args.min

    def lines(fh):
        """Yield the non-empty, non-comment lines of a file handle"""
        for line in map(lambda s: s.rstrip('\n'), fh):
            if line and not line.startswith('#'):
                yield line

    num_root, num_unclassified = 0, 0
    assigned = []

    for i, fh in enumerate(args.file, start=1):
        basename = os.path.basename(fh.name)
        print('{:3}: {}'.format(i, basename))

        reader = csv.DictReader(lines(fh), delimiter='\t')
        for rec in reader:
            # Skip rows whose read count is missing or not an integer
            try:
                reads = int(rec['reads'])
            except (KeyError, TypeError, ValueError):
                continue

            tax_name = rec['taxName'].strip()

            if tax_name == 'root':
                num_root = reads
                continue

            if tax_name == 'unclassified':
                num_unclassified = reads
                continue

            # Keep only the wanted rank; the test used to be `==`, which
            # discarded exactly the rows being asked for
            if rec['rank'] != rank_wanted:
                continue

            total_reads = num_root + num_unclassified
            if total_reads == 0:
                die('Failed to find root/unclassified')

            pct = reads / total_reads
            if min_pct and pct < min_pct:
                continue

            assigned.append({
                'sample': basename,
                'tax_id': rec['taxID'],
                'tax_name': tax_name,
                'pct': pct,
                'reads': reads
            })

    if not assigned:
        die('No data!')

    df = pd.DataFrame(assigned)

    if args.dataout:
        df.to_csv(args.dataout, index=False)

    num_found = len(assigned)
    print('At a {}% found {} {}'.format(min_pct, num_found, rank_wanted))

    if num_found > 1000:
        die('Too many to plot')

    x = df['sample']
    y = df['tax_name']

    # Figure size grows with the number of samples and organisms
    plt.figure(figsize=(5 + len(x.unique()) / 5, len(y.unique()) / 3))
    plt.scatter(x, y, s=df['pct'], alpha=0.5)
    plt.xticks(rotation=45, ha='right')
    plt.gcf().subplots_adjust(bottom=.4, left=.4)
    plt.ylabel('Organism')
    plt.xlabel('Sample')

    if args.title:
        plt.title(args.title)

    plt.savefig(args.outfile)
    print('Done, see outfile "{}"'.format(args.outfile))

    if args.open_image:
        plt.show()
def all_combos(puzzle):
    """
    Return every straight line of cells in a rectangular puzzle grid.

    `puzzle` is a sequence of equal-length rows (strings or lists). The
    result holds, in order: the rows themselves, the columns, the
    up-and-right diagonals, the down-and-right diagonals, and then the
    reverse of each of those as a list. Each diagonal appears once per
    direction. An empty puzzle yields an empty list; rows of differing
    length are reported through `die`.
    """

    if not puzzle:
        return []

    num_rows = len(puzzle)
    num_cols = len(puzzle[0])

    if not all(len(row) == num_cols for row in puzzle):
        die('Uneven number of columns')

    def walk(row, col, row_step):
        """Collect cells from (row, col), one column right per step"""
        cells = []
        # Bounding both row and column keeps grids with more rows than
        # columns from running off the right-hand edge
        while 0 <= row < num_rows and col < num_cols:
            cells.append(puzzle[row][col])
            row += row_step
            col += 1
        return cells

    # Horizontal
    combos = list(puzzle)

    # Vertical
    for col_num in range(num_cols):
        combos.append([puzzle[row_num][col_num] for row_num in range(num_rows)])

    # Diagonals start on the left edge, then on the bottom edge (going up)
    # or the top edge (going down). Column 0 is skipped on the second pass
    # because the left-edge pass already covers the corner diagonal.
    starts_up = [(row, 0) for row in range(num_rows)]
    starts_up += [(num_rows - 1, col) for col in range(1, num_cols)]

    starts_down = [(row, 0) for row in range(num_rows)]
    starts_down += [(0, col) for col in range(1, num_cols)]

    for row_step, starts in ((-1, starts_up), (1, starts_down)):
        for row, col in starts:
            diag = walk(row, col, row_step)
            if diag:
                combos.append(diag)

    # Every line can also be read backwards
    combos.extend([list(reversed(c)) for c in combos])

    return combos