示例#1
0
def get_kmers(words, k, min_words):
    """Map each k-mer to the words containing it, keeping well-shared k-mers.

    Every substring of length ``k`` of every word is indexed; a word appears
    in a k-mer's list once per occurrence of that k-mer in the word.  Only
    k-mers whose list has at least ``min_words`` entries are returned.
    ``die`` is called when ``k`` is not greater than 1.
    """

    if k <= 1:
        die('-k "{}" must be greater than 1'.format(k))

    index = defaultdict(list)
    for word in words:
        last_start = len(word) - k
        for start in range(last_start + 1):
            index[word[start:start + k]].append(word)

    # Build a new dict instead of removing entries from `index` in place.
    return {
        kmer: hits
        for kmer, hits in index.items() if len(hits) >= min_words
    }
示例#2
0
def main():
    """Draw the tree parsed from the input file as a Graphviz digraph.

    The output name is ``--outfile`` or, failing that, the input file's
    basename with ``.gv`` appended.
    """

    args = get_args()
    tree_fh = args.file

    out_file = args.outfile
    if not out_file:
        out_file = os.path.basename(tree_fh.name) + '.gv'

    nodes, edges = parse_tree(tree_fh)

    if not (nodes or edges):
        die('No nodes or edges in "{}".'.format(tree_fh.name))

    graph = Digraph(comment='Tree')

    # Only the full names are drawn; the dict keys (initials) are ignored.
    for full_name in nodes.values():
        graph.node(full_name)

    for source, target in edges:
        # An edge end may be an alias found in `nodes`; otherwise use it as is.
        graph.edge(nodes.get(source, source), nodes.get(target, target))

    graph.render(out_file, view=args.view)

    print('Done, see output in "{}".'.format(out_file))
示例#3
0
def read_file(file):
    """Return the whitespace-separated tokens of a file as a list.

    :param file: path of the file to read
    :return: list of strings produced by splitting the file's text on
        whitespace (empty list for an empty file)

    ``die`` is called when ``file`` is not an existing regular file.
    """

    if not os.path.isfile(file):
        die('"{}" is not a file'.format(file))

    # The context manager closes the handle instead of leaking it.
    with open(file) as fh:
        return fh.read().split()
示例#4
0
def blast_main_orfs(genome, args):
    """
    Write the ORFs found by find_orfs_with_trans() to a FASTA file and run
    run_blastp on that file.

    :param genome: linearized based on L1 genome
    :param args: command line arguments; the keys read here are
        'min_prot_len', 'program_files_dir' and 'data_dir'
    :return: a tuple of two file paths, the first being the blast output and
        the second being the FASTA file of ORFs
    :raises Exception: if no ORFs are found
    """
    orfs = find_orfs_with_trans(genome, 1, args['min_prot_len'])
    if not orfs:
        raise Exception('No ORFs, must stop.')

    orfs_fa = os.path.join(args['program_files_dir'], 'orfs.fa')

    # The context manager closes (and flushes) the FASTA file even if a
    # write fails, so blastp never reads a half-written file.
    with open(orfs_fa, 'wt') as orfs_fh:
        for orf in orfs:
            # `orfs` is indexed by sequence; the looked-up value is written
            # as the FASTA header line.
            orfs_fh.write('\n'.join(['>' + str(orfs[orf]), orf, '']))

    blast_sub = os.path.join(args['data_dir'], 'main_blast.fa')
    blast_out = os.path.join(args['program_files_dir'],
                             'blast_results_main.tab')

    # Remove any earlier result file before the new run.
    if os.path.isfile(blast_out):
        os.remove(blast_out)

    try:
        run_blastp(orfs_fa, blast_sub, blast_out)
    except Exception as e:
        die('EXCEPTION: {}'.format(e))

    return blast_out, orfs_fa
def main():
    """Print a histogram of how often each word character occurs.

    ``--text`` is either literal text or the name of a file to read.  Only
    characters matching the regex ``\\w`` are counted.  Bars are drawn with
    ``--character`` and scaled down so the longest fits in ``--width``.
    Characters seen fewer than ``--minimum`` times are not shown.
    """

    args = get_args()
    text = args.text
    char = args.character
    width = args.width
    min_val = args.minimum

    if len(char) != 1:
        die('--character "{}" must be one character'.format(char))

    if os.path.isfile(text):
        # Close the handle instead of leaking it.
        with open(text) as fh:
            text = fh.read()
    if args.case_insensitive:
        text = text.upper()

    freqs = Counter(c for c in text if re.match(r'\w', c))

    # `default=0` keeps input without any word characters from crashing;
    # in that case nothing is printed.
    high = max(freqs.values(), default=0)
    scale = high / width if high > width else 1

    if args.frequency_sort:
        # Most frequent first; ties are broken by character, descending.
        items = sorted(freqs.items(),
                       key=lambda pair: (pair[1], pair[0]),
                       reverse=True)
    else:
        items = sorted(freqs.items())

    for c, num in items:
        if num < min_val:
            continue
        print('{} {:6} {}'.format(c, num, char * int(num / scale)))
示例#6
0
def main():
    """Write the first ``--number_days`` verses of "The Twelve Days of
    Christmas" to ``--outfile``, or to standard output if none is given.

    ``die`` is called when the requested number of days is not 1-12.
    """

    args = get_args()
    out_file = args.outfile
    num_days = args.number_days

    days = {
        12: 'Twelve drummers drumming',
        11: 'Eleven pipers piping',
        10: 'Ten lords a leaping',
        9: 'Nine ladies dancing',
        8: 'Eight maids a milking',
        7: 'Seven swans a swimming',
        6: 'Six geese a laying',
        5: 'Five gold rings',
        4: 'Four calling birds',
        3: 'Three French hens',
        2: 'Two turtle doves',
        1: 'a partridge in a pear tree',
    }

    ordinal = {
        12: 'twelfth',
        11: 'eleventh',  # was 'eleven', which read "On the eleven day"
        10: 'tenth',
        9: 'ninth',
        8: 'eighth',
        7: 'seventh',
        6: 'sixth',
        5: 'fifth',
        4: 'fourth',
        3: 'third',
        2: 'second',
        1: 'first',
    }

    # Validate before opening the output so a bad value does not truncate
    # an existing file.
    if num_days not in days:
        die('Cannot sing "{}" days'.format(num_days))

    out_fh = open(out_file, 'wt') if out_file else sys.stdout
    try:
        for i in range(1, num_days + 1):
            first = 'On the {} day of Christmas,\nMy true love gave to me,'
            out_fh.write(first.format(ordinal[i]) + '\n')

            # Gifts are listed from the newest down to the partridge.
            for j in reversed(range(1, i + 1)):
                if j == 1:
                    if i == 1:
                        out_fh.write('{}.\n'.format(days[j].title()))
                    else:
                        out_fh.write('And {}.\n'.format(days[j]))
                else:
                    out_fh.write('{},\n'.format(days[j]))

            # A blank line follows every verse except the twelfth.
            if i < max(days):
                out_fh.write('\n')
    finally:
        # Close only a file this function opened, never stdout.
        if out_fh is not sys.stdout:
            out_fh.close()
示例#7
0
def main():
    """Find multi-word anagrams of ``--text`` using the words in
    ``--wordlist`` and print them as a sorted, numbered list.

    Words are grouped by length.  For 1 to ``--num_combos`` words, every
    combination of lengths that adds up to ``len(text)`` is tried, and a
    combination of words is kept when its letters match the letter counts
    of the text.  Debug messages go to the file ``.log``.
    """
    args = get_args()
    text = args.text
    word_list = args.wordlist

    if not os.path.isfile(word_list):
        die('--wordlist "{}" is not a file'.format(word_list))

    logging.basicConfig(
        filename='.log',
        filemode='w',
        level=logging.DEBUG if args.debug else logging.CRITICAL)

    words = defaultdict(set)
    with open(word_list) as fh:
        for line in fh:
            for word in line.split():
                clean = re.sub('[^a-z0-9]', '', word.lower())
                # A token made only of punctuation cleans to ''; keeping it
                # would create zero-length "words" that pad anagrams with
                # stray spaces.
                if not clean:
                    continue
                # The only one-letter words accepted are "a" and "i".
                if len(clean) == 1 and clean not in 'ai':
                    continue
                words[len(clean)].add(clean)

    text_len = len(text)
    counts = Counter(text)
    anagrams = set()
    lengths = list(words.keys())
    for i in range(1, args.num_combos + 1):
        # The lengths are chained twice so a length can be used twice in a
        # combination; sorting and the set remove duplicate combinations.
        key_combos = list(
            filter(
                lambda t: sum(t) == text_len,
                set(
                    map(lambda t: tuple(sorted(t)),
                        combinations(chain(lengths, lengths), i)))))

        for keys in key_combos:
            logging.debug('Searching keys {}'.format(keys))

            # Iterate the product lazily instead of building the whole
            # cartesian product in memory first.
            for t in product(*(words[k] for k in keys)):
                if Counter(''.join(t)) == counts:
                    anagrams.update(
                        phrase for phrase in map(' '.join, permutations(t))
                        if phrase != text)

            logging.debug('# anagrams = {}'.format(len(anagrams)))

    logging.debug('Finished searching')

    if anagrams:
        print('{} ='.format(text))
        for i, t in enumerate(sorted(anagrams), 1):
            print('{:4}. {}'.format(i, t))
    else:
        print('No anagrams for "{}".'.format(text))
示例#8
0
def main():
    """Run run_blastp on 'orfs_E5_HPV6.fa' against 'blast_E5.fa', writing
    'e5_results.tab', and report the number of hits."""
    query_fa = 'orfs_E5_HPV6.fa'
    subject_fa = 'blast_E5.fa'
    results_tab = 'e5_results.tab'

    try:
        hit_count = run_blastp(query_fa,
                               subject_fa,
                               results_tab,
                               evalue=1e-10)
        print('Got {} hits!'.format(hit_count))
    except Exception as err:
        die('EXCEPTION: {}'.format(err))

    print('All was good!')
示例#9
0
def main():
    """Find the alignment column with the most 'M' residues and report how
    the sequence with id 'unknown' relates to that column.

    The FASTA alignment is read with AlignIO.  Along the way the function
    prints the alignment, the number of leading dashes per sequence, the
    highest per-column 'M' count and the columns that reach it.
    """
    args = get_args()
    fasta = args.fasta

    if not os.path.isfile(fasta):
        die('"{}" is not a file'.format(fasta))

    alignment = AlignIO.read(fasta, 'fasta')
    print(alignment)
    alignment_length = alignment.get_alignment_length()
    seq_by_id = {rec.id: str(rec.seq) for rec in alignment}

    # Number of leading gap characters in each sequence.
    dashes = {}
    for rec in alignment:
        seq = str(rec.seq)
        dashes[rec.id] = len(seq) - len(seq.lstrip('-'))

    pp(dashes)

    # For each column, how many sequences have an 'M' there.
    conserved = {
        i: alignment[:, i].count('M')
        for i in range(alignment_length)
    }

    max_num_seqs = max(conserved.values())
    print(max_num_seqs)
    seqs_at_max = [
        item for item in conserved.items() if item[1] == max_num_seqs
    ]

    if len(seqs_at_max) > 1:
        warn('Hey, look at this!')

    # The first column reaching the maximum is used.
    best_col = seqs_at_max[0][0]
    print(seqs_at_max)
    unknown = seq_by_id['unknown']
    prefix = unknown[0:best_col]

    if len(prefix) * '-' == prefix:
        print('all dashes, do nothing')
    elif prefix.count('-') == 0:
        print('E6 = ', unknown[best_col:])
    else:
        # Count the residues of the 'unknown' prefix.  The original used the
        # leftover loop variable `seq`, i.e. whichever record came last.
        print('remove {} from beginning'.format(
            len(prefix.replace('-', ''))))
示例#10
0
def get_words(file):
    """Return the set of distinct words in a file.

    Each whitespace-separated token is lowercased and stripped of every
    character that is not a letter or digit.  Tokens that end up empty
    (pure punctuation) are ignored.

    :param file: path of the file to read
    :return: set of cleaned words

    ``die`` is called when ``file`` is not a file or has no usable words.
    """

    if not os.path.isfile(file):
        # The file name was never interpolated into this message before.
        die('"{}" is not a file'.format(file))

    words = set()
    with open(file) as fh:
        for line in fh:
            for word in line.split():
                clean = re.sub('[^a-zA-Z0-9]', '', word.lower())
                # Without this check '' enters the set, which also makes
                # the "no usable words" test below pass wrongly.
                if clean:
                    words.add(clean)

    if not words:
        die('No usable words in "{}"'.format(file))

    return words
示例#11
0
def main():
    """Transpose the notes in a tune file by ``--shift`` and print it.

    Only the letters A-G and a-g are treated as notes; every other
    character is printed unchanged.  Lines wrapped in ``<...>`` and header
    lines of the form ``X:`` are printed as they are, except a ``K:`` line,
    which is printed as ``K:`` plus its transposed first character.
    """
    args = get_args()
    file = args.file
    shift = args.shift
    ucase = 'ABCDEFG'
    lcase = 'abcdefg'
    num_notes = 7

    if not 1 < abs(shift) <= 8:
        die('--shift "{}" must be between 2 and 8'.format(shift))

    if not os.path.isfile(file):
        die('"{}" is not a file'.format(file))

    # account for interval where a 2nd (-s 2) is a move of one note
    shift = shift - 1 if shift > 0 else shift + 1

    def transpose(note):
        """Shift a note letter within its own case, wrapping around."""
        for scale in (lcase, ucase):
            if note in scale:
                return scale[(scale.index(note) + shift) % num_notes]
        return note

    with open(file) as fh:
        for line in fh:
            line = line.rstrip()

            # A bare "K:" has no key character to transpose; it falls
            # through to the header branch instead of raising IndexError.
            if line.startswith('K:') and len(line) > 2:
                # NOTE(review): anything after the first key character is
                # dropped, as in the original - confirm this is intended.
                print('K:' + transpose(line[2]))
            elif (line.startswith('<') and line.endswith('>')) or re.match(
                    r'[A-Z]:\s?', line):
                print(line)
            else:
                print(''.join(map(transpose, line)))
示例#12
0
def main():
    """Print a numbered chain of words in which each word is within
    ``--max_distance`` (as measured by ``dist``) of the one before it.

    The chain starts at ``--start`` or at a random word from the word list,
    and has at most ``--iterations`` links.  No word is used twice.
    """

    args = get_args()
    start = args.start
    word_fh = args.wordlist
    distance = args.max_distance

    random.seed(args.seed)

    log_level = logging.DEBUG if args.debug else logging.CRITICAL
    logging.basicConfig(filename='.log', filemode='w', level=log_level)

    logging.debug('file = %s', word_fh.name)

    words = word_fh.read().splitlines()

    if not start:
        start = random.choice(words)

    if start not in words:
        die('Unknown word "{}"'.format(start))

    def neighbors(word):
        """Words of similar length that are close enough to `word`."""
        size = len(word)
        shortest, longest = size - distance, size + distance
        return [
            cand for cand in words
            if shortest <= len(cand) <= longest
            and dist(word, cand) <= distance
        ]

    links = [start]
    for _ in range(args.iterations - 1):
        unused = [cand for cand in neighbors(links[-1]) if cand not in links]
        if not unused:
            warn('Failed to find more words!')
            break

        links.append(random.choice(unused))

    for position, link in enumerate(links, start=1):
        print('{:3}: {}'.format(position, link))
示例#13
0
def main():
    """Fill in the ``<placeholder>`` blanks of a template and print it.

    Each placeholder is replaced, in order of appearance, by the next value
    from ``--inputs``; once those run out the user is prompted with the
    placeholder's name.  ``die`` is called when the file has no placeholders.
    """

    args = get_args()
    inputs = args.inputs

    # Group 1 is the whole "<name>" token and group 2 the name inside it.
    # The "+" has to be inside group 2: "([^>])+" captured only the last
    # character of the name, so prompts showed a single letter.
    regex = re.compile('(<([^>]+)>)')
    text = args.file.read().rstrip()

    if not regex.search(text):
        die('File "{}" has no placeholders'.format(args.file.name))

    def answer_for(match):
        """Return the replacement text for one placeholder match."""
        name = match.group(2)
        return inputs.pop(0) if inputs else input('{}: '.format(name))

    # Substituting with a function replaces each placeholder where it was
    # found and inserts the answer literally.  Previously the placeholder
    # was reused as a regex and the answer as a replacement template, so
    # special characters in either could break the substitution.
    print(regex.sub(answer_for, text))
示例#14
0
def blast_splice_acceptor(virus, args):
    """
    Blast the query Large T against all Large T in csv to start the process
    of finding the splice acceptor position for Large T.

    :param virus: dictionary that has all found proteins; the keys read
        here are 'Large_T_orf_trans' and 'accession'
    :param args: command line arguments; the keys read here are
        'program_files_dir' and 'data_dir'
    :return: file path to blast output
    """
    large_t_orf = str(virus['Large_T_orf_trans'])
    ID = virus['accession']
    out_dir = args['program_files_dir']
    data_dir = args['data_dir']

    splice_acceptor_dir = os.path.join(out_dir, 'splice_acceptor')
    if not os.path.isdir(splice_acceptor_dir):
        os.makedirs(splice_acceptor_dir)

    blast_subject = os.path.join(data_dir, 'large_t_blast_half.fa')
    blast_out = os.path.join(splice_acceptor_dir,
                             'blast_result_splice_acceptor.tab')

    # Remove any earlier result file before the new run.
    if os.path.isfile(blast_out):
        os.remove(blast_out)

    query_file = os.path.join(splice_acceptor_dir, 'query.fa')

    # Write mode, not append: appending glued each new record onto the
    # previous sequence (there is no trailing newline), so a rerun in the
    # same directory produced a malformed query file.
    with open(query_file, 'w') as query:
        query.write('>{}\n'.format(ID))
        query.write(large_t_orf)

    try:
        run_blastp(query_file, blast_subject, blast_out)
    except Exception as e:
        die('EXCEPTION: {}'.format(e))

    return blast_out
示例#15
0
def main():
    """Play a number guessing game.

    A secret number between ``--min`` and ``--max`` is chosen at random.
    Guesses come from ``--inputs`` first and then from the prompt.  The
    game ends when the guess is right, when the player quits, or when
    ``--guesses`` guesses have been used (exit status 1).
    """
    args = get_args()
    low = args.min
    high = args.max
    guesses_allowed = args.guesses
    inputs = args.inputs
    random.seed(args.seed)

    if low < 1:
        die('--min "{}" cannot be lower than 1'.format(low))

    if guesses_allowed < 1:
        die('--guesses "{}" cannot be lower than 1'.format(guesses_allowed))

    if low > high:
        die('--min "{}" is higher than --max "{}"'.format(low, high))

    secret = random.randint(low, high)
    prompt = 'Guess a number between {} and {} (q to quit): '.format(low, high)
    num_guesses = 0

    while True:
        guess = inputs.pop(0) if inputs else input(prompt)
        num_guesses += 1

        if re.match('q(uit)?', guess.lower()):
            print('Now you will never know the answer.')
            sys.exit()

        if not guess.isdigit():
            print('"{}" is not a number.'.format(guess))
            continue

        # isdigit() also accepts characters such as superscript digits that
        # int() rejects.  The original converted once outside the try and
        # crashed on those; convert exactly once, guarded.
        try:
            num = int(guess)
        except ValueError:
            warn('"{}" is not an integer'.format(guess))
            continue

        if not low <= num <= high:
            print('Number "{}" is not in the allowed range'.format(num))
        elif num == secret:
            print('"{}" is correct. You win!'.format(num))
            break
        else:
            print('"{}" is too {}.'.format(num,
                                           'low' if num < secret else 'high'))

        if num_guesses >= guesses_allowed:
            print(
                'Too many guesses, loser! The number was "{}."'.format(secret))
            sys.exit(1)
示例#16
0
def main():
    """Create a new executable Python script from a template.

    The file name is ``--program`` with surrounding whitespace removed,
    dashes turned into underscores and ``.py`` appended if missing.  An
    existing file is overwritten only with ``--force`` or after the user
    confirms.  The script body is PREAMBLE followed by SIMPLE or ARGPARSE.
    """

    args = get_args()
    out_file = args.program.strip().replace('-', '_')

    if not out_file:
        # Report what the user typed; `out_file` is always empty here.
        die('Not a usable filename "{}"'.format(args.program))

    if not out_file.endswith('.py'):
        out_file += '.py'

    if os.path.isfile(out_file) and not args.force:
        answer = input('"{}" exists.  Overwrite? [yN] '.format(out_file))
        if not answer.lower().startswith('y'):
            print('Will not overwrite. Bye!')
            sys.exit()

    preamble = PREAMBLE.format(os.getenv('USER'), str(date.today()))
    text = SIMPLE if args.simple else ARGPARSE

    # Close (and so flush) the file before changing its mode.
    with open(out_file, 'w') as out_fh:
        out_fh.write(preamble)
        out_fh.write(text)

    subprocess.run(['chmod', '+x', out_file])
    print('Done, see new script "{}."'.format(out_file))
示例#17
0
def blast_for_known_spliced_donor(virus, args):
    """
    Blast the query small t against all E1s in the csv file to start the
    process of finding the splice donor position for Large T.

    :param virus: dictionary that has all found proteins so far; the keys
        read here are 'small_t' (its second-to-last element is used as the
        query sequence) and 'accession'
    :param args: command line arguments; the keys read here are
        'program_files_dir' and 'data_dir'
    :return: file path to blast output
    """
    small_t_trans = str(virus['small_t'][-2])
    ID = virus['accession']
    out_dir = args['program_files_dir']
    data_dir = args['data_dir']

    blast_small_t_dir = os.path.join(out_dir, 'blast_small_t')
    if not os.path.isdir(blast_small_t_dir):
        os.makedirs(blast_small_t_dir)

    blast_subject = os.path.join(data_dir, 'small_t_blast_half.fa')
    blast_out = os.path.join(blast_small_t_dir, 'blast_result.tab')

    # Remove any earlier result file before the new run.
    if os.path.isfile(blast_out):
        os.remove(blast_out)

    query_file = os.path.join(blast_small_t_dir, 'query.fa')

    # Write mode, not append: appending glued each new record onto the
    # previous sequence (there is no trailing newline), so a rerun in the
    # same directory produced a malformed query file.
    with open(query_file, 'w') as query:
        query.write('>{}\n'.format(ID))
        query.write(small_t_trans)

    try:
        run_blastp(query_file, blast_subject, blast_out)
    except Exception as e:
        die('EXCEPTION: {}'.format(e))

    return blast_out
示例#18
0
def main():
    """Keep the sequences whose GenBank taxonomy includes a wanted taxon.

    For every record in the input FASTA file the GenBank entry with the
    same id is fetched through Entrez.  Fetched records whose taxonomy
    shares a name with ``--taxa`` are written as FASTA to ``--outfile``.
    Debug messages go to the file ``.log``.
    """

    args = get_args()
    out_file = args.outfile
    Entrez.email = args.email

    logging.basicConfig(
        filename='.log',
        filemode='w',
        level=logging.DEBUG if args.debug else logging.CRITICAL)

    ok_taxa = get_tax_names(args.taxa) or die('No usable taxa')
    logging.debug('OK tax = {}'.format(ok_taxa))

    logging.debug('Writing to "{}"'.format(out_file))
    num_checked, num_taken = 0, 0

    with open(out_file, 'wt') as out_fh:
        for rec in SeqIO.parse(args.file, 'fasta'):
            num_checked += 1
            print('{:4}: {}'.format(num_checked, rec.id))

            handle = Entrez.efetch(db='nucleotide',
                                   id=rec.id,
                                   rettype='gb',
                                   retmode='text')
            try:
                for record in SeqIO.parse(handle, 'genbank'):
                    # A record without a taxonomy annotation matches nothing
                    # (set(None) used to raise TypeError).
                    tax = set(record.annotations.get('taxonomy') or [])
                    tax_hit = ok_taxa.intersection(tax)
                    if tax_hit:
                        logging.debug('Taking {} ({})'.format(
                            rec.id, tax_hit))
                        num_taken += 1
                        # SeqIO.write takes (sequences, handle, format); the
                        # handle and format were passed the wrong way round.
                        SeqIO.write(record, out_fh, 'fasta')
            finally:
                handle.close()

    print('Done, checked {}, wrote {} to "{}"'.format(num_checked, num_taken,
                                                      out_file))
示例#19
0
def main():
    """Assemble the chapters and appendices into ``book.md`` and convert it
    with pandoc.

    Chapter directories are listed, one per line, in ``--chapters``
    (default ``chapters.txt`` next to this script); blank lines and lines
    starting with ``#`` are skipped.  For each chapter its README.md, any
    ``solution.py``/``.sh``/``.txt`` (numbered with ``cat -n``) and
    ``discussion.md`` are appended.  Appendix READMEs listed in
    ``--appendix`` follow.  ``die`` is called on a missing list file, a
    missing chapter directory or a pandoc failure.
    """
    from shlex import quote  # local import: only this function needs it

    args = get_args()
    in_dir = args.dir
    out_dir = args.outdir
    chapters = args.chapters
    appendix = args.appendix
    cur_dir = os.path.dirname(sys.argv[0])

    if not chapters:
        chapters = os.path.join(cur_dir, 'chapters.txt')

    if not os.path.isfile(chapters):
        die('--chapters "{}" is not a file'.format(chapters))

    if not appendix:
        appendix = os.path.join(cur_dir, 'appendices.txt')

    if appendix and not os.path.isfile(appendix):
        die('--appendix "{}" is not a file'.format(appendix))

    def slurp(path):
        """Return the whole text of `path`, closing the file afterwards."""
        with open(path) as in_fh:
            return in_fh.read()

    def read_names(path, skip_comments):
        """Return the non-blank, right-stripped lines of `path`."""
        with open(path) as in_fh:
            names = [line.rstrip() for line in in_fh]
        return [
            name for name in names
            if name and not (skip_comments and name.startswith('#'))
        ]

    book_file = os.path.join(out_dir, 'book.md')

    with open(book_file, 'wt') as fh:
        fh.write('\\setcounter{tocdepth}{2}\\tableofcontents\n\\newpage\n\n')

        intro = 'README.md'
        if os.path.isfile(intro):
            fh.write(slurp(intro))
            fh.write('\n\\newpage\n\n')

        # Blank lines are dropped: they used to resolve to `in_dir` itself
        # and be counted as a chapter.
        for i, dir_name in enumerate(read_names(chapters, True), 1):
            dir_path = os.path.join(in_dir, dir_name)
            if not os.path.isdir(dir_path):
                die('Bad dir "{}"'.format(dir_path))

            print('Chapter {}: {}'.format(i, dir_name))
            readme = os.path.join(dir_path, 'README.md')
            if os.path.isfile(readme):
                print('\tREADME')
                chapter = 'Chapter {}: '.format(i)
                # Without re.MULTILINE "^" matches only at the very start,
                # so just a leading top-level heading gets the prefix.
                text = re.sub(r'^#\s+', '# ' + chapter, slurp(readme))
                fh.write(text + '\n\\newpage\n\n')

            for ext in ['py', 'sh', 'txt']:
                solution = os.path.join(in_dir, dir_name, 'solution.' + ext)
                if os.path.isfile(solution):
                    print('\tSOLUTION')
                    fh.write('## Solution\n\n')
                    fh.write('````\n')
                    # Quote the path so spaces or shell metacharacters in it
                    # cannot break the command.
                    numbered = getoutput('cat -n {}'.format(quote(solution)))
                    fh.write(numbered)
                    fh.write('\n````\n')
                    fh.write('\n\\newpage\n\n')

            solution_md = os.path.join(in_dir, dir_name, 'discussion.md')
            if os.path.isfile(solution_md):
                print('\tDISCUSSION')
                fh.write('## Discussion\n\n')
                fh.write(slurp(solution_md))
                fh.write('\n\\newpage\n\n')

        if appendix:
            for i, dir_name in enumerate(read_names(appendix, False), 1):
                print('Appendix {}: {}'.format(i, dir_name))
                readme = os.path.join(in_dir, 'appendix', dir_name,
                                      'README.md')
                if os.path.isfile(readme):
                    print('\tREADME')
                    header = 'Appendix {}: '.format(i)
                    text = re.sub(r'^#\s+', '# ' + header, slurp(readme))
                    fh.write(text + '\n\\newpage\n\n')

    cmd = 'pandoc {} --pdf-engine=xelatex -o {}'
    rv, out = getstatusoutput(cmd.format(quote(book_file),
                                         quote(args.outfile)))

    if rv != 0:
        die('Error: {}'.format(out))
示例#20
0
def main():
    """Draw a bubble (scatter) plot for each input file and save it.

    Each file is read with pandas; the separator is ``--sep`` or, failing
    that, a comma for ``.csv`` files and a tab otherwise.  The x, y and
    size columns may be given by name or by 1-based column number, and
    default to the first, second and third columns.  With ``--list_cols``
    the columns of the first file are printed and the program exits.
    """

    args = get_args()

    def resolve_axis(axis, position, col_names):
        """Turn a column number or a missing value into a column name.

        A numeric `axis` that is not itself a column name is read as a
        1-based column number.  A number outside 1..len(col_names) is left
        alone so the caller's "not in" check reports it; previously "0"
        silently chose the last column and larger numbers raised IndexError.
        A missing `axis` falls back to the column at index `position`.
        """
        ncols = len(col_names)
        if axis and axis.isdigit() and axis not in col_names:
            number = int(axis)
            if 1 <= number <= ncols:
                axis = col_names[number - 1]
        if not axis and ncols > position:
            axis = col_names[position]
        return axis

    for file in args.file:
        basename = os.path.basename(file)
        title = args.title or basename
        sep = args.sep

        if not sep:
            _, ext = os.path.splitext(basename)
            sep = ',' if ext == '.csv' else '\t'

        df = pd.read_csv(file, sep=sep)
        col_names = df.columns

        if args.list_cols:
            print('Columns in "{}"\n{}\n'.format(
                basename, '\n'.join(
                    map(lambda t: '{:3}: {}'.format(*t),
                        enumerate(col_names, 1)))))
            sys.exit(0)

        x_axis = resolve_axis(args.x_axis, 0, col_names)
        y_axis = resolve_axis(args.y_axis, 1, col_names)
        s_axis = resolve_axis(args.s_axis, 2, col_names)

        if x_axis not in col_names:
            die('--x_axis "{}" not in {}'.format(x_axis, ', '.join(col_names)))

        if y_axis not in col_names:
            die('--y_axis "{}" not in {}'.format(y_axis, ', '.join(col_names)))

        if s_axis not in col_names:
            die('--s_axis "{}" not in {}'.format(s_axis, ', '.join(col_names)))

        # Drop the rows whose x or y value was excluded on the command line.
        if args.x_exclude:
            for exclude in args.x_exclude:
                df.drop(df[df[x_axis] == exclude].index, inplace=True)

        if args.y_exclude:
            for exclude in args.y_exclude:
                df.drop(df[df[y_axis] == exclude].index, inplace=True)

        x_label = args.x_label or x_axis
        y_label = args.y_label or y_axis

        if args.sort:
            df.sort_values(by=[y_axis, x_axis],
                           ascending=[False, False],
                           inplace=True)

        x = df[x_axis]
        y = df[y_axis]
        # Unless given, the figure grows with the number of distinct values.
        img_width = args.image_width or 5 + len(x.unique()) / 5
        img_height = args.image_height or len(y.unique()) / 4
        plt.figure(figsize=(img_width, img_height))
        plt.scatter(x=x, y=y, s=df[s_axis] * args.multiplier, alpha=0.5)
        plt.xticks(rotation=45, ha='right')
        plt.gcf().subplots_adjust(bottom=.4, left=.4)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.title(title)

        out_file = args.outfile
        if not out_file:
            dir_name = os.path.dirname(os.path.abspath(file))
            root, _ = os.path.splitext(os.path.basename(file))
            out_file = os.path.join(dir_name, root + '.' + args.format)

        plt.savefig(out_file)

        if args.open_image:
            plt.show()

        # Release the figure so they do not pile up across input files.
        plt.close()

        print('Done, see "{}"'.format(out_file))
示例#21
0
def main():
    """Assemble the title, outline, chapters and appendices into
    ``book.md`` and convert it with pandoc.

    Chapter directories are listed, one per line, in ``--chapters``
    (default ``chapters.txt`` next to this script); blank lines and lines
    starting with ``#`` are skipped.  ``make_outline`` is called with the
    chapter list before the book is written.  For each chapter its
    README.md, ``solution.py`` (numbered with ``cat -n``) and
    ``discussion.md`` are appended, then the appendix READMEs listed in
    ``--appendix``.  ``die`` is called on a missing list file or a pandoc
    failure.
    """
    from shlex import quote  # local import: only this function needs it

    args = get_args()
    in_dir = args.dir
    out_dir = args.outdir
    chapters = args.chapters
    appendix = args.appendix
    cur_dir = os.path.dirname(sys.argv[0])

    if not chapters:
        chapters = os.path.join(cur_dir, 'chapters.txt')

    if not os.path.isfile(chapters):
        die('--chapters "{}" is not a file'.format(chapters))

    if not appendix:
        appendix = os.path.join(cur_dir, 'appendices.txt')

    if appendix and not os.path.isfile(appendix):
        die('--appendix "{}" is not a file'.format(appendix))

    def slurp(path):
        """Return the whole text of `path`, closing the file afterwards."""
        with open(path) as in_fh:
            return in_fh.read()

    def read_names(path, skip_comments):
        """Return the non-blank, right-stripped lines of `path`."""
        with open(path) as in_fh:
            names = [line.rstrip() for line in in_fh]
        return [
            name for name in names
            if name and not (skip_comments and name.startswith('#'))
        ]

    book_file = os.path.join(out_dir, 'book.md')

    # Blank lines are dropped: they used to become an empty chapter name.
    chapter_list = read_names(chapters, True)

    make_outline(in_dir, chapter_list)

    with open(book_file, 'wt') as fh:
        # Front matter, in order: title page, table of contents, top-level
        # README and outline.  Each file is optional.
        title = 'TITLE.md'
        if os.path.isfile(title):
            fh.write(slurp(title))
            fh.write('\n\n\\newpage\n\n')

        fh.write('\\setcounter{tocdepth}{2}\\tableofcontents\n\n')
        fh.write('\n\\newpage\n\n')

        top_readme = 'README.md'
        if os.path.isfile(top_readme):
            fh.write(slurp(top_readme))
            fh.write('\n\\newpage\n\n')

        outline = 'OUTLINE.md'
        if os.path.isfile(outline):
            fh.write(slurp(outline))
            fh.write('\n\\newpage\n\n')

        for i, dir_name in enumerate(chapter_list, 1):
            print('Chapter {}: {}'.format(i, dir_name))
            readme = os.path.join(in_dir, dir_name, 'README.md')
            if os.path.isfile(readme):
                print('\tREADME')
                chapter = 'Chapter {}: '.format(i)
                # Without re.MULTILINE "^" matches only at the very start,
                # so just a leading top-level heading gets the prefix.
                text = re.sub(r'^#\s+', '# ' + chapter, slurp(readme))
                fh.write(text + '\n\\newpage\n\n')

            solution_py = os.path.join(in_dir, dir_name, 'solution.py')
            if os.path.isfile(solution_py):
                print('\tSOLUTION')
                fh.write('## Solution\n\n')
                fh.write('````\n')
                # Quote the path so spaces or shell metacharacters in it
                # cannot break the command.
                numbered = getoutput('cat -n {}'.format(quote(solution_py)))
                fh.write(numbered)
                fh.write('\n````\n')
                fh.write('\n\\newpage\n\n')
            else:
                print('\t>>>>>>> MISSING SOLUTION <<<<<<<<\n\n')

            solution_md = os.path.join(in_dir, dir_name, 'discussion.md')
            if os.path.isfile(solution_md):
                print('\tDISCUSSION')
                fh.write('## Discussion\n\n')
                fh.write(slurp(solution_md))
                fh.write('\n\\newpage\n\n')

        if appendix:
            for i, dir_name in enumerate(read_names(appendix, False), 1):
                print('Appendix {}: {}'.format(i, dir_name))
                readme = os.path.join(in_dir, 'appendix', dir_name,
                                      'README.md')
                if os.path.isfile(readme):
                    print('\tREADME')
                    header = 'Appendix {}: '.format(i)
                    text = re.sub(r'^#\s+', '# ' + header, slurp(readme))
                    fh.write(text + '\n\\newpage\n\n')

    cmd = 'pandoc {} --pdf-engine=xelatex -o {}'
    rv, out = getstatusoutput(cmd.format(quote(book_file),
                                         quote(args.outfile)))

    if rv != 0:
        die('Error: {}'.format(out))
示例#22
0
def main():
    """Plot, per sample, the share of reads assigned to each taxon of the
    wanted rank.

    Each input is a tab-delimited report with the columns 'reads',
    'taxName', 'rank' and 'taxID'; blank lines and lines starting with
    ``#`` are ignored.  The 'root' and 'unclassified' rows give the read
    total used as the denominator.  Rows of rank ``--rank`` whose share is
    at least ``--min`` are collected, optionally saved as CSV to
    ``--dataout``, and drawn as a scatter plot saved to ``--outfile``.
    """

    args = get_args()
    rank_wanted = args.rank
    min_pct = args.min

    def lines(fh):
        """Yield the non-blank, non-comment lines without their newline."""
        for line in map(lambda s: s.rstrip('\n'), fh):
            if line and not line.startswith('#'):
                yield line

    # NOTE(review): these totals carry over from one file to the next until
    # a file supplies its own root/unclassified rows - confirm intended.
    num_root, num_unclassified = 0, 0
    assigned = []
    for i, fh in enumerate(args.file, start=1):
        basename = os.path.basename(fh.name)
        print('{:3}: {}'.format(i, basename))

        reader = csv.DictReader(lines(fh), delimiter='\t')
        for rec in reader:
            # Skip rows whose read count is missing or not an integer.
            try:
                reads = int(rec['reads'])
            except (KeyError, TypeError, ValueError):
                continue

            tax_name = rec['taxName'].strip()
            if tax_name == 'root':
                num_root = reads
                continue
            elif tax_name == 'unclassified':
                num_unclassified = reads
                continue
            elif rec['rank'] != rank_wanted:
                # Keep only the requested rank.  The test was inverted
                # (==), which threw away exactly the rows asked for.
                continue

            total_reads = num_root + num_unclassified
            if total_reads == 0:
                die('Failed to find root/unclassified')

            # NOTE(review): `pct` is a fraction (0-1) but is compared with
            # --min and reported with a "%" sign - confirm the scale.
            pct = reads / total_reads
            if min_pct and pct < min_pct:
                continue

            assigned.append({
                'sample': basename,
                'tax_id': rec['taxID'],
                'tax_name': tax_name,
                'pct': pct,
                'reads': reads
            })

    if not assigned:
        die('No data!')

    df = pd.DataFrame(assigned)
    if args.dataout:
        df.to_csv(args.dataout, index=False)

    num_found = len(assigned)
    print('At a {}% found {} {}'.format(min_pct, num_found, rank_wanted))
    if num_found > 1000:
        die('Too many to plot')

    x = df['sample']
    y = df['tax_name']
    # The figure grows with the number of distinct samples and taxa.
    plt.figure(figsize=(5 + len(x.unique()) / 5, len(y.unique()) / 3))
    plt.scatter(x, y, s=df['pct'], alpha=0.5)
    plt.xticks(rotation=45, ha='right')
    plt.gcf().subplots_adjust(bottom=.4, left=.4)
    plt.ylabel('Organism')
    plt.xlabel('Sample')
    if args.title:
        plt.title(args.title)

    plt.savefig(args.outfile)

    print('Done, see outfile "{}"'.format(args.outfile))

    if args.open_image:
        plt.show()
示例#23
0
def _diagonal(puzzle, row, col, row_step):
    """Collect cells from (row, col), moving one column right and
    `row_step` rows per step, until the walk leaves the grid."""
    num_rows = len(puzzle)
    num_cols = len(puzzle[0])
    cells = []
    while 0 <= row < num_rows and col < num_cols:
        cells.append(puzzle[row][col])
        row += row_step
        col += 1
    return cells


def all_combos(puzzle):
    """Find all combos in puzzle.

    :param puzzle: sequence of equally long rows (strings or lists)
    :return: a list holding every row (the row object itself), every column
        and every left-to-right diagonal (both rising and falling) as
        lists, followed by the reversed copy of each of those, in the same
        order.  An empty puzzle gives an empty list.

    ``die`` is called when the rows do not all have the same length.
    """

    if not puzzle:
        return []

    num_rows = len(puzzle)
    num_cols = len(puzzle[0])

    if not all(len(row) == num_cols for row in puzzle):
        die('Uneven number of columns')

    # Horizontal
    combos = list(puzzle)

    # Vertical
    for col_num in range(num_cols):
        combos.append([puzzle[row_num][col_num] for row_num in range(num_rows)])

    # Diagonals Up: start on the left edge, then along the bottom edge.
    # The walk stops at the right edge; without that bound the left-edge
    # pass raised IndexError whenever the puzzle had more rows than columns.
    for row_i in range(num_rows):
        combos.append(_diagonal(puzzle, row_i, 0, -1))

    for col_i in range(1, num_cols):
        combos.append(_diagonal(puzzle, num_rows - 1, col_i, -1))

    # Diagonals Down: start on the left edge, then along the top edge.
    for row_i in range(num_rows):
        combos.append(_diagonal(puzzle, row_i, 0, 1))

    # NOTE: column 0 repeats the main diagonal already added just above;
    # kept so the returned list matches the original for valid input.
    for col_i in range(num_cols):
        combos.append(_diagonal(puzzle, 0, col_i, 1))

    combos.extend([list(reversed(c)) for c in combos])
    return combos