Exemplo n.º 1
0
import argparse
import os
import time
from helpers import read_strings
from snp import Trie

if __name__ == '__main__':
    # Command-line driver for the 'BA9B Implement TrieMatching' exercise:
    # runs the hard-coded sample and/or the extra dataset check, then reports
    # the wall-clock time taken.
    # Note: --rosalind is accepted by the parser but no branch acts on it.
    start = time.time()
    parser = argparse.ArgumentParser('BA9B Implement TrieMatching')
    for option, description in (
            ('--sample', 'process sample dataset'),
            ('--extra', 'process extra dataset'),
            ('--rosalind', 'process Rosalind dataset')):
        parser.add_argument(option,
                            default=False,
                            action='store_true',
                            help=description)
    args = parser.parse_args()

    if args.sample:
        sample_trie = Trie(['ATCG', 'GGGT'])
        print(sample_trie.MatchAll('AATCGGGTTCAATCGGGGT'))

    if args.extra:
        # The first input line is handed to MatchAll; the remaining lines
        # build the trie.
        lines, expected_lines = read_strings('data/TrieMatching.txt', init=0)
        pattern_trie = Trie(lines[1:])
        found = pattern_trie.MatchAll(lines[0])
        wanted = [int(token) for token in expected_lines[0].split()]
        print(len(wanted), len(found))
        # zip() stops at the shorter list, so the lengths printed above
        # matter as much as the pairwise mismatches collected here.
        mismatches = []
        for want, got in zip(wanted, found):
            if want != got:
                mismatches.append((want, got))
        print(mismatches)

    elapsed = time.time() - start
    minutes = int(elapsed / 60)
    seconds = elapsed - 60 * minutes
    print(f'Elapsed Time {minutes} m {seconds:.2f} s')
    
Exemplo n.º 2
0
if __name__ == '__main__':
    # Command-line driver template for the BA10G exercise. The actual
    # computation is not implemented yet: Result is a placeholder.
    start = time.time()
    parser = argparse.ArgumentParser(
        'BA10G 	Perform a Multiple Sequence Alignment with a Profile HMM ')
    parser.add_argument('--sample',
                        default=False,
                        action='store_true',
                        help='process sample dataset')
    parser.add_argument('--rosalind',
                        default=False,
                        action='store_true',
                        help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        pass  # TODO: no sample case has been written yet

    if args.rosalind:
        # Input and output file names are both derived from this script's
        # own file name (without extension).
        problem = os.path.basename(__file__).split(".")[0]
        Input = read_strings(f'data/rosalind_{problem}.txt')

        Result = None  # TODO: compute the answer from Input
        print(Result)
        # Only write the output file once there is something to write.
        # Iterating the None placeholder used to raise TypeError after the
        # file had already been created/truncated.
        if Result is not None:
            with open(f'{problem}.txt', 'w') as f:
                for line in Result:
                    f.write(f'{line}\n')

    elapsed = time.time() - start
    minutes = int(elapsed / 60)
    seconds = elapsed - 60 * minutes
    print(f'Elapsed Time {minutes} m {seconds:.2f} s')
Exemplo n.º 3
0
                        action='store_true',
                        help='process sample dataset')
    parser.add_argument('--extra',
                        default=False,
                        action='store_true',
                        help='process extra dataset')
    parser.add_argument('--rosalind',
                        default=False,
                        action='store_true',
                        help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        pass

    if args.extra:
        Input, Expected = read_strings('data/....txt', init=0)
        trie = Trie(Input)
        Actual = None
        Expected.sort()
        print(len(Expected), len(Actual))
        diffs = [(e, a) for e, a in zip(Expected, Actual) if e != a]
        print(diffs)

    if args.rosalind:
        Input = read_strings(
            f'data/rosalind_{os.path.basename(__file__).split(".")[0]}.txt')

        Result = None
        print(Result)
        with open(f'{os.path.basename(__file__).split(".")[0]}.txt', 'w') as f:
            for line in Result:
Exemplo n.º 4
0
    if args.banana:
        r, p, LCP = SuffixArray('panamabananas$', auxiliary=True, padLCP=True)
        for edge, _ in SuffixArray2Tree('panamabananas$',
                                        r,
                                        LCP,
                                        trace=args.trace):
            print(edge)

    if args.sample:
        for edge, _ in SuffixArray2Tree('GTAGT$', [5, 2, 3, 0, 4, 1],
                                        [0, 0, 0, 2, 0, 1],
                                        trace=args.trace):
            print(edge)

    if args.extra:
        Input, Expected = read_strings('data/SuffixTreeFromSuffixArray.txt',
                                       init=0)

        Result = [
            edge for edge, _ in
            SuffixArray2Tree(Input[0], [int(s) for s in Input[1].split(',')],
                             [int(s) for s in Input[2].split(',')],
                             trace=args.trace)
        ]
        print(f'Expected {len(Expected)} Edges, actual = {len(Result)}')
        Result.sort(key=lambda x: f'{len(x):04}{x}')
        Expected.sort(key=lambda x: f'{len(x):04}{x}')
        i = 0
        j = 0
        while i < len(Expected) and j < len(Result):
            if Expected[i] == Result[j]:
                i += 1
Exemplo n.º 5
0
def FindApproximateMatches(Text, Patterns, d):
    """Placeholder: not implemented yet.

    The arguments are currently ignored and None is returned.
    NOTE(review): presumably intended to locate each of Patterns in Text
    with at most d mismatches -- confirm against the problem statement.
    """
    return None

if __name__=='__main__':
    # Command-line driver for the BA9O exercise: runs FindApproximateMatches
    # on the hard-coded sample, the extra dataset and/or the Rosalind dataset.
    start = time.time()
    parser = argparse.ArgumentParser('BA9O 	Find All Approximate Occurrences of a Collection of Patterns in a String ')
    parser.add_argument('--sample',   default=False, action='store_true', help='process sample dataset')
    parser.add_argument('--extra',   default=False, action='store_true', help='process extra dataset')
    parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        print (FindApproximateMatches('ACATGCTACTTT', ['ATT', 'GCC', 'GCTA', 'TATT'], 1))

    if args.extra:
        Input,Expected  = read_strings('data/MultipleApproximatePatternMatching.txt',init=0)
        # First line is the text, last line is d, everything in between holds
        # the patterns. Splitting each line accepts both one-pattern-per-line
        # and whitespace-separated layouts (assumes patterns contain no
        # whitespace -- TODO confirm against the data file).
        Patterns        = [p for line in Input[1:-1] for p in line.split()]
        # The original call had misplaced parentheses: it invoked
        # int(list, base) and passed only two arguments.
        Result          = FindApproximateMatches(Input[0],Patterns,int(Input[-1]))
        for a,b in Result:
            print (a,b)

    if args.rosalind:
        Input  = read_strings(f'data/rosalind_{os.path.basename(__file__).split(".")[0]}.txt')

        # Same input layout and same parenthesisation fix as the extra branch.
        Patterns = [p for line in Input[1:-1] for p in line.split()]
        Result   = FindApproximateMatches(Input[0],Patterns,int(Input[-1]))
        print (Result)
        with open(f'{os.path.basename(__file__).split(".")[0]}.txt','w') as f:
            for line in Result:
                f.write(f'{line}\n')

    elapsed = time.time() - start
    minutes = int(elapsed/60)
Exemplo n.º 6
0
            #pass
        #else:
            #pass
    
if __name__ == '__main__':
    # Command-line driver for the 'BA9E Find the Longest Substring Shared by
    # Two Strings' exercise: runs the hard-coded sample and/or the extra
    # dataset, then reports the wall-clock time taken.
    start = time.time()
    parser = argparse.ArgumentParser('BA9E Find the Longest Substring Shared by Two Strings')
    for option, description in (
            ('--sample', 'process sample dataset'),
            ('--extra', 'process extra dataset'),
            ('--rosalind', 'process Rosalind dataset')):
        parser.add_argument(option,
                            default=False,
                            action='store_true',
                            help=description)
    args = parser.parse_args()

    if args.sample:
        first_sample = 'TCGGTAGATTGCGCCCACTC'
        second_sample = 'AGGGGCTCGCAGTGTAAGAA'
        print(FindLongestRepeat(first_sample, second_sample))

    if args.extra:
        # Raise the recursion limit before processing the extra dataset.
        sys.setrecursionlimit(2000)
        strings, expected = read_strings('data/LongestSharedSubstring.txt', init=0)
        shared = FindLongestRepeat(strings[0], strings[1])
        reference = expected[0]
        # Show both lengths first, then the expected and the computed answer.
        print(len(reference), len(shared))
        print(reference)
        print(shared)

    if args.rosalind:
        pass  # no Rosalind dataset handling here

    elapsed = time.time() - start
    minutes = int(elapsed / 60)
    seconds = elapsed - 60 * minutes
    print(f'Elapsed Time {minutes} m {seconds:.2f} s')
Exemplo n.º 7
0
                        action='store_true',
                        help='process sample dataset')
    parser.add_argument('--extra',
                        default=False,
                        action='store_true',
                        help='process extra dataset')
    parser.add_argument('--rosalind',
                        default=False,
                        action='store_true',
                        help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        print(FindShortestNonShared('CCAAGCTGCTAGAGG', 'CATGCTGGGCTGGCT'))

    if args.extra:
        Input, Expected = read_strings('data/ShortestNonSharedSubstring.txt',
                                       init=0)
        print(Expected[0])
        Actual = FindShortestNonShared(Input[0], Input[1])
        print(len(Expected[0]), len(Actual))
        print(Expected[0])
        print(Actual)

    if args.rosalind:
        Input = read_strings(
            f'data/rosalind_{os.path.basename(__file__).split(".")[0]}.txt')
        Result = FindShortestNonShared(Input[0], Input[1])
        print(Result)
        with open(f'{os.path.basename(__file__).split(".")[0]}.txt', 'w') as f:
            f.write(f'{Result}\n')

    elapsed = time.time() - start
Exemplo n.º 8
0
                        default=False,
                        action='store_true',
                        help='process extra dataset')
    parser.add_argument('--rosalind',
                        default=False,
                        action='store_true',
                        help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        tree = SuffixTree()
        tree.build('ATAAATG$')
        for edge in tree.collectEdges():
            print(edge)

    if args.extra:
        Input, Expected = read_strings('data/SuffixTreeConstruction.txt',
                                       init=0)
        tree = SuffixTree()
        tree.build(Input[0])
        #tree.print()
        Edges = tree.collectEdges()

        compare_edges(Edges, Expected)

    if args.rosalind:
        Input = read_strings(r'data/rosalind_ba9c.txt')
        tree = SuffixTree()
        tree.build(Input[0])
        Edges = tree.collectEdges()
        for e in Edges:
            print(e)
Exemplo n.º 9
0
                        help='Controls display of probabilities')
    args = parser.parse_args()
    if args.sample:
        Transitions, Emissions = EstimateParameters('yzzzyxzxxx',
                                                    ['x', 'y', 'z'],
                                                    'BBABABABAB',
                                                    ['A', 'B', 'C'])
        for row in formatTransition(Transitions, ['A', 'B', 'C'],
                                    precision=args.precision):
            print(row)
        for row in formatEmission(Emissions, ['A', 'B', 'C'], ['x', 'y', 'z'],
                                  precision=args.precision):
            print(row)

    if args.extra:
        Input, Expected = read_strings(f'data/HMMParameterEstimation.txt',
                                       init=0)
        Transitions, Emissions = EstimateParameters(Input[0], Input[2].split(),
                                                    Input[4], Input[6].split())
        for row in formatTransition(Transitions,
                                    Input[6].split(),
                                    precision=args.precision):
            print(row)
        print('--------')
        for row in formatEmission(Emissions,
                                  Input[6].split(),
                                  Input[2].split(),
                                  precision=args.precision):
            print(row)

    if args.rosalind:
        Input = read_strings(
Exemplo n.º 10
0
    DOWNRIGHT = 2
    LinearSpaceAlignment(0,len(v)+1,0,len(w)+1)
        
    
if __name__=='__main__':
    # Command-line driver for the BA5L exercise: runs alignUsingLinearSpace on
    # the hard-coded sample and/or the extra dataset. The Rosalind branch is
    # still a template: its Result is a placeholder.
    start = time.time()
    parser = argparse.ArgumentParser('BA5L.py Align Two Strings Using Linear Space')
    parser.add_argument('--sample',   default=False, action='store_true', help='process sample dataset')
    parser.add_argument('--extra',     default=False, action='store_true', help='process extra dataset')
    parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        print (alignUsingLinearSpace('PLEASANTLY','MEANLY'))

    if args.extra:
        Input,Expected             = read_strings('data/linear_space_alignment.txt',init=0)
        print (alignUsingLinearSpace(Input[0],Input[1]))

    if args.rosalind:
        # Input and output file names are both derived from this script's
        # own file name (without extension).
        problem = os.path.basename(__file__).split(".")[0]
        Input   = read_strings(f'data/rosalind_{problem}.txt')

        Result = None  # TODO: compute the answer from Input
        print (Result)
        # Only write the output file once there is something to write.
        # Iterating the None placeholder used to raise TypeError after the
        # file had already been created/truncated.
        if Result is not None:
            with open(f'{problem}.txt','w') as f:
                for line in Result:
                    f.write(f'{line}\n')

    elapsed = time.time() - start
    minutes = int(elapsed/60)
    seconds = elapsed - 60*minutes
Exemplo n.º 11
0
                        action='store_true',
                        help='process sample dataset')
    parser.add_argument('--extra',
                        default=False,
                        action='store_true',
                        help='process extra dataset')
    parser.add_argument('--rosalind',
                        default=False,
                        action='store_true',
                        help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        print(FindLongestRepeat('ATATCGTTTTATCGTT'))

    if args.extra:
        Input, Expected = read_strings('data/LongestRepeat.txt', init=0)
        print(Input[0])
        Actual = FindLongestRepeat(Input[0])
        print(len(Expected[0]), len(Actual))
        print(Expected[0])
        print(Actual)

    if args.rosalind:
        Input = read_strings('data/rosalind_ba9d.txt')
        Result = FindLongestRepeat(Input[0])
        print(Result)
        with open('ba9d.txt', 'w') as f:
            f.write(f'{Result}\n')

    elapsed = time.time() - start
    minutes = int(elapsed / 60)