Пример #1
0
    def solve(self,
              X0: Matrix,
              fasta_options: dict = None) -> Tuple[Matrix, Convergence]:
        """Run FASTA on the multiple measurement vector (MMV) problem.

        :param X0: Starting point handed to FASTA
        :param fasta_options: Keyword options forwarded to FASTA (default: None)
        :return: The solution found by FASTA, followed by the object FASTA returned
        """
        def f(Z):
            # Half the squared norm of the flattened residual Z - B
            return .5 * la.norm((Z - self.B).ravel())**2

        def gradf(Z):
            return Z - self.B

        def g(X):
            # mu times the sum of the Euclidean norms of the rows of X
            return self.mu * np.sum(np.sqrt(np.sum(X * X, axis=1)))

        def proxg(X, t):
            row_norms = la.norm(X, axis=1)

            # Shrink the row norms by mu * t; adding (row_norms == 0) replaces
            # a zero denominator with one so all-zero rows do not divide by zero
            factors = (proximal.shrink(row_norms, self.mu * t)
                       / (row_norms + (row_norms == 0)))

            return X * factors[:, np.newaxis]

        options = fasta_options or {}
        result = fasta(self.A, self.At, f, gradf, g, proxg, X0, **options)

        return result.solution, result
Пример #2
0
    def solve(self, Y0: Matrix, fasta_options: dict=None) -> Tuple[Matrix, Convergence]:
        """Run FASTA on the dual of the total variation denoising problem.

        :param Y0: Starting point for the variable FASTA iterates on (an initial guess for the gradient of the solution)
        :param fasta_options: Keyword options forwarded to FASTA (default: None)
        :return: The signal recovered from the dual solution, followed by the object FASTA returned
        """
        def f(Z):
            residual = Z - self.M/self.mu
            return .5 * la.norm(residual.ravel())**2

        def gradf(Z):
            return Z - self.M/self.mu

        def g(Y):
            return 0

        def proxg(Y, t):
            # Length of the vector stored along the last axis, floored at 1 ...
            lengths = np.maximum(la.norm(Y, axis=Y.ndim-1), 1)

            # ... so vectors longer than one are scaled back to unit length
            # and all others pass through unchanged
            return Y / lengths[...,np.newaxis]

        # FASTA works on the dual variable
        options = fasta_options or {}
        dual = fasta(div, grad, f, gradf, g, proxg, Y0, **options)

        # Map the dual solution back to the primal variable
        primal = self.M - self.mu * div(dual.solution)

        return primal, dual
Пример #3
0
def assemble(read1, read2, *extra_seqs):
    """Build a track set from two reads plus optional extra sequences.

    Both reads are loaded with ``ab1.read``. The second read's sequence,
    confidences and traces are passed through ``tracks.revcomp`` before use.
    The tracks added are, in order: traces, confidences and bases for each
    read, the reference (when ``contig.contig`` produced one), one track per
    extra sequence, and a ``'mismatches'`` track when exactly one extra
    sequence was given, a reference exists and the two differ somewhere.

    :param read1: passed to ``ab1.read`` to load the first read
    :param read2: passed to ``ab1.read`` to load the second read
    :param extra_seqs: ``(name, sequence)`` pairs; each is aligned against the
        reference with ``fasta.fasta`` when a reference exists, otherwise it
        is added unaligned at offset 0
    :return: ``(track_set, ref['strands'])``
    """
    tracks1 = ab1.read(read1)
    tracks2 = ab1.read(read2)
    ref = contig.contig(tracks1['sequence'], tracks1['confidences'],
                        tracks.revcomp(tracks2['sequence']),
                        tracks.revcomp(tracks2['confidences']))
    t = tracks.TrackSet()

    read1_offset, read1_sequence = ref['read1']
    read2_offset, read2_sequence = ref['read2']
    read1_confs = tracks.regap(read1_sequence, tracks1['confidences'])
    read2_confs = tracks.regap(read2_sequence, tracks.revcomp(tracks2['confidences']))
    read1_traces = tracks.regap(read1_sequence, tracks1['traces'])
    read2_traces = tracks.regap(read2_sequence, tracks.revcomp(tracks2['traces']))

    t.extend([
              tracks.TrackEntry('read 1 traces', read1_offset, read1_traces),
              tracks.TrackEntry('read 1 confidences', read1_offset, read1_confs),
              tracks.TrackEntry('read 1 bases', read1_offset, read1_sequence),
              tracks.TrackEntry('read 2 traces', read2_offset, read2_traces),
              tracks.TrackEntry('read 2 confidences', read2_offset, read2_confs),
              tracks.TrackEntry('read 2 bases', read2_offset, read2_sequence)])

    # Identity test against None: `!= None` would go through the value's
    # own comparison method instead of checking for the None singleton.
    has_reference = ref['reference'] is not None

    if has_reference:
        reference_offset, reference_sequence = ref['reference']
        t.append(tracks.TrackEntry('reference', reference_offset, reference_sequence))

    for (name, s) in extra_seqs:
        if has_reference:
            (roffset, _), (soffset, saligned) = fasta.fasta(reference_sequence, s)
            t.append(tracks.TrackEntry(name, reference_offset + soffset - roffset,
                                       tracks.sequence(saligned)))
        else:
            t.append(tracks.TrackEntry(name, 0, s))

    # Now add an assembly of the lab sequence and reference sequence
    if len(extra_seqs) == 1 and has_reference:
        # With exactly one extra sequence, the last two tracks appended are
        # the reference followed by that sequence.
        labtrack = t[-1]
        reftrack = t[-2]
        offset = max(labtrack.offset, reftrack.offset)
        loffset = offset - labtrack.offset
        roffset = offset - reftrack.offset
        assert loffset >= 0 and roffset >= 0 and (loffset == 0 or roffset == 0)
        # Compare position by position from the common start: ' ' marks
        # agreement and 'X' marks a mismatch.
        bases = tracks.sequence(''.join([' ' if a == b else 'X' for a, b in
                                         zip(labtrack.track[loffset:],
                                             reftrack.track[roffset:])]))
        if 'X' in bases:
            t.append(tracks.TrackEntry('mismatches', offset, bases))
    return (t, ref['strands'])
Пример #4
0
    def solve(self,
              x0: Vector,
              fasta_options: dict = None) -> Tuple[Vector, Convergence]:
        """Run FASTA on the LASSO regression problem.

        :param x0: Starting point handed to FASTA
        :param fasta_options: Keyword options forwarded to FASTA (default: None)
        :return: The solution found by FASTA, followed by the object FASTA returned
        """
        def f(z):
            return .5 * la.norm((z - self.b).ravel())**2

        def gradf(z):
            return z - self.b

        def g(x):
            # TODO: add an extra condition to this
            return 0

        def proxg(x, t):
            # The projection uses self.mu only; the step size t is ignored
            return proximal.project_L1_ball(x, self.mu)

        # NOTE(review): only self.A is passed here, with no separate adjoint
        # argument - confirm this matches the signature of the fasta in use.
        options = fasta_options or {}
        result = fasta(self.A, f, gradf, g, proxg, x0, **options)

        return result.solution, result
    def solve(self,
              x0: Vector,
              fasta_options: dict = None) -> Tuple[Vector, Convergence]:
        """Run FASTA on the democratic representation problem.

        :param x0: Starting point handed to FASTA
        :param fasta_options: Keyword options forwarded to FASTA (default: None)
        :return: The representation found by FASTA, followed by the object FASTA returned
        """
        def f(z):
            return .5 * la.norm((z - self.b).ravel())**2

        def gradf(z):
            return z - self.b

        def g(x):
            # mu times the infinity norm of x
            return self.mu * la.norm(x, np.inf)

        def proxg(x, t):
            return proximal.project_Linf_ball(x, t * self.mu)

        # NOTE(review): only self.A is passed here, with no separate adjoint
        # argument - confirm this matches the signature of the fasta in use.
        options = fasta_options or {}
        result = fasta(self.A, f, gradf, g, proxg, x0, **options)

        return result.solution, result
Пример #6
0
    def solve(self,
              X0: Matrix,
              fasta_options: dict = None) -> Tuple[Matrix, Convergence]:
        """Solve the 1-bit logistic matrix completion problem with FASTA.

        :param X0: An initial guess for the solution
        :param fasta_options: Options for the FASTA algorithm (default: None)
        :return: The reconstructed matrix and information on FASTA's convergence
        """
        # np.logaddexp(0, Z) equals log(1 + exp(Z)) but does not overflow to
        # inf for large entries of Z the way np.exp(Z) does.
        f = lambda Z: np.sum(np.logaddexp(0, Z) - (self.B == 1) * Z)
        gradf = lambda Z: -self.B / (1 + np.exp(self.B * Z))
        # NOTE(review): the 1-norm of a diagonal matrix is its largest entry,
        # so this evaluates to mu times the largest singular value rather than
        # the sum of singular values - confirm which one is intended.
        g = lambda X: self.mu * la.norm(np.diag(la.svd(X)[1]), 1)
        proxg = lambda X, t: proximal.project_Lnuc_ball(X, t * self.mu)

        # No linear operators are passed: both operator slots are None
        X = fasta(None, None, f, gradf, g, proxg, X0, **(fasta_options or {}))

        return X.solution, X
Пример #7
0
    def solve(self,
              x0: Matrix,
              fasta_options: dict = None) -> Tuple[Vector, Convergence]:
        """Solve the non-negative least squares problem.

        :param x0: An initial guess for the solution
        :param fasta_options: Options for the FASTA algorithm, unpacked as keyword arguments (default: None)
        :return: The problem's computed solution and information on FASTA's convergence
        """
        # Smooth term: half the squared norm of the flattened residual z - b
        f = lambda z: .5 * la.norm((z - self.b).ravel())**2
        gradf = lambda z: z - self.b
        g = lambda x: 0
        # Clamp negative entries to zero; the step size t is not used
        proxg = lambda x, t: np.maximum(x, 0)

        x = fasta(self.A, self.At, f, gradf, g, proxg, x0,
                  **(fasta_options or {}))

        return x.solution, x
Пример #8
0
    def solve(self,
              x0: Vector,
              fasta_options: dict = None) -> Tuple[Vector, Convergence]:
        """Solve the L1-penalized logistic least squares problem.

        :param x0: An initial guess for the solution
        :param fasta_options: Options for the FASTA algorithm (default: None)
        :return: The problem's computed solution and information on FASTA's convergence
        """
        # np.logaddexp(0, z) equals log(1 + exp(z)) but does not overflow to
        # inf for large entries of z the way np.exp(z) does.
        f = lambda z: np.sum(np.logaddexp(0, z) - (self.b == 1) * z)
        gradf = lambda z: -self.b / (1 + np.exp(self.b * z))
        # mu times the 1-norm of the flattened x
        g = lambda x: self.mu * la.norm(x.ravel(), 1)
        proxg = lambda x, t: proximal.shrink(x, t * self.mu)

        x = fasta(self.A, self.At, f, gradf, g, proxg, x0,
                  **(fasta_options or {}))

        return x.solution, x
Пример #9
0
    def solve(self,
              y0: Vector,
              fasta_options: dict = None) -> Tuple[Vector, Convergence]:
        """Run FASTA on the dual of the support vector machine problem.

        :param y0: Starting point for the dual variable
        :param fasta_options: Keyword options forwarded to FASTA (default: None)
        :return: The vector D.T @ (l * y) built from the dual solution, followed by the object FASTA returned
        """
        def f(y):
            weighted = self.D.T @ (self.l * y)
            return .5 * la.norm(weighted.ravel())**2 - np.sum(y)

        def gradf(y):
            return self.l * (self.D @ (self.D.T @ (self.l * y))) - 1

        def g(y):
            return 0

        def proxg(y, t):
            # Clamp below at 0, then above at self.C; t is not used
            return np.minimum(np.maximum(y, 0), self.C)

        # FASTA works on the dual variable; no linear operators are passed
        options = fasta_options or {}
        dual = fasta(None, None, f, gradf, g, proxg, y0, **options)

        primal = self.D.T @ (self.l * dual.solution)

        return primal, dual
Пример #10
0
    def solve(
        self,
        inits: Tuple["Matrix", "Matrix"],
        fasta_options: dict = None
    ) -> Tuple[Tuple["Matrix", "Matrix"], Convergence]:
        """Run FASTA on the L1-penalized non-negative matrix factorization problem.

        :param inits: A tuple holding the starting points for X and Y, in that order
        :param fasta_options: Keyword options forwarded to FASTA (default: None)
        :return: The pair of factor matrices found by FASTA, followed by the object FASTA returned
        """
        # FASTA iterates on one array, so X and Y are stacked row-wise
        Z0 = np.concatenate(inits)

        # The first N rows of the stacked array belong to X, the rest to Y
        N = inits[0].shape[0]

        def split(Z):
            return Z[:N, ...], Z[N:, ...]

        def f(Z):
            X, Y = split(Z)
            return .5 * la.norm((self.S - X @ Y.T).ravel())**2

        def gradf(Z):
            X, Y = split(Z)
            d = X @ Y.T - self.S
            # Gradient blocks are stacked in the same order as the iterate
            return np.concatenate((d @ Y, d.T @ X))

        def g(Z):
            # Only the X block carries the L1 penalty
            return self.mu * la.norm(Z[:N, ...].ravel(), 1)

        def proxg(Z, t):
            X, Y = split(Z)
            shrunk = proximal.shrink(X, t * self.mu)
            # The Y block is clamped into [0, 1]
            clamped = np.minimum(np.maximum(Y, 0), 1)
            return np.concatenate((shrunk, clamped))

        options = fasta_options or {}
        result = fasta(None, None, f, gradf, g, proxg, Z0, **options)

        return split(result.solution), result
Пример #11
0
    def solve(self,
              X0: Matrix,
              fasta_options: dict = None) -> Tuple[Matrix, Convergence]:
        """Run FASTA on the max-norm problem.

        :param X0: Starting point handed to FASTA
        :param fasta_options: Keyword options forwarded to FASTA (default: None)
        :return: The solution found by FASTA, followed by the object FASTA returned
        """
        def f(X):
            return np.sum(self.S * (X @ X.T))

        def gradf(X):
            return (self.S + self.S.T) @ X

        def g(X):
            return 0

        def proxg(X, t):
            row_norms = la.norm(X, axis=1)

            # Rows with norm <= mu get divisor mu and are left unchanged;
            # longer rows are rescaled to norm mu. Adding (row_norms == 0)
            # keeps the divisor non-zero when both the norm and mu are zero.
            divisor = np.maximum(row_norms, self.mu) + (row_norms == 0)

            return self.mu * X / divisor[:, np.newaxis]

        options = fasta_options or {}
        result = fasta(None, None, f, gradf, g, proxg, X0, **options)

        return result.solution, result
#!/usr/bin/env python

import fasta
import argparse

# Command-line interface: input, output and k are mandatory, -l is optional
parser = argparse.ArgumentParser(
    description='gets kmer (eg. dimer) distribution for each position')
for optFlag, optRequired, optHelp in (
        ('-i', True, 'input'),
        ('-o', True, 'output'),
        ('-k', True, 'k of kmer'),
        ('-l', False, 'sequence length of interest')):
    parser.add_argument(optFlag, required=optRequired, help=optHelp)
parser.add_argument('--percentage',
                    action='store_true',
                    help='Write percentages instead if actual counts')

args = parser.parse_args()

# Normalise the switch to a strict True/False
percentageFlag = bool(args.percentage)

Fasta = fasta.fasta(args.i)

# NOTE(review): -l is forwarded as the raw command-line string while -k is
# converted to int - confirm getKmerAbundance expects that.
abundanceArgs = [int(args.k)]
if args.l:
    abundanceArgs.append(args.l)
kmerAbundanceDict = Fasta.getKmerAbundance(*abundanceArgs)

Fasta.writeKmerAbundanceTable(kmerAbundanceDict, args.o, percentageFlag)
Пример #13
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 23 22:18:01 2018

@author: Dennis
"""

# Builds the table used for finding the longest common substring

from datetime import datetime
startTime = datetime.now()
####
from fasta import fasta
import numpy as np

names, strings = fasta("rosalind_lcsm.txt")

# Only the first two strings of the file are compared
s, t = strings[0], strings[1]
m, n = len(s), len(t)

# csarray[i, j] is the length of the run of equal characters that ends at
# s[i] and t[j]; it stays 0 where the two characters differ
csarray = np.zeros((m, n))

for i, sChar in enumerate(s):
    for j, tChar in enumerate(t):
        if sChar != tChar:
            continue
        if i and j:
            # Extend the run that ended one position earlier in both strings
            csarray[i, j] = csarray[i - 1, j - 1] + 1
        else:
            # First row or column: a run can only start here
            csarray[i, j] = 1
Пример #14
0
  parser.add_argument('-p','--max-overlap',help='''\
        Overlap threshold between blast hits (in the subject sequence) -
        i.e. if two hits have an overlap by at least this many base pairs,
        they are part of a nest. If this option is omitted, any overlap
        whatever will trigger a nest relationship, while specifying a
        higher number allows insignificant overlaps to be ignored.''')
  return parser

if __name__=='__main__' and not sys.flags.interactive:
    parser = makeparser()
    args = parser.parse_args()

    # A literal 0 for any of these three options is rejected up front
    checked = (args.max_overlap,args.min_distance,args.min_length)
    if 0 in checked:
        parser.print_usage()
        sys.exit(parser.prog+': error: 0 not a valid arg')

    # Each entry of `defaults` maps an option name to (converter, default):
    # options left at None receive the default, all others are converted
    for k,(T,v) in defaults.iteritems():
        given = getattr(args,k)
        try:
            value = v if given is None else T(given)
            setattr(args,k,value)
        except ValueError:
            parser.print_usage()
            message = '{}: error: bad type for --{} (got {})'.format(
                parser.prog,k.replace('_','-'),given)
            sys.exit(message)

    with fasta.fasta(args.out,args.mode) as out:
        hits = classify.hitsfromcsv(args.file)
        classify.full_transposon_treatment(
             seq = hits,
             overlap = args.max_overlap,
             gap = args.min_distance,
             minlength = args.min_length,
             evalue = args.evalue_threshold,
             fastaout = out
        )
Пример #15
0
#!/usr/bin/env python
# Not tested!
import fasta
import argparse

# Writes every sequence whose length is one of the requested lengths to a
# per-length file named <input>.<length>.fa
parser = argparse.ArgumentParser(
    description='get subsequences having the motif of interest')
parser.add_argument('-i', required=True, help='input')
parser.add_argument('-l',
                    required=False,
                    help='comma separated lengths of interest')

args = parser.parse_args()
input = args.i
# Without -l there is nothing to split on; report it as a usage error instead
# of failing later with an AttributeError on None.
if not args.l:
    parser.error('-l (comma separated lengths of interest) must be given')
seqLengths = args.l.split(',')

# Maps each requested length (as text) to its open output file. It has to
# exist before the first assignment below.
fileDict = {}
try:
    for seqLength in seqLengths:
        fileDict[seqLength] = open(input + '.' + str(seqLength) + '.fa', 'w')
    fastaDict = fasta.fasta(input).read()

    for header, sequence in fastaDict.items():
        strSequenceLength = str(len(sequence))
        if strSequenceLength in fileDict:
            fileDict[strSequenceLength].write('>' + header + '\n' + sequence +
                                              '\n')
finally:
    # Close every file that was opened, even if reading or writing failed
    for outputFile in fileDict.values():
        outputFile.close()
Пример #16
0
 def fa2genomeSize(self):
     """Write the text returned by ``singleEntry2chromSize()`` for
     ``self.fasta`` to the file ``self.chromSizes``, then return ``self``."""
     fastaReader = fasta.fasta(self.fasta)
     with open(self.chromSizes, 'w') as sizesFile:
         sizeText = fastaReader.singleEntry2chromSize()
         sizesFile.write(sizeText)
     return self
Пример #17
0
#
# hmm_viterbi.py <hmm> <seqs>
#
# Outputs the Viterbi decodings of the sequences in the fasta file <seqs> using
# the HMM defined in the file <hmm>. The formats of <hmm> and <seqs> are as
# described in the projects in MLiB Q3/2015.
#
# Christian Storm Pedersen, 08-feb-2015

import sys
import string
from hmm import hmm
from fasta import fasta

# Load the model (first argument) and the sequences (second argument)
m = hmm(sys.argv[1])
d = fasta(sys.argv[2])

# Every print call takes exactly one parenthesised argument, which prints the
# same text under Python 2 (print statement) and Python 3 (print function).
# print('') replaces the bare `print` that emitted an empty line.
print('; Viterbi-decodings of %s using HMM %s' % (sys.argv[2], sys.argv[1]))
print('')
for key in sorted(d.keys()):
    x = m.str_to_obs(d[key])

    # Compute Viterbi decoding and its log-likelihood
    vit_z, vit_logpz = m.viterbi_decoding(x)

    print('>' + key)
    print(d[key])
    print('# ')
    print(m.states_to_str(vit_z))
    print('; log P(x,z) = %f' % (vit_logpz))
    print('')
#!/usr/bin/env python

import fasta
import argparse
import re
import sys

# Command-line interface: optional input/output paths plus the mandatory regex
parser = argparse.ArgumentParser(
    description='converts fasta to bed by choosing a motif of interest')
for optFlag, optRequired, optHelp in (
        ('-i', False, 'input'),
        ('-o', False, 'output'),
        ('-r', True, 'regex motif that is expected')):
    parser.add_argument(optFlag, required=optRequired, help=optHelp)

args = parser.parse_args()

# NOTE(review): -i is optional, so args.i may be None here - confirm that
# fasta.fasta accepts that.
fastaObject = fasta.fasta(args.i)
output = args.o
regexMotif = args.r
# Compiled once so the pattern can be reused
pattern = re.compile(regexMotif)


def criterionPass(citerion):
    """Evaluate the expression text ``citerion`` and return its truth value.

    The text is embedded in ``True if <citerion> else False`` and evaluated,
    so the result is always exactly ``True`` or ``False``.

    SECURITY: the text is executed with ``eval``. Never pass text that comes
    from an untrusted source.
    """
    code = ''.join(('True if ', citerion, ' else False'))
    # eval runs arbitrary Python; see the security note in the docstring
    return eval(code)


def fastaHeader2bedLine(header):
    """Split a FASTA header into a name, a start and an end coordinate.

    The parsing expects text shaped like ``name:start-end``, optionally
    followed by ``(`` and further text; every ``>`` is removed before the
    name is taken.

    NOTE(review): the visible part of this function stops after the parsing
    and returns nothing - the remainder is presumably outside this excerpt.
    """
    # Text before the first ':' (with '>' removed) is the name
    chr = header.replace('>', '').split(':')[0]
    # Between the first ':' and the first '(' lies "start-end"
    start = int(header.split(':')[1].split('(')[0].split('-')[0])
    end = int(header.split(':')[1].split('(')[0].split('-')[1])
Пример #19
0
import os
from fasta import fasta

# The example files live in the 'testFiles' directory next to this script
scriptDir = os.path.dirname(os.path.realpath(__file__))
kTestFilesDir = os.path.join(scriptDir, 'testFiles')
kFastaExample1, kFastaExample2, kFastaExample4, kFastaExample5 = (
    os.path.join(kTestFilesDir, exampleName)
    for exampleName in ('fastaExample1.fa', 'fastaExample2.fa',
                        'fastaExample4.fa', 'fastaExample5.fa'))

# One fasta object per example file, shared by the tests below
fasta1, fasta2, fasta4, fasta5 = (
    fasta(examplePath)
    for examplePath in (kFastaExample1, kFastaExample2,
                        kFastaExample4, kFastaExample5))


def test_sequenceCount():
    """getSequenceCount() gives 2 for example 1 and 4 for example 2."""
    for parsed, expected in ((fasta1, 2), (fasta2, 4)):
        assert parsed.getSequenceCount() == expected


def test_firstSeqLength():
    """getFirstSeqLength() gives 7 for example 1 and 11 for example 2."""
    for parsed, expected in ((fasta1, 7), (fasta2, 11)):
        assert parsed.getFirstSeqLength() == expected


def test_maxSeqLength():
    """getMaxSeqLength() gives 7 for example 1 and 25 for example 2."""
    for parsed, expected in ((fasta1, 7), (fasta2, 25)):
        assert parsed.getMaxSeqLength() == expected

Пример #20
0
#!/usr/bin/env python

import fasta
import argparse

# Command-line interface: all four options are mandatory
parser = argparse.ArgumentParser(description='gets kmer (eg. dimer) distribution for each position')
for optFlag, optHelp in (('-i', 'input'),
                         ('-o', 'output'),
                         ('-k', 'k of kmer'),
                         ('-r', 'length range (eg 9-13)')):
    parser.add_argument(optFlag, required=True, help=optHelp)

args = parser.parse_args()
input = args.i
output = args.o
kmer = int(args.k)
# The range stays text (e.g. '9-13'); it is handed on unparsed
lengthRange = args.r

fastaReader = fasta.fasta(input)
fastaReader.separateByLengthAndWriteKmerAbundance(kmer, lengthRange, output)
Пример #21
0
def test_fasta():
    """Smoke test: calling ``fasta('(all)')`` must not raise."""
    fasta('(all)')
Пример #22
0
import fasta

def gccontent(fastastr):
    """Return the percentage (0-100) of items in ``fastastr`` equal to 'G' or 'C'.

    Only upper-case 'G' and 'C' are counted. An empty sequence gives 0.0
    instead of raising ZeroDivisionError.
    """
    if not fastastr:
        return 0.0
    num = sum(1 for c in fastastr if c in ('G', 'C'))
    # float() keeps the division exact under Python 2 as well
    return (100 * num) / float(len(fastastr))

def gc(gc_dict):
    """Return ``(score, key)`` for the entry of ``gc_dict`` with the highest GC content.

    Scores come from ``gccontent``. The first entry with the highest score
    wins. When no entry scores above zero (including an empty dict) the
    result is ``(0, "")``.
    """
    best = (0, "")
    for name, sequence in gc_dict.items():
        score = gccontent(sequence)
        # Strictly greater: ties keep the earlier entry
        if score > best[0]:
            best = (score, name)
    return best

if __name__ == "__main__":
    # fasta.fasta hands back the parsed records and a flag; results are
    # printed only when that flag is falsy
    (gc_dict, invalid) = fasta.fasta('rosalind_gc.txt')
    if not invalid:
        # Local names avoid shadowing the builtin `max`
        (best_score, best_name) = gc(gc_dict)
        # Single-argument print(...) prints the same under Python 2 and 3
        print(best_name)
        print(best_score)
		
Пример #23
0
def solvePhaseMax(A=None, At=None, b0=None, x0=None, opts=None):
    """Call FASTA repeatedly on sub-problems built from a shrinking ``x0``.

    The initial guess is rescaled, then FASTA is called in a loop. After
    every call ``x0`` is divided by 10 and the remaining iteration budget is
    reduced by the number of iterations FASTA reported. The loop ends when
    the budget is used up, when the per-iteration callback sets the
    ``ending`` flag, or when the relative change of the constraint error
    drops below ``opts.tol``.

    :param A: measurement operator; used as ``dot(A, x)`` and passed to FASTA
    :param At: passed to FASTA unchanged
    :param b0: measurements; must support ``len`` and ``flatten``
    :param x0: initial guess; must support ``len`` and ``flatten``
    :param opts: options object; the fields read here are ``maxIters``,
        ``maxTime``, ``tol``, ``verbose``, ``xt``, ``recordReconErrors``,
        ``recordResiduals``, ``recordTimes`` and ``recordMeasurementErrors``
    :return: ``(sol, outs)`` - the last solution returned by FASTA and the
        value of ``generateOutputs``
    """
    # Initialization
    m = len(b0)
    n = len(x0)
    remainIters = opts.maxIters

    #  Normalize the initial guess relative to the number of measurements
    x0 = dot((x0 / norm(x0.flatten('F'))),
             np.mean(b0.flatten('F'))) * (m / n)*100
    #  re-scale the initial guess so that it approximately satisfies |Ax|=b
    sol = np.multiply(x0, np.min(b0 / np.abs(dot(A, x0))))

    # Values the per-iteration callback updates. They live in a dict because
    # a plain assignment inside the nested function would create new locals
    # there: the counter would raise UnboundLocalError and the `ending` flag
    # would never reach the outer loop.
    state = {
        'itera': 0,
        'ending': 0,
        'currentTime': [],
        'currentResid': [],
        'currentReconError': [],
        'currentMeasurementError': [],
    }

    solveTimes, measurementErrors, reconErrors, residuals = initializeContainers(
        opts)

    # np.maximum clamps element-wise at zero. np.max(..., 0) would instead
    # reduce along axis 0.
    f = lambda z=None: 0.5 * norm(np.maximum(np.abs(z) - b0, 0)) ** 2

    gradf = lambda z=None: (np.multiply(np.sign(z),
                                        np.maximum(np.abs(z) - b0, 0)))

    xtGiven = opts.xt is not None  # identity test: `==` on an array is element-wise

    startTime = time.time()  # Start timer (time.time has to be called)

    # Runs code upon each FASTA iteration. Returns whether FASTA should terminate.
    # Defined before the loop so it exists when FASTA invokes the callback.
    def processIteration(x=None, residual=None):
        state['itera'] += 1
        itera = state['itera']
        # NOTE(review): the containers are indexed with the 1-based counter;
        # confirm initializeContainers sizes them accordingly.

        # Record convergence information. If xt is provided, the
        # reconstruction error is computed, otherwise the residual is kept.
        if xtGiven:
            xt = opts.xt
            # Compute optimal rotation
            alpha = (dot(x.flatten('F').T, xt.flatten('F'))) / \
                (dot(x.flatten('F').T, x.flatten('F')))
            x = dot(alpha, x)
            state['currentReconError'] = norm(x - xt) / norm(xt)
            if opts.recordReconErrors:
                reconErrors[itera] = state['currentReconError']
        else:
            state['currentResid'] = residual

        if opts.recordResiduals:
            residuals[itera] = residual

        # Record elapsed time so far
        state['currentTime'] = time.time() - startTime

        if opts.recordTimes:
            solveTimes[itera] = state['currentTime']

        if opts.recordMeasurementErrors:
            # A is applied with dot(), as everywhere else in this function.
            # NOTE(review): this uses the outer `sol`, not the current
            # iterate `x` - confirm that is intended.
            state['currentMeasurementError'] = norm(
                abs(dot(A, sol)) - b0) / norm(b0)
            measurementErrors[itera] = state['currentMeasurementError']

        # Display verbose output if specified
        if opts.verbose == 2:
            displayVerboseOutput(
                itera, state['currentTime'], state['currentResid'],
                state['currentReconError'], state['currentMeasurementError'])

        # Test stopping criteria.
        # NOTE(review): only the run-time limit is tested here; tolerance
        # based tests appear to be missing.
        stop = False
        if state['currentTime'] > opts.maxTime:  # Stop if we're run over the max runtime
            stop = True

        if xtGiven:
            # When true, this flag will terminate outer loop
            state['ending'] = stop

        return stop

    # Options to hand to fasta
    # NOTE(review): this binds the name `struct` itself, not a new instance;
    # confirm that attributes are meant to be set on it.
    fastaOpts = struct
    fastaOpts.maxIters = opts.maxIters
    fastaOpts.stopNow = lambda x=None, itera=None, resid=None, normResid=None, maxResid=None, opts=None: processIteration(
        x, resid)

    fastaOpts.verbose = 0
    constraintError = norm(abs(dot(A, sol)) - b0)
    while remainIters > 0 and not state['ending']:
        g = lambda x=None: - np.real(dot(x0.T, x))
        proxg = lambda x=None, t=None: x + t*x0
        fastaOpts.tol = norm(x0) / 100
        # Call FASTA to solve the inner minimization problem
        sol, _, fastaOuts = fasta(A, At, f, gradf, g, proxg, sol, fastaOpts)
        fastaOpts.tau = fastaOuts.stepsizes
        x0 = x0 / 10
        # Update the max number of iterations for fasta
        remainIters = remainIters - fastaOuts.iterationCount
        fastaOpts.maxIters = min(opts.maxIters, remainIters)
        newConstraintError = norm(np.maximum(np.abs(dot(A, sol)) - b0, 0))

        relativeChange = abs(constraintError - newConstraintError) / norm(b0)
        if relativeChange < opts.tol:
            break
        constraintError = newConstraintError

    # Create output according to the options chosen by user
    outs = generateOutputs(opts, state['itera'], solveTimes,
                           measurementErrors, reconErrors, residuals)

    if opts.verbose == 1:
        displayVerboseOutput(state['itera'], state['currentTime'],
                             state['currentResid'],
                             state['currentReconError'],
                             state['currentMeasurementError'])

    return sol, outs
Пример #24
0
def process_seq2sites(handler):
    """Process data submitted in seq2sites form.

    Reads the ``content`` and ``wNN`` request parameters, splits the
    submission into names and sequences (FASTA when the content starts with
    ``>``, otherwise one unnamed sequence), enforces the submission-size
    limits and runs ``seq2sites`` on every sequence.

    :param handler: request handler; must provide ``request.get`` and ``redirect``
    :return: ``(False, {'problems': [...]})`` when the submission is rejected.
        NOTE(review): the visible part of this function ends after the
        per-sequence results are collected, without a return value for the
        success path.
    """
    # get posted parameters
    content = handler.request.get('content')
    wNN = handler.request.get('wNN') == 'yes'

    # submission validation and error reporting
    problems = []
    valid = True

    # get the submitted data
    content = content.encode('utf8')

    # make sure something was submitted
    if len(content) == 0:
        valid = False
        handler.redirect("/seq2sites/")

    # determine format
    if content.startswith('>'):
        fmt = 'fasta'
    else:
        fmt = 'single_seq'

    # pull names and sequence out of submitted content
    names = []
    seqs = []
    if fmt == 'fasta':
        try:
            fnames = []
            fseqs = []
            for entry in fasta(content, 's'):
                fnames.append(entry['name'])
                fseqs.append(RE_NON_IUPAC.sub('', entry['sequence'].upper()))
            # Only replace names/seqs once the whole submission parsed
            names = fnames
            seqs = fseqs
        except Exception:
            # `except Exception` still reports any parsing error but, unlike
            # a bare `except:`, lets KeyboardInterrupt/SystemExit through
            valid = False
            problems.append('There was an error in the FASTA format')
    else:
        names = ['']
        seqs = [RE_NON_IUPAC.sub('', content.upper())]

    # enforce limits for multisequence submissions
    if fmt == 'fasta':
        # Longest submitted sequence; 0 when there are none
        max_length = max([len(seq) for seq in seqs] or [0])
        if max_length <= LONG_SEQ_CUTOFF:
            if len(seqs) > MAX_SEQS_SHORT:
                valid = False
                problems.append('too many sequences submitted')
        elif len(seqs) > MAX_SEQS_LONG:
            valid = False
            problems.append('too many sequences submitted')

    if not valid:
        return (False, {'problems': problems})

    result_lines = []
    sites_by_line = []
    for seq in seqs:
        try:
            sites = seq2sites(seq)
            sites_by_line.append(sites)
            result_lines.append(sites2str(sites))
        except Exception as e:
            # `as` is accepted by Python 2.6+ and Python 3; the comma form is
            # a syntax error on Python 3
            result_lines.append('There was an error: %s' % e)
Пример #25
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 23 13:29:35 2018

@author: Dennis
"""

from fasta import fasta

names, strings = fasta("rosalind_grph.txt")

ajlist = []
k = 3

# Collect every ordered pair (i, j), i != j, where the last k characters of
# strings[i] equal the first k characters of strings[j]. The comparison is
# character by character and stops at the first difference.
for i in range(0, len(strings)):
    for j in range(0, len(strings)):
        if i == j:
            continue
        if all(strings[i][-k + n] == strings[j][n] for n in range(0, k)):
            ajlist.append((i, j))

# NOTE(review): the file is opened here but the visible excerpt neither
# writes `output` to it nor closes it.
f = open("output.txt", "w")
# One "name name" line per collected pair
output = "".join(names[src] + " " + names[dst] + "\n" for src, dst in ajlist)
Пример #26
0
import argparse

# Prints, in FASTA form, every occurrence of the motif together with a fixed
# number of flanking characters on each side
parser = argparse.ArgumentParser(
    description='get subsequences having the motif of interest')
parser.add_argument('-i', required=True, help='input')
parser.add_argument('-m', required=True, help='motif')
parser.add_argument('-r', required=True, help='right of motif (Integer)')
parser.add_argument('-l', required=False, help='left of motif(Integer)')

args = parser.parse_args()
motif = args.m
rightNumber = int(args.r)
# -l is optional: without it no characters are taken on the left, instead of
# failing on int(None)
leftNumber = int(args.l) if args.l is not None else 0

lengthOfMotif = len(motif)
expectedLength = lengthOfMotif + leftNumber + rightNumber
# NOTE(review): no import of `fasta` is visible in this excerpt - confirm it
# is imported elsewhere in the file.
fastaDict = fasta.fasta(args.i).read()

for header, sequence in fastaDict.items():
    sequenceCount = 0
    for i in range(len(sequence)):
        subseq = sequence[i:i + lengthOfMotif]
        if subseq != motif:
            continue
        startPosition = i - leftNumber
        endPosition = i + lengthOfMotif + rightNumber
        currentSequence = sequence[startPosition:endPosition]
        # A window that runs off either end of the sequence comes back
        # shorter than expected and is skipped
        if len(currentSequence) == expectedLength:
            sequenceCount += 1
            print('>' + header + '.' + str(sequenceCount) + '\n' +
                  currentSequence)
Пример #27
0
# NOTE(review): `parser` and the options read below as args.i, args.conPrint
# and args.perNmotif are not defined in the visible part of this file; they
# are presumably declared above this excerpt.
parser.add_argument('-f', required=True, help='<Required> fasta')
parser.add_argument('-m', required=True, help='<Required> motif')
parser.add_argument('-c', required=True, help='<Required> countTab')
parser.add_argument('-s',
                    required=True,
                    help='<Required> strandInfo, +, - or column No')
parser.add_argument('-o', required=True, help='<Required> output')

args = parser.parse_args()
bedFile = args.i
outBedFile = args.o
fastaFile = args.f
motif = args.m
# Controls whether conPrint() prints anything
printFlag = args.conPrint

# Load the fasta file; `headers` holds whatever read() returns
fastaInput = fasta.fasta(fastaFile)
headers = fastaInput.read()


def conPrint(text):
    """Print ``text`` only when the module-level ``printFlag`` is truthy."""
    if not printFlag:
        return
    print(text)


# Use the value given on the command line, otherwise derive a default from
# the motif length.
if args.perNmotif:
    perNmotif = int(args.perNmotif)
else:
    # `**` is exponentiation. `^` is bitwise XOR in Python, binds more loosely
    # than `*`, and raises TypeError on a float operand.
    # NOTE(review): presumably the expected number of motif occurrences per
    # 1000 positions with four equally likely letters - confirm.
    perNmotif = (1.0 / 4) ** len(motif) * 1000


def motifCount(sequence, positionStart, positionEnd, strand, motif):