def weightedLoad(infile,weightthresh=None):
    """Load an alignment and encode it as a weighted sparse indicator matrix.

    The alignment is read from ``infile`` with ``simformat.read``.  Row ``i``
    of the result belongs to the i-th sequence; for each position ``j`` of
    that sequence, column ``j*Q + residue`` is set to the sequence's weight,
    where ``residue`` is the j-th value produced by ``intConv`` for the
    sequence string.  (``Q`` and ``intConv`` are defined elsewhere in this
    module; presumably the alphabet size and a residue-to-integer encoder --
    confirm against their definitions.)

    Parameters:
        infile: passed unchanged to ``simformat.read``.
        weightthresh: a value present in the alignment header's ``cutoffs``.
            When given, the alignment is annotated and the per-sequence
            weights are taken from ``simformat.getinvnormsim`` for that
            cutoff's index.  When ``None``, every sequence gets weight 1.

    Returns:
        A sparse matrix in CSC format with ``len(alignment)`` rows and
        ``Q * len(alignment[0])`` columns.

    Raises:
        Exception: if ``weightthresh`` is not among the header's cutoffs.
    """
    alignment = simformat.read(infile)
    header = alignment.header

    if weightthresh is not None:
        try:
            weightsindex = header.cutoffs.index(weightthresh)
        except ValueError:
            # Only a failed lookup is translated into the "no such cutoff"
            # message; anything else (KeyboardInterrupt, a malformed header)
            # propagates unchanged instead of being mislabelled.
            raise Exception("No such weighting cutoff, valid cutoffs are: " + repr(header.cutoffs))
        simformat.annotateAlignment(alignment)
        weights = S.array(simformat.getinvnormsim(alignment, weightsindex))
    else:
        weights = S.ones(len(alignment))

    n_seqs = len(alignment)
    width = len(alignment[0])

    # LIL is cheap to populate element by element; building CSC directly
    # might be faster still but would need more complex code, so convert
    # once at the end instead.
    matrix = sp.lil_matrix((n_seqs, Q * width))
    for row, (seq_rec, one_weight) in enumerate(izip(alignment, weights)):
        seq_as_ints = intConv(seq_rec.seq.tostring())
        for pos, residue in enumerate(seq_as_ints):
            matrix[row, pos * Q + residue] = one_weight
    return matrix.tocsc()
def main():
    """Command-line entry point: annotate an alignment with similarity counts.

    Expects exactly two positional arguments, ``<inputFAA>`` and
    ``<outputSIM>``.  The input alignment is read with ``simformat.read``,
    its records are de-duplicated with ``uniqSeqs``, and for every identity
    cutoff each record receives, under ``annotations["weights"]``, the number
    of sequences in the alignment whose similarity to it (1 - Hamming
    distance) is strictly greater than that cutoff.  The cutoffs are stored
    on the alignment header and the result is written with
    ``simformat.write``.

    Options:
        -i/--ids:   comma-separated cutoffs in percent (divided by 100).
        -f/--fids:  comma-separated cutoffs as fractions; overrides ``-i``.
        -u/--unique: see the note at its definition below.

    Exits with status 2 after printing the help text when the number of
    positional arguments is not two.
    """
    # Parse command-line arguments.
    parser = OptionParser(usage="Usage: %prog [options] <inputFAA> <outputSIM>")
    # NOTE(review): a store_true flag whose default is already True can never
    # be switched off, so de-duplication below always runs.
    parser.add_option('-u','--unique',dest='unique',default=True,action='store_true')
    parser.add_option('-i','--ids',dest='cutoffs',type='string',default='100,98,95,90,85,80,75,70')
    parser.add_option('-f','--fids',dest='fcutoffs',type='string',default=None)
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        # A usage error must not report success to the calling shell; 2 is
        # the status optparse itself uses for usage errors.
        sys.exit(2)

    # Read the alignment from file.
    with open(args[0]) as infile:
        myAlignment = simformat.read(infile)
    if options.unique:
        myAlignment._records = uniqSeqs(myAlignment._records)
    simformat.annotateAlignment(myAlignment)

    # Fractional cutoffs take precedence; percentages are scaled to [0, 1].
    if options.fcutoffs is None:
        percents = [float(tok) for tok in options.cutoffs.split(',')]
        thresholds = S.array(percents)/100.0
    else:
        thresholds = S.array([float(tok) for tok in options.fcutoffs.split(',')])
    myAlignment.header.cutoffs = thresholds

    # Encode each sequence as a vector of character codes so the pairwise
    # Hamming distance yields the fraction of mismatching positions.
    AsVects = [S.array([ord(ch) for ch in record.seq.tostring()])
               for record in myAlignment]
    AllSimilarities = 1.0 - D.cdist(AsVects,AsVects,'hamming')

    weights = S.zeros((len(myAlignment),len(thresholds)))
    for col, oneThresh in enumerate(thresholds):
        # NOTE(review): the comparison is strict, and similarity never exceeds
        # 1.0, so a cutoff of 1.0 (the default "100") yields a count of zero
        # for every sequence -- confirm that '>=' was not intended.
        weights[:,col] = (AllSimilarities > oneThresh).sum(1)
    for i, row in enumerate(weights):
        myAlignment._records[i].annotations["weights"] = [int(w) for w in row]

    # Write output file.
    with open(args[1],"w") as outfile:
        simformat.write(outfile,myAlignment)