def testGetSequence(genomeFilePath):
    """Cross-check the local genome file against the genome browser.

    Reads a fixed window on chr1 around position 808984 from the genome
    loaded from ``genomeFilePath`` and fetches a sequence through
    ``genomebrowser.gb_getSequence`` using that function's default arguments
    (presumably the same region -- TODO confirm against genomebrowser).
    Both sequences are printed, bracketed, and used to design a primer.

    The process exits with status 1 when the session id is blank, when the
    bracketed sequences differ, or when the two primers differ in ``fSeq``,
    ``rSeq`` or ``size``. Otherwise both primers are written to a uniquely
    named CSV file under /tmp.
    """
    chrom = 'chr1'
    position = int('808984')
    span = 1
    windowStart = position - span - 500
    windowEnd = windowStart + span + 1000
    divider = "~" * 50

    # Sequence taken from the local genome file, normalised to upper case.
    genome = sequenceutils.loadGenome(genomeFilePath)
    localSeq = sequenceutils.getSequence(
        genome, chrom, windowStart, windowEnd).upper()
    print(localSeq)
    print(divider)

    # Sequence taken from the genome browser; a session id is needed first.
    print("Getting sesion id ...")
    hgsid = genomebrowser.gb_getSessionId()
    print(hgsid)
    if hgsid == "":
        print("blank hgsid. quitting")
        sys.exit(1)
    remoteSeq = genomebrowser.gb_getSequence(hgsid)
    print(remoteSeq)
    print(divider)

    # A raw mismatch is only reported; the bracketed comparison is decisive.
    print("Sequences match!" if localSeq == remoteSeq
          else "Sequences do not match :(")

    localBracketed = sequenceutils.bracketSequence(localSeq)
    remoteBracketed = sequenceutils.bracketSequence(remoteSeq)
    if localBracketed != remoteBracketed:
        print("Bracketed sequences do not match!")
        sys.exit(1)

    localPrimer = webprimer3.getPrimer(localBracketed, chrom, position)
    remotePrimer = webprimer3.getPrimer(remoteBracketed, chrom, position)

    # Compare field by field, stopping at the first difference.
    primersDiffer = any(
        getattr(localPrimer, field) != getattr(remotePrimer, field)
        for field in ('fSeq', 'rSeq', 'size'))
    if primersDiffer:
        print("Primer mismatch")
        print("primerA:")
        print(localPrimer)
        print("primerB:")
        print(remotePrimer)
        sys.exit(1)

    print("Primer match")
    print(localPrimer)

    csvPath = "/tmp/" + str(uuid.uuid4()) + ".csv"
    fileutils.primersToCsv([localPrimer, remotePrimer], csvPath)
    print("Wrote %s" % (csvPath))
def testGetSequence(genomeFilePath):
    """Cross-check the local genome file against the genome browser.

    Reads a fixed window on chr1 around position 808984 from the genome
    loaded from ``genomeFilePath`` and fetches a sequence through
    ``genomebrowser.gb_getSequence`` using that function's default arguments
    (presumably the same region -- TODO confirm against genomebrowser).
    Both sequences are printed, bracketed, and used to design a primer.

    The process exits with status 1 when the session id is blank, when the
    bracketed sequences differ, or when the two primers differ in ``fSeq``,
    ``rSeq`` or ``size``. Otherwise both primers are written to a uniquely
    named CSV file under /tmp.
    """
    chrom = 'chr1'
    position = int('808984')
    span = 1
    windowStart = position - span - 500
    windowEnd = windowStart + span + 1000
    divider = "~" * 50

    # Sequence taken from the local genome file, normalised to upper case.
    genome = sequenceutils.loadGenome(genomeFilePath)
    localSeq = sequenceutils.getSequence(
        genome, chrom, windowStart, windowEnd).upper()
    print(localSeq)
    print(divider)

    # Sequence taken from the genome browser; a session id is needed first.
    print("Getting sesion id ...")
    hgsid = genomebrowser.gb_getSessionId()
    print(hgsid)
    if hgsid == "":
        print("blank hgsid. quitting")
        sys.exit(1)
    remoteSeq = genomebrowser.gb_getSequence(hgsid)
    print(remoteSeq)
    print(divider)

    # A raw mismatch is only reported; the bracketed comparison is decisive.
    print("Sequences match!" if localSeq == remoteSeq
          else "Sequences do not match :(")

    localBracketed = sequenceutils.bracketSequence(localSeq)
    remoteBracketed = sequenceutils.bracketSequence(remoteSeq)
    if localBracketed != remoteBracketed:
        print("Bracketed sequences do not match!")
        sys.exit(1)

    localPrimer = webprimer3.getPrimer(localBracketed, chrom, position)
    remotePrimer = webprimer3.getPrimer(remoteBracketed, chrom, position)

    # Compare field by field, stopping at the first difference.
    primersDiffer = any(
        getattr(localPrimer, field) != getattr(remotePrimer, field)
        for field in ('fSeq', 'rSeq', 'size'))
    if primersDiffer:
        print("Primer mismatch")
        print("primerA:")
        print(localPrimer)
        print("primerB:")
        print(remotePrimer)
        sys.exit(1)

    print("Primer match")
    print(localPrimer)

    csvPath = "/tmp/" + str(uuid.uuid4()) + ".csv"
    fileutils.primersToCsv([localPrimer, remotePrimer], csvPath)
    print("Wrote %s" % (csvPath))
def processRows(self, rows, genomeFile, db='hg38', chromcol='#CHROM',
                poscol='POS', refcol='REF', bracketlen=500,
                primerlen='200-500'):
    """Design one primer per input row and write all primers to a CSV file.

    Each row is validated (required columns present, chromosome 1-22/X/Y,
    integer position, single-base A/C/T/G reference). Rows that fail get a
    placeholder ``Primer('ERROR', -1, 'ERROR', 'ERROR', -1)`` so that
    ``self.primers`` keeps one entry per input row. For valid rows the
    reference base is compared with the genome, ``bracketlen`` bases are
    fetched on each side of the position, and the bracketed, upper-cased
    sequence is handed to ``webprimer3.getPrimer``.

    The sequence source is chosen by ``app.config['GB']``: ``'UCSC'`` uses
    ``genomebrowser``; any other value reads ``genomeFile`` through
    ``sequenceutils``.

    Args:
        rows: Sized iterable of rows indexable by column name.
        genomeFile: Passed to ``sequenceutils.loadGenome`` when the local
            genome is used; loaded at most once per call.
        db: Forwarded to ``genomebrowser.gb_getSequence``.
        chromcol: Key of the chromosome column.
        poscol: Key of the position column.
        refcol: Key of the reference-base column.
        bracketlen: Number of bases fetched on each side of the position,
            for both sequence sources.
        primerlen: Forwarded unchanged to ``webprimer3.getPrimer``.

    Returns:
        dict with ``'current'`` and ``'total'`` both set to the row count and
        ``'warnings'`` holding every progress/warning message collected.
    """
    # celery kung fu
    self.primers = list()
    warnings = list()
    task_id = processRows.request.id
    rowCount = len(rows)
    # Loaded lazily so the genome file is read once per task rather than once
    # per row, and not at all when every row is served by the genome browser.
    genome = None

    def skipRow(message):
        # Record the reason and keep self.primers aligned with the input rows.
        warnings.append(message)
        self.primers.append(Primer('ERROR', -1, 'ERROR', 'ERROR', -1))

    for idx, row in enumerate(rows):
        logger.info('Processing row %d' % (idx))
        warnings.append('Processing row %d' % (idx))

        if chromcol not in row or poscol not in row or refcol not in row:
            print("~" * 20 + " MISSING COL IN ROW " + "~" * 20)
            print(row)
            skipRow("Error! Row %d could not be parsed. Skipping."
                    % (idx + 1))
            continue

        rawChrom = row[chromcol]
        pos = row[poscol]
        ref = row[refcol]
        chrom = "chr%s" % (rawChrom)

        # assume that chrom is 1-22, X or Y
        try:
            chrom_int = int(rawChrom)
        except (TypeError, ValueError):
            # Not numeric: only X/Y are acceptable. Non-string values (e.g.
            # None) are rejected instead of crashing on .lower().
            chromOk = (hasattr(rawChrom, 'lower')
                       and rawChrom.lower() in ('x', 'y'))
        else:
            chromOk = 1 <= chrom_int <= 22
        if not chromOk:
            warn = ("Error! Found '%s' in %s column of row %d; expected "
                    "1-22, X, or Y. Skipping."
                    % (rawChrom, chromcol, idx + 1))
            logging.warning(warn)
            skipRow(warn)
            continue

        # test that pos is an int
        try:
            pos_int = int(pos)
        except (TypeError, ValueError):
            warn = ("Error! Found '%s' in %s column of row %d; expected "
                    "an integer. Skipping." % (pos, poscol, idx + 1))
            logging.warning(warn)
            skipRow(warn)
            continue

        # test that ref is a single character in A,C,T,G
        if (not hasattr(ref, 'lower')
                or ref.lower() not in ('a', 'c', 't', 'g')):
            warn = ("Error! Found '%s' in %s column of row %d; expected "
                    "A, C, T, or G. Skipping." % (ref, refcol, idx + 1))
            logging.warning(warn)
            skipRow(warn)
            continue

        useUcsc = app.config['GB'] == 'UCSC'
        hgsid = ''
        if useUcsc:
            hgsid = genomebrowser.gb_getSessionId()
            genomeRef = genomebrowser.gb_getSequence(
                hgsid, db=db, chrom=chrom,
                left=pos_int - 1, right=pos_int,
                leftPad=0, rightPad=0)
        else:
            if genome is None:
                genome = sequenceutils.loadGenome(genomeFile)
            genomeRef = sequenceutils.getSequence(
                genome, chrom, pos_int - 1, pos_int)

        # Compare case-insensitively: ref was accepted in either case above,
        # so a pure case difference is not a reference mismatch.
        if hasattr(genomeRef, 'upper'):
            genomeBase = genomeRef.upper()
        else:
            genomeBase = genomeRef
        if genomeBase != ref.upper():
            warn = ("Warning! Reference '%s' for chromosome %s, "
                    "position %s was found to be '%s' in the genome file."
                    % (ref, chrom, pos, genomeRef))
            logging.warning(warn)
            warnings.append(warn)

        # UCSC defaults to all upper case
        if useUcsc:
            # Pad by bracketlen so both sources return the same window.
            seq = genomebrowser.gb_getSequence(
                hgsid, db=db, chrom=chrom,
                left=pos_int - 1, right=pos_int,
                leftPad=bracketlen, rightPad=bracketlen)
        else:
            seqStart = pos_int - bracketlen - 1
            seqEnd = seqStart + bracketlen + bracketlen + 1
            seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

        bseq = sequenceutils.bracketSequence(seq).upper()
        primer = webprimer3.getPrimer(bseq, chrom, pos_int, primerlen)
        if primer is None:
            logging.warning('getPrimer returned None')
        # NOTE(review): a None primer is still appended, as before; confirm
        # that fileutils.primersToCsv tolerates None entries.
        self.primers.append(primer)

        logger.debug('Updating state for task id %s' % (str(task_id)))
        self.update_state(state='PROGRESS',
                          meta={'current': idx,
                                'total': rowCount,
                                'warnings': warnings})

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s' % (filename))
    logger.debug('Path: %s' % (path))
    fileutils.primersToCsv(self.primers, path)
    logger.debug('Done writing to file')
    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}
def processRows(self, rows, genomeFile, db='hg38', chromcol='#CHROM',
                poscol='POS', refcol='REF', bracketlen=500,
                primerlen='200-500'):
    """Design one primer per input row and write all primers to a CSV file.

    Each row is validated (required columns present, chromosome 1-22/X/Y,
    integer position, single-base A/C/T/G reference). Rows that fail get a
    placeholder ``Primer('ERROR', -1, 'ERROR', 'ERROR', -1)`` so that
    ``self.primers`` keeps one entry per input row. For valid rows the
    reference base is compared with the genome, ``bracketlen`` bases are
    fetched on each side of the position, and the bracketed, upper-cased
    sequence is handed to ``webprimer3.getPrimer``.

    The sequence source is chosen by ``app.config['GB']``: ``'UCSC'`` uses
    ``genomebrowser``; any other value reads ``genomeFile`` through
    ``sequenceutils``.

    Args:
        rows: Sized iterable of rows indexable by column name.
        genomeFile: Passed to ``sequenceutils.loadGenome`` when the local
            genome is used; loaded at most once per call.
        db: Forwarded to ``genomebrowser.gb_getSequence``.
        chromcol: Key of the chromosome column.
        poscol: Key of the position column.
        refcol: Key of the reference-base column.
        bracketlen: Number of bases fetched on each side of the position,
            for both sequence sources.
        primerlen: Forwarded unchanged to ``webprimer3.getPrimer``.

    Returns:
        dict with ``'current'`` and ``'total'`` both set to the row count and
        ``'warnings'`` holding every progress/warning message collected.
    """
    # celery kung fu
    self.primers = list()
    warnings = list()
    task_id = processRows.request.id
    rowCount = len(rows)
    # Loaded lazily so the genome file is read once per task rather than once
    # per row, and not at all when every row is served by the genome browser.
    genome = None

    def skipRow(message):
        # Record the reason and keep self.primers aligned with the input rows.
        warnings.append(message)
        self.primers.append(Primer('ERROR', -1, 'ERROR', 'ERROR', -1))

    for idx, row in enumerate(rows):
        logger.info('Processing row %d' % (idx))
        warnings.append('Processing row %d' % (idx))

        if chromcol not in row or poscol not in row or refcol not in row:
            print("~" * 20 + " MISSING COL IN ROW " + "~" * 20)
            print(row)
            skipRow("Error! Row %d could not be parsed. Skipping."
                    % (idx + 1))
            continue

        rawChrom = row[chromcol]
        pos = row[poscol]
        ref = row[refcol]
        chrom = "chr%s" % (rawChrom)

        # assume that chrom is 1-22, X or Y
        try:
            chrom_int = int(rawChrom)
        except (TypeError, ValueError):
            # Not numeric: only X/Y are acceptable. Non-string values (e.g.
            # None) are rejected instead of crashing on .lower().
            chromOk = (hasattr(rawChrom, 'lower')
                       and rawChrom.lower() in ('x', 'y'))
        else:
            chromOk = 1 <= chrom_int <= 22
        if not chromOk:
            warn = ("Error! Found '%s' in %s column of row %d; expected "
                    "1-22, X, or Y. Skipping."
                    % (rawChrom, chromcol, idx + 1))
            logging.warning(warn)
            skipRow(warn)
            continue

        # test that pos is an int
        try:
            pos_int = int(pos)
        except (TypeError, ValueError):
            warn = ("Error! Found '%s' in %s column of row %d; expected "
                    "an integer. Skipping." % (pos, poscol, idx + 1))
            logging.warning(warn)
            skipRow(warn)
            continue

        # test that ref is a single character in A,C,T,G
        if (not hasattr(ref, 'lower')
                or ref.lower() not in ('a', 'c', 't', 'g')):
            warn = ("Error! Found '%s' in %s column of row %d; expected "
                    "A, C, T, or G. Skipping." % (ref, refcol, idx + 1))
            logging.warning(warn)
            skipRow(warn)
            continue

        useUcsc = app.config['GB'] == 'UCSC'
        hgsid = ''
        if useUcsc:
            hgsid = genomebrowser.gb_getSessionId()
            genomeRef = genomebrowser.gb_getSequence(
                hgsid, db=db, chrom=chrom,
                left=pos_int - 1, right=pos_int,
                leftPad=0, rightPad=0)
        else:
            if genome is None:
                genome = sequenceutils.loadGenome(genomeFile)
            genomeRef = sequenceutils.getSequence(
                genome, chrom, pos_int - 1, pos_int)

        # Compare case-insensitively: ref was accepted in either case above,
        # so a pure case difference is not a reference mismatch.
        if hasattr(genomeRef, 'upper'):
            genomeBase = genomeRef.upper()
        else:
            genomeBase = genomeRef
        if genomeBase != ref.upper():
            warn = ("Warning! Reference '%s' for chromosome %s, "
                    "position %s was found to be '%s' in the genome file."
                    % (ref, chrom, pos, genomeRef))
            logging.warning(warn)
            warnings.append(warn)

        # UCSC defaults to all upper case
        if useUcsc:
            # Pad by bracketlen so both sources return the same window.
            seq = genomebrowser.gb_getSequence(
                hgsid, db=db, chrom=chrom,
                left=pos_int - 1, right=pos_int,
                leftPad=bracketlen, rightPad=bracketlen)
        else:
            seqStart = pos_int - bracketlen - 1
            seqEnd = seqStart + bracketlen + bracketlen + 1
            seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

        bseq = sequenceutils.bracketSequence(seq).upper()
        primer = webprimer3.getPrimer(bseq, chrom, pos_int, primerlen)
        if primer is None:
            logging.warning('getPrimer returned None')
        # NOTE(review): a None primer is still appended, as before; confirm
        # that fileutils.primersToCsv tolerates None entries.
        self.primers.append(primer)

        logger.debug('Updating state for task id %s' % (str(task_id)))
        self.update_state(state='PROGRESS',
                          meta={'current': idx,
                                'total': rowCount,
                                'warnings': warnings})

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s' % (filename))
    logger.debug('Path: %s' % (path))
    fileutils.primersToCsv(self.primers, path)
    logger.debug('Done writing to file')
    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}