Пример #1
0
def testGetSequence(genomeFilePath):
    """Cross-check the local genome file against the genome browser.

    Reads a window around chr1:808984 from the genome stored at
    ``genomeFilePath``, fetches a sequence from the genome browser, and
    reports whether the two agree.  A primer is then designed from each
    bracketed sequence, the two primers are compared (forward sequence,
    reverse sequence and size) and both are written to a uniquely named CSV
    file in the system temporary directory.

    The process exits with status 1 when no browser session id is obtained,
    when the bracketed sequences differ, when primer design returns nothing,
    or when the two primers differ.

    Args:
        genomeFilePath: path handed to ``sequenceutils.loadGenome``.
    """
    # Local imports keep this block self-contained.
    import os
    import tempfile

    chrom = 'chr1'
    seq = '808984'
    pos = int(seq)
    seqlen = 1
    seqStart = pos - seqlen - 500
    seqEnd = seqStart + seqlen + 1000

    genome = sequenceutils.loadGenome(genomeFilePath)
    # Upper-case the local sequence before comparing it with the browser's.
    seqA = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd).upper()
    print(seqA)

    print("~" * 50)

    # get session id
    print("Getting sesion id ...")
    hgsid = genomebrowser.gb_getSessionId()
    print(hgsid)
    if not hgsid:
        # Covers both an empty string and None.
        print("blank hgsid. quitting")
        sys.exit(1)
    # NOTE(review): only the session id is passed, so the region comes from
    # gb_getSequence's defaults -- presumably the same window; confirm.
    seqB = genomebrowser.gb_getSequence(hgsid)
    print(seqB)

    print("~" * 50)

    # A raw mismatch is only reported; the bracketed comparison below decides.
    if seqA == seqB:
        print("Sequences match!")
    else:
        print("Sequences do not match :(")

    bseqA = sequenceutils.bracketSequence(seqA)
    bseqB = sequenceutils.bracketSequence(seqB)

    if bseqA != bseqB:
        print("Bracketed sequences do not match!")
        sys.exit(1)

    primerA = webprimer3.getPrimer(bseqA, chrom, pos)
    primerB = webprimer3.getPrimer(bseqB, chrom, pos)

    # getPrimer can return None; fail cleanly instead of raising
    # AttributeError on the attribute comparison below.
    if primerA is None or primerB is None:
        print("Primer design failed")
        print("primerA:")
        print(primerA)
        print("primerB:")
        print(primerB)
        sys.exit(1)

    primersDiffer = (primerA.fSeq != primerB.fSeq
                     or primerA.rSeq != primerB.rSeq
                     or primerA.size != primerB.size)
    if primersDiffer:
        print("Primer mismatch")
        print("primerA:")
        print(primerA)
        print("primerB:")
        print(primerB)
        sys.exit(1)

    print("Primer match")
    print(primerA)

    primerList = [primerA, primerB]

    # Use the platform's temporary directory rather than a hard-coded /tmp.
    outfile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()) + ".csv")
    fileutils.primersToCsv(primerList, outfile)
    print("Wrote %s" % (outfile,))
Пример #2
0
def testGetSequence(genomeFilePath):
    """Compare a locally read genome window with the genome browser's copy.

    A window around chr1:808984 is read from the genome file at
    ``genomeFilePath`` and a sequence is fetched from the genome browser.
    Whether they agree is printed.  Each sequence is bracketed, a primer is
    designed from each, the primers are compared on forward sequence,
    reverse sequence and size, and both are written to a uniquely named CSV
    file in the system temporary directory.

    Exits with status 1 if the session id is blank, the bracketed sequences
    differ, primer design yields nothing, or the primers differ.

    Args:
        genomeFilePath: path handed to ``sequenceutils.loadGenome``.
    """
    # Imported locally so the function does not rely on file-level imports.
    import os
    import tempfile

    chrom = 'chr1'
    seq = '808984'
    position = int(seq)
    seqlen = 1
    seqStart = position - seqlen - 500
    seqEnd = seqStart + seqlen + 1000

    genome = sequenceutils.loadGenome(genomeFilePath)
    seqA = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)
    # Normalise case before comparing with the browser sequence.
    seqA = seqA.upper()
    print(seqA)

    print("~" * 50)

    # get session id
    print("Getting sesion id ...")
    hgsid = genomebrowser.gb_getSessionId()
    print(hgsid)
    if not hgsid:
        # Treat None the same as an empty session id.
        print("blank hgsid. quitting")
        sys.exit(1)
    # NOTE(review): the region is whatever gb_getSequence defaults to --
    # presumably the same window as above; confirm against that helper.
    seqB = genomebrowser.gb_getSequence(hgsid)
    print(seqB)

    print("~" * 50)

    # Informational only: the hard failure is on the bracketed sequences.
    if seqA == seqB:
        print("Sequences match!")
    else:
        print("Sequences do not match :(")

    bseqA = sequenceutils.bracketSequence(seqA)
    bseqB = sequenceutils.bracketSequence(seqB)

    if bseqA != bseqB:
        print("Bracketed sequences do not match!")
        sys.exit(1)

    primerA = webprimer3.getPrimer(bseqA, chrom, position)
    primerB = webprimer3.getPrimer(bseqB, chrom, position)

    # getPrimer may hand back None; report that instead of crashing with
    # AttributeError when the primer attributes are read.
    if primerA is None or primerB is None:
        print("Primer design failed")
        print("primerA:")
        print(primerA)
        print("primerB:")
        print(primerB)
        sys.exit(1)

    samePrimer = (primerA.fSeq == primerB.fSeq
                  and primerA.rSeq == primerB.rSeq
                  and primerA.size == primerB.size)
    if not samePrimer:
        print("Primer mismatch")
        print("primerA:")
        print(primerA)
        print("primerB:")
        print(primerB)
        sys.exit(1)

    print("Primer match")
    print(primerA)

    primerList = [primerA, primerB]

    # Portable temp location instead of a hard-coded "/tmp/" prefix.
    outfile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()) + ".csv")
    fileutils.primersToCsv(primerList, outfile)
    print("Wrote %s" % (outfile,))
Пример #3
0
def processRows(self,
                rows,
                genomeFile,
                db='hg38',
                chromcol='#CHROM',
                poscol='POS',
                refcol='REF',
                bracketlen=500,
                primerlen='200-500'):
    """Design a primer for every variant row and write the results to CSV.

    Every row is validated (required columns present, chromosome 1-22/X/Y,
    integer position, single A/C/G/T reference base).  A row that fails
    validation gets an ``ERROR`` placeholder primer and a message in the
    warnings list, so ``self.primers`` ends up with one entry per input row,
    in input order.  Progress is published through ``self.update_state``
    after each successfully processed row.

    Args:
        self: bound task instance; ``update_state`` is called on it and the
            resulting primers are stored on ``self.primers``.
        rows: sized iterable of mapping-like rows (``in`` and ``[]`` are
            used on each row).
        genomeFile: path given to ``sequenceutils.loadGenome`` when
            ``app.config['GB']`` is not ``'UCSC'``.
        db: assembly name forwarded to ``genomebrowser.gb_getSequence``.
        chromcol: row key holding the chromosome.
        poscol: row key holding the position.
        refcol: row key holding the reference base.
        bracketlen: padding on each side of the position used when fetching
            the template sequence.
        primerlen: forwarded unchanged to ``webprimer3.getPrimer``.

    Returns:
        dict with ``current`` and ``total`` (both the number of rows) and
        ``warnings`` (list of message strings).
    """
    # celery kung fu
    self.primers = list()
    warnings = list()
    task_id = processRows.request.id

    rowCount = len(rows)
    # Loaded lazily, at most once, instead of once per row.
    genome = None

    def reject(message):
        # Record the problem and keep self.primers aligned with rows.
        logging.warning(message)
        warnings.append(message)
        self.primers.append(Primer('ERROR', -1, 'ERROR', 'ERROR', -1))

    def chromName(value):
        # Return the canonical "chrN" name, or None for anything outside
        # 1-22, X, Y.  Non-string values (e.g. None) are rejected rather
        # than raising.
        try:
            number = int(value)
        except (TypeError, ValueError):
            if hasattr(value, 'upper') and value.upper() in ('X', 'Y'):
                return "chr%s" % value.upper()
            return None
        if 1 <= number <= 22:
            return "chr%d" % number
        return None

    for idx, row in enumerate(rows):
        logger.info('Processing row %d', idx)
        warnings.append('Processing row %d' % (idx))
        if chromcol not in row or poscol not in row or refcol not in row:
            print("~" * 20 + " MISSING COL IN ROW " + "~" * 20)
            print(row)
            reject("Error! Row %d could not be parsed. Skipping." % (idx + 1))
            continue

        pos = row[poscol]
        ref = row[refcol]

        # assume that chrom is 1-22, X or Y
        chrom = chromName(row[chromcol])
        if chrom is None:
            reject("Error! Found '%s' in %s column of row %d; expected "
                   "1-22, X, or Y. Skipping." %
                   (row[chromcol], chromcol, idx + 1))
            continue

        # test that pos is an int
        try:
            pos_int = int(pos)
        except (TypeError, ValueError):
            reject("Error! Found '%s' in %s column of row %d; expected "
                   "an integer. Skipping." % (pos, poscol, idx + 1))
            continue

        # test that ref is a single character in A,C,T,G (any case)
        refBase = ref.upper() if hasattr(ref, 'upper') else None
        if refBase not in ('A', 'C', 'G', 'T'):
            reject("Error! Found '%s' in %s column of row %d; expected "
                   "A, C, T, or G. Skipping." % (ref, refcol, idx + 1))
            continue

        useUcsc = app.config['GB'] == 'UCSC'
        hgsid = ''
        if useUcsc:
            hgsid = genomebrowser.gb_getSessionId()
            genomeRef = genomebrowser.gb_getSequence(hgsid,
                                                     db=db,
                                                     chrom=chrom,
                                                     left=pos_int - 1,
                                                     right=pos_int,
                                                     leftPad=0,
                                                     rightPad=0)
        else:
            if genome is None:
                genome = sequenceutils.loadGenome(genomeFile)
            genomeRef = sequenceutils.getSequence(genome, chrom,
                                                  pos_int - 1, pos_int)

        # The reference base was validated case-insensitively, so compare
        # it the same way; the template is upper-cased below anyway.
        genomeBase = genomeRef
        if hasattr(genomeRef, 'upper'):
            genomeBase = genomeRef.upper()
        if genomeBase != refBase:
            warn = ("Warning! Reference '%s' for chromosome %s, "
                    "position %s was found to be '%s' in the genome file." %
                    (ref, chrom, pos, genomeRef))
            logging.warning(warn)
            warnings.append(warn)

        # UCSC defaults to all upper case
        if useUcsc:
            # Pad by bracketlen so both back ends honour the parameter
            # (previously hard-coded to 500 here).
            seq = genomebrowser.gb_getSequence(hgsid,
                                               db=db,
                                               chrom=chrom,
                                               left=pos_int - 1,
                                               right=pos_int,
                                               leftPad=bracketlen,
                                               rightPad=bracketlen)
        else:
            seqStart = pos_int - bracketlen - 1
            seqEnd = seqStart + bracketlen + bracketlen + 1
            seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

        bseq = sequenceutils.bracketSequence(seq).upper()
        primer = webprimer3.getPrimer(bseq, chrom, pos_int, primerlen)
        if primer is None:
            logging.warning('getPrimer returned None')
        # NOTE(review): a None primer is still appended, as before; confirm
        # fileutils.primersToCsv tolerates it.
        self.primers.append(primer)
        logger.debug('Updating state for task id %s', str(task_id))
        self.update_state(state='PROGRESS',
                          meta={
                              'current': idx,
                              'total': rowCount,
                              'warnings': warnings
                          })

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s', filename)
    logger.debug('Path: %s', path)
    fileutils.primersToCsv(self.primers, path)
    logger.debug('Done writing to file')

    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}
Пример #4
0
def processRows(self, rows, genomeFile,
                db='hg38',
                chromcol='#CHROM', poscol='POS', refcol='REF',
                bracketlen=500, primerlen='200-500'):
    """Validate variant rows, design one primer per row and write a CSV.

    For each row the required columns, the chromosome (1-22, X or Y), the
    integer position and the single-base reference (A/C/G/T) are checked.
    Invalid rows get an ``ERROR`` placeholder primer plus a warning, so
    ``self.primers`` always holds one entry per input row in input order.
    After every successfully processed row the task state is updated via
    ``self.update_state``.

    Args:
        self: bound task instance; provides ``update_state`` and receives
            the ``primers`` attribute.
        rows: sized iterable of mapping-like rows (``in`` and ``[]`` are
            used on each row).
        genomeFile: path passed to ``sequenceutils.loadGenome`` when
            ``app.config['GB']`` is not ``'UCSC'``.
        db: assembly name forwarded to ``genomebrowser.gb_getSequence``.
        chromcol: row key holding the chromosome.
        poscol: row key holding the position.
        refcol: row key holding the reference base.
        bracketlen: padding on each side of the position used when fetching
            the template sequence.
        primerlen: forwarded unchanged to ``webprimer3.getPrimer``.

    Returns:
        dict with ``current`` and ``total`` (both the number of rows) and
        ``warnings`` (list of message strings).
    """
    # celery kung fu
    self.primers = list()
    warnings = list()
    task_id = processRows.request.id

    rowCount = len(rows)
    # The genome file is loaded on first use and then reused for all rows.
    genome = None

    def skipRow(message):
        # Log the problem, surface it to the caller and append a placeholder
        # so the output stays aligned with the input rows.
        logging.warning(message)
        warnings.append(message)
        self.primers.append(Primer('ERROR', -1, 'ERROR', 'ERROR', -1))

    def canonicalChrom(value):
        # Map 1-22 / X / Y (any case) to "chrN"; anything else, including
        # non-string values such as None, yields None.
        try:
            number = int(value)
        except (TypeError, ValueError):
            if hasattr(value, 'upper') and value.upper() in ('X', 'Y'):
                return "chr%s" % value.upper()
            return None
        if number < 1 or number > 22:
            return None
        return "chr%d" % number

    for idx, row in enumerate(rows):
        logger.info('Processing row %d', idx)
        warnings.append('Processing row %d' % (idx))
        missingColumn = (chromcol not in row
                         or poscol not in row
                         or refcol not in row)
        if missingColumn:
            print("~" * 20 + " MISSING COL IN ROW " + "~" * 20)
            print(row)
            skipRow("Error! Row %d could not be parsed. Skipping." % (idx + 1))
            continue

        pos = row[poscol]
        ref = row[refcol]

        # assume that chrom is 1-22, X or Y
        chrom = canonicalChrom(row[chromcol])
        if chrom is None:
            skipRow("Error! Found '%s' in %s column of row %d; expected "
                    "1-22, X, or Y. Skipping." %
                    (row[chromcol], chromcol, idx + 1))
            continue

        # test that pos is an int
        try:
            pos_int = int(pos)
        except (TypeError, ValueError):
            skipRow("Error! Found '%s' in %s column of row %d; expected "
                    "an integer. Skipping." % (pos, poscol, idx + 1))
            continue

        # test that ref is a single character in A,C,T,G (any case)
        refBase = ref.upper() if hasattr(ref, 'upper') else None
        if refBase not in ('A', 'C', 'G', 'T'):
            skipRow("Error! Found '%s' in %s column of row %d; expected "
                    "A, C, T, or G. Skipping." % (ref, refcol, idx + 1))
            continue

        useUcsc = app.config['GB'] == 'UCSC'
        hgsid = ''
        if useUcsc:
            hgsid = genomebrowser.gb_getSessionId()
            genomeRef = genomebrowser.gb_getSequence(hgsid, db=db, chrom=chrom,
                                                     left=pos_int - 1,
                                                     right=pos_int,
                                                     leftPad=0,
                                                     rightPad=0)
        else:
            if genome is None:
                genome = sequenceutils.loadGenome(genomeFile)
            genomeRef = sequenceutils.getSequence(genome, chrom,
                                                  pos_int - 1, pos_int)

        # Compare case-insensitively, matching how the reference base was
        # validated; the template sequence is upper-cased below as well.
        genomeBase = genomeRef
        if hasattr(genomeRef, 'upper'):
            genomeBase = genomeRef.upper()
        if genomeBase != refBase:
            warn = ("Warning! Reference '%s' for chromosome %s, "
                    "position %s was found to be '%s' in the genome file."
                    % (ref, chrom, pos, genomeRef))
            logging.warning(warn)
            warnings.append(warn)

        # UCSC defaults to all upper case
        if useUcsc:
            # Honour bracketlen here too; the padding used to be fixed at
            # 500 regardless of the parameter.
            seq = genomebrowser.gb_getSequence(hgsid, db=db, chrom=chrom,
                                               left=pos_int - 1,
                                               right=pos_int,
                                               leftPad=bracketlen,
                                               rightPad=bracketlen)
        else:
            seqStart = pos_int - bracketlen - 1
            seqEnd = seqStart + bracketlen + bracketlen + 1
            seq = sequenceutils.getSequence(genome, chrom, seqStart, seqEnd)

        bseq = sequenceutils.bracketSequence(seq).upper()
        primer = webprimer3.getPrimer(bseq, chrom, pos_int, primerlen)
        if primer is None:
            logging.warning('getPrimer returned None')
        # NOTE(review): None is appended as-is, matching prior behaviour;
        # confirm fileutils.primersToCsv copes with it.
        self.primers.append(primer)
        logger.debug('Updating state for task id %s', str(task_id))
        self.update_state(state='PROGRESS',
                          meta={'current': idx,
                                'total': rowCount,
                                'warnings': warnings})

    logger.debug('Creating output file ...')
    filename = str(task_id) + '.csv'
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    logger.debug('Filename: %s', filename)
    logger.debug('Path: %s', path)
    fileutils.primersToCsv(self.primers, path)
    logger.debug('Done writing to file')

    logger.info('Returning result')
    return {'current': rowCount, 'total': rowCount, 'warnings': warnings}