Python CmpH5Reader示例，pbcore.io.CmpH5Reader Python示例

示例#1

0

显示文件

文件： summarize_compare_by_movie.py 项目： MShaffar19/pbreports

def _get_post_mapping_from_movies(allMovies, cmp_h5):
    """
    Collect per-movie statistics for every alignment in a cmp.h5 file.

    The first time an alignment from a given movie is seen, a new
    ``MovieStats`` is created by copying the descriptive fields of the
    matching entry in ``allMovies``.  Every alignment is then handed to that
    object's ``add`` method.

    :param allMovies: mapping of movie name to a stats object exposing
        ``expt``, ``chip``, ``movie``, ``inst``, ``movieType``, ``setId``,
        ``partId``, ``cellId`` and ``date``
    :param cmp_h5: cmp.h5 input, passed straight to ``CmpH5Reader``
    :returns: dict of {movie: MovieStats}
    """
    postMappingMovies = {}

    reader = CmpH5Reader(cmp_h5)
    try:
        for alignment in reader:
            # movieInfo is a tuple such as
            # (2, 'm101210_151341_Jan_p1_b15', 100.0, 0.009999999776482582);
            # index 1 is the movie name.
            movie = alignment.movieInfo[1]

            if movie not in postMappingMovies:
                stats = allMovies[movie]
                postMappingMovies[movie] = MovieStats(
                    stats.expt,
                    stats.chip,
                    stats.movie,
                    stats.inst,
                    movieType=stats.movieType,
                    setId=stats.setId,
                    partId=stats.partId,
                    cellId=stats.cellId,
                    date=stats.date)
            postMappingMovies[movie].add(alignment)
    finally:
        # Release the file handle even when a lookup or add() fails.
        reader.close()

    return postMappingMovies

示例#2

0

显示文件

文件： CmpH5Compare.py 项目： reiverjohn/pbh5tools

def cmpH5Summarize(inCmp, movieSummary=True, refSummary=True):
    """Summarize a cmp.h5 file.

    Builds a text report with overall counts (reads, references, movies,
    bases, mean read length, mean accuracy) followed by a "Movie Summary"
    and a "Reference Summary" section.

    :param inCmp: cmp.h5 input, passed straight to ``CmpH5Reader``
    :param movieSummary: when true, include the per-movie table; otherwise
        that section holds only a newline
    :param refSummary: when true, include the per-reference table; otherwise
        that section holds only a newline
    :returns: the report as a single string
    """
    tstr = ("filename: %s\nversion:  %s\nn reads:  %d\nn refs:   "
            "%d\nn movies: %d\nn bases:  %d\navg rl:   %d\navg acc:  %g")
    eTbl = Tbl(nBases=Sum(ReadLength),
               avgReadLength=Mean(ReadLength),
               avgAccuracy=Mean(Accuracy))

    def _groupedTable(reader, groupBy):
        # Render the eTbl statistics grouped by the given key as text.
        return rec2txt(toRecArray(query(reader, what=eTbl, groupBy=groupBy)),
                       padding=5,
                       precision=1)

    # The context manager closes the file once all text has been produced.
    with CmpH5Reader(inCmp) as reader:
        rl, acc, mov = zip(*[(r.readLength, r.accuracy, r.movieInfo[0])
                             for r in reader])

        summaryStr = (
            tstr % (os.path.basename(reader.file.filename), reader.version,
                    len(reader), len(reader.referenceInfoTable),
                    len(set(mov)), NP.sum(rl), NP.round(NP.mean(rl)),
                    NP.round(NP.mean(acc), 4)))

        # Only run the grouped queries that will actually be printed.
        movieSummaryTxt = _groupedTable(reader, Movie) if movieSummary else "\n"
        refSummaryTxt = _groupedTable(reader, Reference) if refSummary else "\n"

    return (summaryStr + ("\n\n\t Movie Summary:\n" + movieSummaryTxt) +
            ("\n\n\t Reference Summary:\n" + refSummaryTxt))

示例#3

0

显示文件

    def _mainLoop(self):
        """
        Drive the whole run from the parent process.

        Steps, in order:
        1. Disable the cyclic garbage collector.
        2. Load the reference and IPD model, then launch the worker
           processes.
        3. Open the cmp.h5 (only after the workers exist) and queue work
           chunks for every entry in ``self.refInfo``.
        4. Put one ``None`` sentinel per worker on the work queue, then join
           the workers, the monitoring thread, the results queue and the
           results-collector process.

        Returns 0.
        """

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        # Load reference and IpdModel
        self.loadReferenceAndModel(self.args.reference, self.args.infile)

        # Spawn workers
        self._launchSlaveProcesses()

        # WARNING -- cmp.h5 file must be opened AFTER worker processes have been spawned
        # cmp.h5 we're using -- use this to orchestrate the work
        self.cmph5 = CmpH5Reader(self.args.infile)
        logging.info('Generating kinetics summary for [%s]' % self.args.infile)

        #self.referenceMap = self.cmph5['/RefGroup'].asDict('RefInfoID', 'ID')
        #self.alnInfo = self.cmph5['/AlnInfo'].asRecArray()

        # Main loop -- iterate over the entries of self.refInfo and queue
        # the work chunks for each one.

        # Reset the chunk counter before any work is queued.
        self.workChunkCounter = 0

        # Iterate over references
        for ref in self.refInfo:
            logging.info('Processing reference entry: [%s]' % ref.ID)
            self._queueChunksForReference(ref)

        # Shutdown worker threads with None sentinels
        for i in xrange(self.args.numWorkers):
            self._workQueue.put(None)

        for w in self._workers:
            w.join()

        # Join on the result queue and the resultsCollector process.
        # This ensures all the results are written before shutdown.
        self.monitoringThread.join()
        self._resultsQueue.join()
        self._resultCollectorProcess.join()
        logging.info("ipdSummary.py finished. Exiting.")
        del self.cmph5
        return 0

示例#4

0

显示文件

文件： ReferenceUtils.py 项目： lhon/kineticsTools

    def loadCmpH5Chemistry(cmpH5File):
        """Return the most frequent sequencing chemistry in a cmp.h5 file.

        :param cmpH5File: cmp.h5 input, passed straight to ``CmpH5Reader``
        :returns: the value that occurs most often in the reader's
            ``sequencingChemistry``
        :raises ValueError: if ``sequencingChemistry`` is empty
        """
        # Function-scope import so the block does not depend on the file's
        # import header.
        from collections import Counter

        with CmpH5Reader(cmpH5File) as f:
            chems = f.sequencingChemistry

        # Counter tallies every occurrence.  itertools.groupby only groups
        # *consecutive* equal items, so on unsorted input a repeated key
        # would keep just the size of its last run.
        chemCounts = Counter(chems)
        majorityChem = max(chemCounts, key=chemCounts.get)
        return majorityChem

示例#5

0

显示文件

文件： CmpH5Compare.py 项目： reiverjohn/pbh5tools

def cmpH5Validate(inCmp):
    """Validate a cmp.h5 file.

    The file counts as valid when ``CmpH5Reader`` can open (and close) it.

    :param inCmp: cmp.h5 input, passed straight to ``CmpH5Reader``
    :returns: True if the reader opened the file, False if it raised
    """
    try:
        # The context manager closes the handle again after the check.
        with CmpH5Reader(inCmp):
            return True
    except Exception:
        # Any reader failure means "not valid"; KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return False

示例#6

0

显示文件

def cmpH5Select(inCmpFile,
                outCmp,
                idxs=None,
                groupByStr=None,
                groupByCsv=None,
                whereStr=None,
                outDir="."):
    """Take a vector of indices or a where expression and select a set
    of alignments. If a groupBy is specified, then produce a cmp.h5
    file for each distinct member of the grouping.

    :param inCmpFile: input cmp.h5, passed to ``CmpH5Reader`` / ``doSelect``
    :param outCmp: output file used when there is a single result group
    :param idxs: explicit alignment indices; when truthy, the where/groupBy
        arguments are ignored
    :param groupByStr: expression string evaluated to a groupBy object
        (``DefaultGroupBy`` when None)
    :param groupByCsv: forwarded unchanged to ``query``
    :param whereStr: expression string evaluated to a where object
        (``DefaultWhere`` when None)
    :param outDir: directory for the per-group ``<key>.cmp.h5`` files
    """
    if idxs:
        doSelect(inCmpFile, outCmp, idxs)
        return

    # SECURITY NOTE: whereStr and groupByStr are evaluated as Python code.
    # They must only ever come from a trusted source (e.g. the operator's
    # own command line), never from untrusted input.
    where = DefaultWhere if whereStr is None else eval(whereStr)
    groupBy = DefaultGroupBy if groupByStr is None else eval(groupByStr)
    idxVecs = query(CmpH5Reader(inCmpFile),
                    what=AlignmentIdx,
                    where=where,
                    groupBy=groupBy,
                    groupByCsv=groupByCsv)
    # Materialize the keys so that indexing works on Python 3 as well,
    # where dict.keys() returns a non-indexable view.
    keys = list(idxVecs.keys())

    ## XXX: Should the resultant files be sorted?
    if len(keys) == 1:
        doSelect(inCmpFile, outCmp, idxVecs[keys[0]])
        return

    for k in keys:
        # For groupByCsv, skip group of indexes not identified in csv
        if k == NOTINCSV_LABEL:
            continue
        logging.debug("Processing output for %s" % str(k))
        doSelect(inCmpFile, "/".join([outDir,
                                      "%s.cmp.h5" % str(k)]),
                 idxVecs[k])

示例#7

0

显示文件

文件： CmpH5Compare.py 项目： reiverjohn/pbh5tools

def cmpH5Equal(inCmp1, inCmp2):
    """Compare two cmp.h5 files for equality. Here equality means the
    files hold the same number of alignments and the alignments compare
    equal pairwise, in the same order.

    NOTE(review): an earlier description also required the reference
    information to match, but nothing here compares it explicitly --
    confirm whether alignment equality already covers that.

    :param inCmp1: first cmp.h5, passed to ``CmpH5Reader``
    :param inCmp2: second cmp.h5, passed to ``CmpH5Reader``
    :returns: ``(True, )`` when equal, otherwise ``(False, message)``
    """
    # Both readers are closed on every return path.
    with CmpH5Reader(inCmp1) as cmp1, CmpH5Reader(inCmp2) as cmp2:
        if not len(cmp1) == len(cmp2):
            return (False, "cmp.h5 files differ in length (%d, %d)" %
                    (len(cmp1), len(cmp2)))

        aeq = [a1 == a2 for a1, a2 in zip(cmp1, cmp2)]
        if not all(aeq):
            return (False, "%d alignments differ" % (len(aeq) - sum(aeq)))

    return (True, )

示例#8

0

显示文件

文件： ReferenceUtils.py 项目： lhon/kineticsTools

    def loadCmpH5Tables(cmpH5File):
        """Read the reference and movie info tables from a cmp.h5 file.

        The file is opened only long enough to fetch both tables and is
        closed again before returning, even if a table access raises.

        :param cmpH5File: cmp.h5 input, passed straight to ``CmpH5Reader``
        :returns: tuple ``(referenceInfoTable, movieInfoTable)``
        """
        with CmpH5Reader(cmpH5File) as cmph5:
            refInfoTable = cmph5.referenceInfoTable
            movieInfoTable = cmph5.movieInfoTable

        return (refInfoTable, movieInfoTable)

示例#9

0

显示文件

def labelAlignments():
    """Compute per-alignment barcode calls and store them in the cmp.h5.

    For every alignment in ``runner.args.cmpH5`` the labeled ZMW is looked
    up in the barcode reader for the alignment's movie.  A call is replaced
    by the null barcode (index ``len(barcodeLabels)``) when the lookup
    raises ``KeyError`` or the call falls below ``minNumBarcodes``,
    ``minAvgBarcodeScore`` or ``minScoreRatio``.

    The cmp.h5 is then reopened read/write and the ``BC_INFO_ID``,
    ``BC_INFO_NAME`` and ``BC_ALN_INFO_DS`` datasets are replaced.
    """
    logging.info("Labeling alignments using: %s" % runner.args.inputFofn)
    bcFofn = BarcodeH5Fofn(runner.args.inputFofn)

    with CmpH5Reader(runner.args.cmpH5) as cmpH5:
        # One row per alignment: count, index1, score1, index2, score2.
        bcDS = n.zeros((len(cmpH5), 5), dtype="int32")

        for (i, aln) in enumerate(cmpH5):
            bcReader = bcFofn.readerForMovie(aln.movieInfo.Name)
            try:
                lZmw = bcReader.labeledZmwFromHoleNumber(aln.HoleNumber)
                if lZmw.nScored < runner.args.minNumBarcodes or \
                        lZmw.averageScore < runner.args.minAvgBarcodeScore or \
                        lZmw.scoreRatio < runner.args.minScoreRatio:
                    lZmw = None
            except KeyError:
                lZmw = None

            if lZmw:
                bcDS[i, :] = n.array([
                    lZmw.nScored, lZmw.bestIdx, lZmw.bestScore,
                    lZmw.secondBestIdx, lZmw.secondBestScore
                ])
            else:
                # either no barcode was found for this guy or they got
                # filtered, hence the NULL_BARCODE
                bcDS[i, :] = n.array([
                    0,
                    len(bcReader.barcodeLabels), 0,
                    len(bcReader.barcodeLabels), 0
                ])

    # write to the cmp.h5 file.  The context manager closes the handle even
    # if one of the dataset operations fails.
    with h5.File(runner.args.cmpH5, 'r+') as H5:
        if BC_INFO_ID in H5:
            del H5[BC_INFO_ID]
        if BC_INFO_NAME in H5:
            del H5[BC_INFO_NAME]

        # we use the first one to get the labels, if somehow they
        # don't have all of the same stuff that will be an issue.
        bcLabels = n.concatenate(
            (bcFofn.barcodeLabels, n.array([BARCODE_DELIMITER])))
        H5.create_dataset(BC_INFO_ID,
                          data=n.array(range(0, len(bcLabels))),
                          dtype='int32')
        H5.create_dataset(BC_INFO_NAME, data=bcLabels, dtype=h5.new_vlen(str))
        if BC_ALN_INFO_DS in H5:
            del H5[BC_ALN_INFO_DS]
        alnInfoDS = H5.create_dataset(BC_ALN_INFO_DS, data=bcDS, dtype='int32')
        alnInfoDS.attrs['ColumnNames'] = n.array(
            ['count', 'index1', 'score1', 'index2', 'score2'])
        # force BarcodeMode to have numpy dtype for CmpH5Sort 'extra
        # datasets' routine
        alnInfoDS.attrs['BarcodeMode'] = n.array(bcFofn.scoreMode)

示例#10

0

显示文件

def openAlignmentFile(fname, referenceFastaFname=None, sharedIndex=None):
    """
    Factory function to get a handle to a reader for an alignment file
    (cmp.h5 or BAM), not requiring index capability

    (A `sharedIndex` can still be passed for opening a cmp.h5, for which
    the index is compulsory.)

    :param fname: alignment file name; the suffix selects the reader
    :param referenceFastaFname: forwarded to ``BamReader`` for BAM input
    :param sharedIndex: forwarded to ``CmpH5Reader`` for cmp.h5 input
    :returns: a ``CmpH5Reader`` or ``BamReader``
    :raises ValueError: if `fname` ends with neither "cmp.h5" nor "bam"
    """
    if fname.endswith("cmp.h5"):
        return CmpH5Reader(fname, sharedIndex=sharedIndex)
    elif fname.endswith("bam"):
        return BamReader(fname, referenceFastaFname)
    else:
        # Fail loudly instead of silently handing None back to the caller.
        raise ValueError("Invalid alignment file suffix")

示例#11

0

显示文件

    def _run(self):
        """
        Worker body: open the input file(s), then process chunks from the
        work queue until terminated or until a ``None`` sentinel arrives.

        Each chunk is a ``(chunkId, datum)`` pair; ``onChunk(datum)`` is
        called and ``(chunkId, result)`` is put on the results queue.
        ``onStart`` runs before the loop and ``onFinish`` after it.
        """
        logging.info("Worker %s (PID=%d) started running" %
                     (self.name, self.pid))

        self.caseCmpH5 = CmpH5Reader(self.options.infile)

        # A control cmp.h5 is optional; without one the attribute is None.
        if self.options.control is not None:
            self.controlCmpH5 = CmpH5Reader(self.options.control)
        else:
            self.controlCmpH5 = None

        self.onStart()

        while not self.isTerminated():
            chunkDesc = self._workQueue.get()

            if chunkDesc is None:
                # End-of-input sentinel: forward one to the results queue,
                # acknowledge it, and stop this worker.
                self._resultsQueue.put(None)
                self._workQueue.task_done()
                break

            chunkId, datum = chunkDesc
            logging.info("Got chunk: (%s, %s) -- Process: %s" %
                         (chunkId, str(datum), current_process()))
            result = self.onChunk(datum)

            logging.debug("Process %s: putting result." %
                          current_process())
            self._resultsQueue.put((chunkId, result))
            self._workQueue.task_done()

        self.onFinish()

        logging.info("Process %s (PID=%d) done; exiting." %
                     (self.name, self.pid))

示例#12

0

显示文件

文件： utils.py 项目： sophy7074/GenomicConsensus

def loadCmpH5(filename, disableChunkCache=False):
    """
    Open `filename` with h5py and wrap it in a CmpH5Reader.

    The path is user-expanded and made absolute first.  With
    `disableChunkCache` set, the file is opened read-only through the
    low-level h5py API using a file-access property list whose cache
    settings are all zero; otherwise a plain read-only ``h5py.File`` is
    used.
    """
    fullPath = os.path.abspath(os.path.expanduser(filename))

    if disableChunkCache:
        accessProps = h5py.h5p.create(h5py.h5p.FILE_ACCESS)
        accessProps.set_cache(0, 0, 0, 0)
        lowLevelId = h5py.h5f.open(fullPath,
                                   flags=h5py.h5f.ACC_RDONLY,
                                   fapl=accessProps)
        h5Handle = h5py.File(lowLevelId)
    else:
        h5Handle = h5py.File(fullPath, "r")

    return CmpH5Reader(h5Handle)

示例#13

0

显示文件

文件： main.py 项目： sophy7074/GenomicConsensus

 def _readCmpH5Input(self):
     """
     Open the input named by ``options.inputFilename`` and keep the
     reader as ``self._inCmpH5``.

     A ``BamReader`` is used when ``options.usingBam`` is set; otherwise
     the count of open HDF5 objects is logged and a ``CmpH5Reader`` is
     used.
     """
     inputPath = options.inputFilename

     if options.usingBam:
         self._inCmpH5 = BamReader(inputPath)
         return

     logging.debug(
         "Before open on main process, # hdf5 objects open: %d" %
         h5py.h5f.get_obj_count())
     self._inCmpH5 = CmpH5Reader(inputPath)

示例#14

0

显示文件

文件： test_pbcore_io_AlnFileReaders.py 项目： jrharting/pbcore

    def testLazyChemistryResolution(self):
        """
        Opening a cmp.h5 that lacks chemistry information must succeed;
        ChemistryLookupError is expected only once the chemistry is
        actually accessed, on the reader or on an alignment.  This lazy
        behavior is kept for compatibility.
        """
        # Constructing the reader must not raise.
        reader = CmpH5Reader(data.getCmpH5())

        with assert_raises(ChemistryLookupError):
            reader.sequencingChemistry

        with assert_raises(ChemistryLookupError):
            reader[0].sequencingChemistry

示例#15

0

显示文件

def _get_control_reads(control_cmph5):
    """
    Collect the control reads of a control cmp.h5 file.

    Return a tuple of len == 2:
    Position 0: the ``FullName`` of reference 'ref000001' (control name)
    Position 1: (dict) keyed by control read id ("<movie name>/<hole
    number>"); each value is the tuple ``(accuracy, readLength)`` of the
    last alignment seen for that read.

    A warning is logged when a read id shows up more than once.

    :param control_cmph5: (str) path to control_reads.cmp.h5
    """
    control_reads = {}
    # The context manager closes the reader once everything is extracted.
    with CmpH5Reader(control_cmph5) as c:
        for ca in c:
            read_id = '%s/%d' % (ca.movieInfo.Name, ca.HoleNumber)
            if read_id in control_reads:
                log.warn(
                    'read {i} is control read and has subreads?'.format(
                        i=read_id))
            control_reads[read_id] = (ca.accuracy, ca.readLength)
        name = c.referenceInfo('ref000001').FullName
    return name, control_reads

示例#16

0

显示文件

def openIndexedAlignmentFile(fname,
                             referenceFastaFname=None,
                             sharedIndex=None):
    """
    Factory function to get a handle to a reader for an alignment file
    (cmp.h5 or BAM), requiring index capability (built-in for cmp.h5;
    requires bam.pbi index for BAM)

    The reference FASTA, if provided, must have a FASTA index
    (fasta.fai).

    :param fname: alignment file name; the suffix selects the reader
    :param referenceFastaFname: forwarded to ``IndexedBamReader``
    :param sharedIndex: forwarded to whichever reader is created
    :returns: a ``CmpH5Reader`` or ``IndexedBamReader``
    :raises ValueError: if `fname` ends with neither "cmp.h5" nor "bam"
    """
    if fname.endswith("cmp.h5"):
        return CmpH5Reader(fname, sharedIndex=sharedIndex)
    elif fname.endswith("bam"):
        return IndexedBamReader(fname,
                                referenceFastaFname=referenceFastaFname,
                                sharedIndex=sharedIndex)
    else:
        # Call-style raise is valid on both Python 2 and Python 3.
        raise ValueError("Invalid alignment file suffix")

示例#17

0

显示文件

文件： mask_aligned_reads.py 项目： Velcon-Zheng/pbalign

    def _extractAlignedReads(self):
        """Map every movie name in ``self.inCmpFile`` to the hole numbers
           of its aligned reads and return { Movie: set(HoleNumbers) }.

           Every movie in the movie info table gets an entry, even if it
           has no alignments.  If the reader raises IndexError or
           EmptyCmpH5Error, a message is written to stderr and logged, and
           whatever was collected so far is returned.
        """
        alignedReads = {}

        try:
            # The context manager closes the reader on the error path too.
            with CmpH5Reader(self.inCmpFile) as reader:
                for movie in reader.movieInfoTable.Name:
                    alignedReads.setdefault(movie, set())

                for aln in reader:
                    alignedReads[aln.movieInfo.Name].add(aln.HoleNumber)
        except (IndexError, EmptyCmpH5Error):
            msg = "No aligned reads found in {x}".format(x=self.inCmpFile)
            sys.stderr.write(msg + "\n")
            logging.warn(msg)

        return alignedReads

示例#18

0

显示文件

文件： test.py 项目： jgurtowski/kineticsTools

    def setUp(self):
        """
        Build the fixture: reference contigs and an IpdModel from the
        bundled lambda data, plus a KineticWorker wired up by hand with the
        sample cmp.h5 and test options.
        """
        # Sample data lives in the "data" directory next to this file.
        here = os.path.dirname(os.path.abspath(__file__))
        sampleDir = os.path.join(here, 'data')
        lambdaFasta = os.path.join(sampleDir, 'lambda', 'sequence',
                                   'lambda.fasta')
        sampleCmpH5 = os.path.join(sampleDir,
                                   "p4-c2-lambda-mod-decode.cmp.h5")

        self.contigs = ReferenceUtils.loadReferenceContigs(lambdaFasta,
                                                           sampleCmpH5)
        self.ipdModel = IpdModel(self.contigs)

        # A functional KineticWorker that the tests can poke at manually.
        self.kw = KineticWorker(self.ipdModel)
        self.cmpH5 = CmpH5Reader(sampleCmpH5)

        # The Worker superclass normally supplies these attributes.
        self.kw.caseCmpH5 = self.cmpH5
        self.kw.controlCmpH5 = None

        self.kw.options = self.getOpts()

示例#19

0

显示文件

文件： reprocessMotifSites.py 项目： jgurtowski/kineticsTools

    def _mainLoop(self):
        """
        Drive the whole run from the parent process.

        Disables the cyclic garbage collector, loads the reference and IPD
        model, launches the worker processes, opens the cmp.h5, queues the
        work chunks, then shuts everything down: one ``None`` sentinel per
        worker, followed by joins on the workers, the monitoring thread,
        the results queue and the results-collector process.

        Returns 0.
        """

        # See comments in ipdSummary.py
        gc.disable()

        # Load reference and IpdModel
        # self.loadReference()

        # Load reference and IpdModel
        self.loadReferenceAndModel(self.args.reference, self.args.infile)

        # Spawn workers
        self._launchSlaveProcesses()

        # cmp.h5 we're using -- use this to orchestrate the work
        self.cmph5 = CmpH5Reader(self.args.infile)
        logging.info('Generating kinetics summary for [%s]' % self.args.infile)

        # Reset the chunk counter, then queue all work in a single call.
        self.workChunkCounter = 0
        self._queueChunksForReference()

        # Shutdown worker threads with None sentinels
        for i in xrange(self.args.numWorkers):
            self._workQueue.put(None)

        for w in self._workers:
            w.join()

        # Join on the result queue and the resultsCollector process.
        # This ensures all the results are written before shutdown.
        self.monitoringThread.join()
        self._resultsQueue.join()
        self._resultCollectorProcess.join()
        logging.info("reprocessMotifSites.py finished. Exiting.")
        del self.cmph5
        return 0

示例#20

0

显示文件

文件： test_bam.py 项目： sophy7074/GenomicConsensus

    def __init__(self):
        """
        Open the paired BAM and cmp.h5 test files and cache the first two
        alignments of each (treated as forward and reverse), along with
        clipped versions of them.
        """
        bamPath, cmpH5Path = D.getBamAndCmpH5()
        referenceFasta = D.getLambdaFasta()

        self.b = PacBioBamReader(bamPath, referenceFasta)
        self.c = CmpH5Reader(cmpH5Path)
        self.bBasic = BamReader(bamPath)

        # Sort orders are not the same in general: BAM puts + alignments
        # before - alignments on a tStart tie, while cmp.h5 sorts on tEnd
        # next.  This file has no tStart ties, so the orders agree.
        self.bAlns = list(self.b)
        self.bFwd = self.bAlns[0]
        self.bRev = self.bAlns[1]

        self.cAlns = list(self.c)
        self.cFwd = self.cAlns[0]
        self.cRev = self.cAlns[1]

        # Clip both forward alignments and both reverse alignments to the
        # same windows so they can be compared like for like.
        fwdWindow = (10, 60)
        revWindow = (310, 360)
        self.cFwdClipped = self.cFwd.clippedTo(*fwdWindow)
        self.bFwdClipped = self.bFwd.clippedTo(*fwdWindow)
        self.cRevClipped = self.cRev.clippedTo(*revWindow)
        self.bRevClipped = self.bRev.clippedTo(*revWindow)

示例#21

0

显示文件

文件： test_cmph5lib_CmpH5Sort.py 项目： reiverjohn/pbh5tools

 def __init__(self):
     """Remember the sample cmp.h5 file name and open a reader on it."""
     cmpH5Path = data.getCmpH5()
     self.h5FileName = cmpH5Path
     self.cmpH5 = CmpH5Reader(cmpH5Path)

示例#22

0

显示文件

文件： getIPD.py 项目： narechan/amnh

                    help='index of reference contig (1 if single contig)')
parser.add_argument(
    'refPos',
    type=int,
    help='position of modified cognate, 4th column in motifs.gff')
# -f and -r write to the same destination: -f stores True, -r stores False.
parser.add_argument('-f',
                    dest='fwdStrand',
                    action='store_true',
                    help='us -f flag if + strand in motifs.gff')
parser.add_argument('-r',
                    dest='fwdStrand',
                    action='store_false',
                    help='us -r flag if - strand in motifs.gff')
parser.add_argument(
    '-k',
    type=int,
    default=1,
    help=
    'min number of bases on each side of modified base which must align in read'
)
# The short and long flags must be separate option strings; a single
# '-q,--minMapQV' string registers one oddly named option and leaves
# --minMapQV unusable.
parser.add_argument('-q',
                    '--minMapQV',
                    dest='minMapQv',
                    type=int,
                    default=10,
                    help='minimum mapping QV of read')

args = parser.parse_args()
# Run the query and print its result.
print(
    getIPD(CmpH5Reader(args.cmpH5), args.refIdx, args.refPos, args.fwdStrand,
           args.k, args.minMapQv))

示例#23

0

显示文件

def writeLinesFromCmph5(cmph5, leftAnchor, rightAnchor, offsetDict):
    """Print normalized IPD values for the cleanly matched positions of
    every alignment in a cmp.h5 file.

    For each alignment, transcript positions that are not a match ("M"),
    plus `leftAnchor` positions before and `rightAnchor` positions after
    each of them, are dropped.  The remaining IPDs are divided by the
    movie's frame rate and then by their median.  One line is printed per
    kept position: ``<reference position + offset> <ipd> <strand>``, with
    strand "0" for forward and "1" for reverse.

    :param cmph5: cmp.h5 input, passed straight to ``CmpH5Reader``
    :param leftAnchor: number of positions to drop before each non-match
    :param rightAnchor: number of positions to drop after each non-match
    :param offsetDict: maps reference name (``referenceInfo[3]``) to the
        offset added to each reference position
    """
    reader = CmpH5Reader(cmph5)

    for alignment in reader:
        fps = alignment.movieInfo[2]
        refName = str(alignment.referenceInfo[3])
        strand = str(0) if alignment.isForwardStrand else str(1)

        read_calls = alignment.transcript()
        ref_pos = np.array(list(alignment.referencePositions()))
        ipds = np.array(list(alignment.IPD())) / fps

        # Flag every transcript position that is a mismatch or an indel.
        is_error = np.array([call != "M" for call in read_calls], dtype=bool)

        # Widen each flagged position by the anchors.  Slices are clamped
        # to the array bounds, so an error near the start can no longer
        # wrap around and mask positions at the end of the alignment.
        excluded = is_error.copy()
        for error_idx in np.flatnonzero(is_error):
            excluded[max(0, error_idx - leftAnchor):error_idx] = True
            excluded[error_idx + 1:error_idx + 1 + rightAnchor] = True
        keep = ~excluded

        ref_pos = ref_pos[keep]
        ipds = ipds[keep]
        if ipds.size == 0:
            # Nothing survived the mask; skip the median of an empty array.
            continue

        ipds = ipds / np.median(ipds)
        for pos, ipd in zip(ref_pos, ipds):
            newpos = pos + offsetDict[refName]
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s %s %s" % (newpos, ipd, strand))
示例#24

0

显示文件

文件： run_bqcy.py 项目： njbooher/pquiver

import numpy as np
from pbcore.io import CmpH5Reader
from GenomicConsensus import reference
from projutils import getReads
from bqcy.bqcy import run_bqcy

# Open the alignments and load the matching reference sequence.
cmpH5 = CmpH5Reader(
    '/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5'
)
reference.loadFromFile(
    "/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta",
    cmpH5)

# Fetch the template and reads for the window (146000, 146050).
tmplSeq, realTmplLen, readSeqs, qvInfo = getReads(cmpH5, reference,
                                                  (146000, 146050), 64, 100)

#print(readSeqs[:, 65:])
#exit()

print("POA Consensus: " + ''.join(map(chr, tmplSeq.tolist())))

# Replace the template with a fake one: 50 'A' bases in a 64-slot buffer.
tmplSeq = np.zeros((64), dtype=np.uint8)
# list(...) so that len() also works on Python 3, where map() is lazy.
tmplOrds = list(map(ord, "A" * 50))
tmplSeq[:len(tmplOrds)] = tmplOrds

results = np.zeros(8 * tmplSeq.shape[0], dtype=np.float64)
origTmplScore, bestMutantScore, bestMutatedSeq = run_bqcy(
    tmplSeq, readSeqs, qvInfo, results)
print("Polished: " + ''.join(map(chr, np.asarray(bestMutatedSeq).tolist())))
print("Fake Template: " + ''.join(map(chr, np.asarray(tmplSeq).tolist())))
print(results)

示例#25

0

显示文件

 def test_openFromH5File(self):
     """A CmpH5Reader built from an open h5py.File reports its version."""
     h5Handle = h5py.File(data.getCmpH5(), "r")
     reader = CmpH5Reader(h5Handle)
     EQ("1.2.0.SF", reader.version)

示例#26

0

显示文件

 def __init__(self):
     """Open the sample cmp.h5 and cache its first two alignments."""
     reader = CmpH5Reader(data.getCmpH5())
     self._inCmpH5 = reader
     self.hit0 = reader[0]
     self.hit1 = reader[1]

示例#27

0

显示文件

文件： rainbow.py 项目： lpp1985/lpp_Script

 def _openAlignments():
     """Open ``in_fn``: CmpH5Reader for *.cmp.h5, else openDataFile."""
     opener = CmpH5Reader if in_fn.endswith(".cmp.h5") else openDataFile
     return opener(in_fn)

示例#28

0

显示文件

文件： main.py 项目： sophy7074/GenomicConsensus

    def main(self):
        """
        Top-level driver: parse options, peek at the input to configure the
        run, launch the slave processes, open the real input, run the main
        loop under a monitoring thread, and clean up.

        Returns 0 on success and -1 when ``self._aborting`` is set after
        the main loop.  In debugging mode (``options.doDebugging``) the
        value of ``pdb.runeval`` is returned directly.
        """

        # This looks scary but it's not.  Python uses reference
        # counting and has a secondary, optional garbage collector for
        # collecting garbage cycles.  Unfortunately when a cyclic GC
        # happens when a thread is calling cPickle.dumps, the
        # interpreter crashes sometimes.  See Bug 19704.  Since we
        # don't leak garbage cycles, disabling the cyclic GC is
        # essentially harmless.
        gc.disable()

        parseOptions()
        self._algorithm = self._algorithmByName(options.algorithm)
        self._setupLogging()
        # Fixed seed for the random module.
        random.seed(42)

        logging.info("h5py version: %s" % h5py.version.version)
        logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
        logging.info("ConsensusCore version: %s" %
                     (consensusCoreVersion() or "ConsensusCore unavailable"))
        logging.info("Starting.")

        atexit.register(self._cleanup)
        if options.doProfiling:
            self._makeTemporaryDirectory()

        if options.usingBam:
            logging.warn(
                "'fancyChunking' not yet available for BAM, disabling")
            options.fancyChunking = False

            # Peek at the bam file to build tables
            with BamReader(options.inputFilename) as peekCmpH5:
                logging.info("Peeking at BAM file %s" % options.inputFilename)
                logging.info("Input BAM data: numAlnHits=%d" % len(peekCmpH5))
                resolveOptions(peekCmpH5)
                self._loadReference(peekCmpH5)
                self._checkFileCompatibility(peekCmpH5)
                self._configureAlgorithm(options, peekCmpH5)
        else:
            # We need to peek at the cmp.h5 file to build the The
            # refGroupId<->refGroupFullName mapping, and to determine
            # whether the selected algorithm parameters (Quiver) are
            # compatible with the data.  But we then have to close the
            # file, and let the "real" open happen after the fork.
            with CmpH5Reader(options.inputFilename) as peekCmpH5:
                logging.info("Peeking at CmpH5 file %s" %
                             options.inputFilename)
                logging.info("Input CmpH5 data: numAlnHits=%d" %
                             len(peekCmpH5))
                resolveOptions(peekCmpH5)
                self._loadReference(peekCmpH5)
                self._checkFileCompatibility(peekCmpH5)
                self._configureAlgorithm(options, peekCmpH5)
                options.disableHdf5ChunkCache = self._shouldDisableChunkCache(
                    peekCmpH5)
                if options.disableHdf5ChunkCache:
                    logging.info(
                        "Will disable HDF5 chunk cache (large number of datasets)"
                    )
            logging.debug("After peek, # hdf5 objects open: %d" %
                          h5py.h5f.get_obj_count())

        if options.dumpEvidence:
            self._setupEvidenceDumpDirectory(options.evidenceDirectory)

        # Launch the slaves first; the real input is opened only afterwards.
        self._launchSlaves()
        self._readCmpH5Input()

        monitoringThread = threading.Thread(target=monitorSlaves,
                                            args=(self, ))
        monitoringThread.start()

        try:
            if options.doProfiling:
                cProfile.runctx("self._mainLoop()",
                                globals=globals(),
                                locals=locals(),
                                filename=os.path.join(
                                    options.temporaryDirectory,
                                    "profile-main.out"))

            elif options.doDebugging:
                logging.info("PID: %d", os.getpid())
                # Prefer ipdb when it can be imported, else fall back to pdb.
                try:
                    import ipdb as pdb
                except:
                    import pdb
                return pdb.runeval("self._mainLoop()", globals(), locals())
            else:
                self._mainLoop()
        except:
            # Any failure in the main loop is turned into an abort that
            # carries the formatted traceback.
            why = traceback.format_exc()
            self.abortWork(why)

        monitoringThread.join()

        if self._aborting:
            logging.error("Aborting")
            return -1
        else:
            logging.info("Finished.")

        if options.doProfiling:
            self._printProfiles()

        # close h5 file.
        self._inCmpH5.close()
        return 0

示例#29

0

显示文件

    return (h[1][0:-1], h[0])


# Histograms of the "rs" dataset for the d and f stats, for the "m" and
# "um" selections, with a minimum value of 0.25.
dh = StatsHist(dStats, dataset="rs", which="m", minValue=0.25)
fh = StatsHist(fStats, dataset="rs", which="m", minValue=0.25)
duh = StatsHist(dStats, dataset="rs", which="um", minValue=0.25)
fuh = StatsHist(fStats, dataset="rs", which="um", minValue=0.25)

# plt.axes must be *called* to obtain an Axes object; binding the bare
# function would pass a function as the axes of every scatter below.
ax = plt.axes()
plt.scatter(dh[0], dh[1], axes=ax)
plt.scatter(fh[0], fh[1], axes=ax, color="red")
plt.scatter(duh[0], duh[1], axes=ax, color="LightBlue")
plt.scatter(fuh[0], fuh[1], axes=ax, color="pink")
plt.show()

dCmpR = CmpH5Reader(dcmp)

# GC value of every subread in the "s" and "us" sets of each stats object.
mgc = numpy.array([GetSubreadGC(sr) for sr in gStats.npdata["s"]])
umgc = numpy.array([GetSubreadGC(sr) for sr in gStats.npdata["us"]])

dmgc = numpy.array([GetSubreadGC(sr) for sr in dStats.npdata["s"]])
dumgc = numpy.array([GetSubreadGC(sr) for sr in dStats.npdata["us"]])

hmgc = numpy.array([GetSubreadGC(sr) for sr in hStats.npdata["s"]])
humgc = numpy.array([GetSubreadGC(sr) for sr in hStats.npdata["us"]])

def GetLengths(subreads):
    """Return a numpy array holding the basecall length of each subread."""
    lengths = []
    for subread in subreads:
        lengths.append(len(subread.basecalls()))
    return numpy.array(lengths)

示例#30

0

显示文件

文件： YieldAccumulation.py 项目： knyquist/biotk

 def _openCmpH5(self, aset_path):
     """Print `aset_path`, then open it as a cmp.h5.

     :returns: tuple ``(CmpH5Reader(aset_path), True)``
     """
     # Single-argument print() gives the same output on Python 2 and 3;
     # the bare print statement is a SyntaxError on Python 3.
     print(aset_path)
     return CmpH5Reader(aset_path), True