def setUpClass(cls):
    """
    Load the alignment data for ``cls.MOVIE`` from ``cls.BAM_PATH`` and
    store the four loaded pieces on the class as ``datum``, ``unrolled``,
    ``movie_names`` and ``columns``.
    """
    bam_path = cls.BAM_PATH
    log.info("Loading alignments from {c}".format(c=bam_path))
    # alignment_info_from_bam() is indexed by movie; pick the one under test.
    aln_info = alignment_info_from_bam(bam_path)[cls.MOVIE]
    names, unrolled_, data_, cols = from_alignment_file(aln_info)
    cls.datum = data_
    cls.unrolled = unrolled_
    cls.movie_names = names
    cls.columns = cols
示例#2
0
 def setUpClass(cls):
     """
     Read the alignments of ``cls.MOVIE`` out of ``cls.BAM_PATH`` and attach
     the result to the class (``datum``, ``unrolled``, ``movie_names``,
     ``columns``).
     """
     source = cls.BAM_PATH
     log.info("Loading alignments from {c}".format(c=source))
     # The BAM info is keyed by movie; only the configured movie is loaded.
     info_for_movie = alignment_info_from_bam(source)[cls.MOVIE]
     loaded_names, loaded_unrolled, loaded_datum, loaded_columns = \
         from_alignment_file(info_for_movie)
     cls.datum = loaded_datum
     cls.unrolled = loaded_unrolled
     cls.movie_names = loaded_names
     cls.columns = loaded_columns
示例#3
0
def analyze_movies(movies, alignment_file_names, stats_models):
    """
    Process every movie found in each of the given alignment files.

    Each file is read with ``alignment_info_from_bam``; every
    ``(movie, alignment info)`` pair it contains is converted with
    ``from_alignment_file`` and passed on to ``_process_movie_data``.

    :param movies: only used for the count in the final log message; the
        movies that are actually processed come from the files themselves
    :param alignment_file_names: alignment files to read, one after another
    :param stats_models: passed through unchanged to ``_process_movie_data``
    """
    log.info("collecting data from {n} BAM files...".format(
             n=len(alignment_file_names)))
    for file_name in alignment_file_names:
        log.info("reading {f}.pbi".format(f=file_name))
        results = alignment_info_from_bam(file_name)
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for movie, aln_info in results.items():
            log.info("Analyzing Movie {n} in {f}".format(n=movie, f=file_name))
            args = from_alignment_file(aln_info)
            _process_movie_data(movie, file_name, stats_models, *args)
    log.info("Completed analyzing {n} movies.".format(n=len(movies)))
示例#4
0
def analyze_movies(movies, alignment_file_names, stats_models):
    """
    Process every movie found in each of the given alignment files.

    Each file is read with ``alignment_info_from_bam``; every
    ``(movie, alignment info)`` pair it contains is converted with
    ``from_alignment_file`` and passed on to ``_process_movie_data``.

    :param movies: only used for the count in the final log message; the
        movies that are actually processed come from the files themselves
    :param alignment_file_names: alignment files to read, one after another
    :param stats_models: passed through unchanged to ``_process_movie_data``
    """
    log.info("collecting data from {n} BAM files...".format(
             n=len(alignment_file_names)))
    for file_name in alignment_file_names:
        log.info("reading {f}.pbi".format(f=file_name))
        results = alignment_info_from_bam(file_name)
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for movie, aln_info in results.items():
            log.info("Analyzing Movie {n} in {f}".format(n=movie, f=file_name))
            args = from_alignment_file(aln_info)
            _process_movie_data(movie, file_name, stats_models, *args)
    log.info("Completed analyzing {n} movies.".format(n=len(movies)))
示例#5
0
def analyze_movie(movie, alignment_file, stats_models):
    """
    Load the alignments of one movie and feed them to the stats models.

    The regions should only correspond to a single Movie.

    The data returned by ``from_alignment_file`` is wrapped in
    ``CrunchedAlignments``; its reads and subreads are then applied to every
    aggregator of each model whose ``filter_func`` accepts the movie.
    Returns ``None``. When the movie has no alignments a warning is logged
    and the function returns before any model is touched.

    :type movie: Movie
    :type stats_models: list
    """
    started_at = time.time()
    log.info("Analyzing Movie {n}".format(n=movie))

    movie_names, unrolled, data_, columns = from_alignment_file(
        movie, alignment_file)

    # Explicit length check kept on purpose: data_ is presumably an
    # array-like whose truth value may be ambiguous -- TODO confirm.
    if len(data_) == 0:
        log.warning("Movie '{n}' produced no alignments.".format(n=movie))
        return

    crunched = CrunchedAlignments(movie_names, unrolled, data_, columns)

    log.debug("Movie names from crunched {m}.".format(m=movie_names))

    reads = crunched.reads()

    # subreads recarray
    # ["Length", "Accuracy", "isFirst", "modStart", "isFullSubread", "isMaxSubread"]
    subreads = crunched.subreads()

    log.info("Movie")
    log.info(movie)
    log.info(('Number of reads', len(reads)))
    log.info(('Number of subreads', len(subreads)))

    for model in stats_models:
        if not model.filter_func(movie):
            log.warning(
                "model {m}. Skipping movie {r}".format(m=repr(model), r=movie))
            continue
        for aggregator in model.aggregators:
            # Two independent checks (not elif), exactly as before.
            if aggregator.DATA_TYPE == READ_TYPE:
                aggregator.apply(reads)
            if aggregator.DATA_TYPE == SUBREAD_TYPE:
                aggregator.apply(subreads)

    run_time = time.time() - started_at
    _d = dict(n=movie, s=run_time)
    log.info("Completed analyzing Movie {n} with in {s:.2f} sec.".format(**_d))
def analyze_movie(movie, alignment_file, stats_models):
    """
    Load the alignments of a single movie, hand them to
    ``_process_movie_data`` and log how long the whole step took.

    The regions should only correspond to a single Movie

    :type movie: Movie
    :type stats_models: list
    """
    t_start = time.time()
    loaded = from_alignment_file(movie, alignment_file)
    # Unpack into exactly four names so a result of another length still fails here.
    names, unrolled_, records, cols = loaded
    _process_movie_data(
        movie, alignment_file, stats_models, names, unrolled_, records, cols)
    elapsed = time.time() - t_start
    log.info("Completed analyzing Movie {n} with in {s:.2f} sec.".format(
        n=movie, s=elapsed))
def analyze_movies(movies, alignment_file_names, stats_models, nproc=1):
    """
    Process every combination of movie and alignment file sequentially.

    For each pair the data is loaded with ``from_alignment_file`` and passed
    to ``_process_movie_data`` together with ``stats_models``.

    ``nproc`` is accepted but currently unused.
    """
    # FIXME need to re-think this: parallel execution is disabled. The earlier
    # attempt created a multiprocessing.Pool and submitted
    # from_alignment_file(movie, file_name) through pool.apply_async with a
    # functools.partial of _process_movie_data as the callback, followed by
    # pool.close() / pool.join(). The callback running in the main process
    # was noted to take up a lot of time.
    for current_movie in movies:
        for bam_file in alignment_file_names:
            log.info("Analyzing Movie {n}".format(n=current_movie))
            loaded = from_alignment_file(current_movie, bam_file)
            _process_movie_data(current_movie, bam_file, stats_models, *loaded)
    movie_count = len(movies)
    log.info("Completed analyzing {n} movies.".format(n=movie_count))