Пример #1
0
def test_named_QCMetricRecord_repr(obj_a1, obj_b):
    """The repr of a named record shows its metrics and the ``name`` keyword."""
    expected = (
        "QCMetricRecord([QCMetric('a', OrderedDict([(1, 2)])), "
        "QCMetric('b', OrderedDict([(3, 4)]))], name='dis_my_name')"
    )
    named_record = QCMetricRecord([obj_a1, obj_b], name="dis_my_name")
    assert repr(named_record) == expected
Пример #2
0
def main(args):
    """Run RSEM on an annotation bam and write a gene-count QC json.

    Steps, in order:
      1. Extract the gzipped RSEM index archive into the current directory
         (members are selected by ``make_modified_TarInfo``).
      2. Build the RSEM command line from ``RSEM_COMMAND`` and run it.
      3. Count detected genes in ``<bam_root>_rsem.genes.results`` via
         ``calculate_number_of_genes_detected``.
      4. Write the count to ``<bam_root>_number_of_genes_detected.json``.

    ``bam_root`` is the basename of ``args.anno_bam`` with a trailing
    ``.bam`` removed.

    Args:
        args: parsed arguments; this function reads ``anno_bam``,
            ``rsem_index``, ``rnd_seed``, ``ncpus``, ``ramGB``,
            ``read_strand`` and ``endedness``.

    Raises:
        subprocess.CalledProcessError: if the RSEM command exits with a
            non-zero status.
    """
    # Raw string: a bare "\." in a normal literal is an invalid escape sequence.
    bam_root = re.sub(r"\.bam$", "", os.path.basename(args.anno_bam))

    with tarfile.open(args.rsem_index, "r:gz") as archive:
        archive.extractall(
            ".", members=make_modified_TarInfo(archive, "rsem_index"))

    rsem_call = shlex.split(
        RSEM_COMMAND.format(
            rnd_seed=args.rnd_seed,
            ncpus=args.ncpus,
            ramGB=args.ramGB,
            fwd_prob=strand_to_fwd_prob(args.read_strand),
            paired_end=format_endedness(args.endedness),
            anno_bam=args.anno_bam,
            bam_root=bam_root,
        ))
    logger.info("Running RSEM command %s", " ".join(rsem_call))
    # Fail here if RSEM fails, rather than later on a missing or stale
    # results file.
    subprocess.check_call(rsem_call)

    gene_quant_fn = bam_root + "_rsem.genes.results"
    number_of_genes_detected = calculate_number_of_genes_detected(
        gene_quant_fn)

    qc_record = QCMetricRecord()
    qc_record.add(
        QCMetric(
            "number_of_genes_detected",
            {"number_of_genes_detected": number_of_genes_detected},
        ))

    with open(bam_root + "_number_of_genes_detected.json", "w") as f:
        json.dump(qc_record.to_ordered_dict(), f)
Пример #3
0
def test_QCMetricRecord_repr(obj_a1, obj_b):
    """The repr of an unnamed record shows only its list of metrics."""
    expected = (
        "QCMetricRecord([QCMetric('a', OrderedDict([(1, 2)])), "
        "QCMetric('b', OrderedDict([(3, 4)]))])"
    )
    unnamed_record = QCMetricRecord([obj_a1, obj_b])
    assert repr(unnamed_record) == expected
def main(args):
    """Write the Spearman correlation of two quantification tables as QC json.

    Both files in ``args.quants`` are read as header-less, tab-separated
    tables with the first four lines skipped. Column 1 of the first table
    is correlated against column 1 of the second, and the result is written
    to ``args.output_filename`` under the key ``spearman_correlation``.

    Args:
        args: parsed arguments; reads ``quants`` (indexable, two paths) and
            ``output_filename``.
    """
    read_options = {"sep": "\t", "header": None, "skiprows": 4}
    first_table = pd.read_csv(args.quants[0], **read_options)
    second_table = pd.read_csv(args.quants[1], **read_options)

    rho = first_table[1].corr(second_table[1], method="spearman")

    record = QCMetricRecord()
    record.add(QCMetric("spearman_correlation", {"spearman_correlation": rho}))

    with open(args.output_filename, "w") as out:
        json.dump(record.to_ordered_dict(), out)
Пример #5
0
def main(args):
    """Compute gene type counts for a bam and write them as QC json.

    Loads the transcript-id to gene-type mapping with ``read_dict_from_tsv``,
    passes it with the input bam to ``get_gene_type_counts``, wraps the
    result in a ``gene_type_count`` metric and dumps the record to
    ``args.output_filename``.

    Args:
        args: parsed arguments; reads ``tr_id_to_gene_type_tsv``,
            ``input_bam`` and ``output_filename``.
    """
    record = QCMetricRecord()

    mapping_path = args.tr_id_to_gene_type_tsv
    logger.info(
        "Reading transcript id to gene type mapping from %s", mapping_path)
    gene_type_by_transcript = read_dict_from_tsv(mapping_path)

    logger.info("Calculating gene type counts for bam %s", args.input_bam)
    counts = get_gene_type_counts(gene_type_by_transcript, args.input_bam)
    record.add(QCMetric("gene_type_count", counts))

    logger.info("Writing QC output into %s", args.output_filename)
    with open(args.output_filename, "wt") as out:
        json.dump(record.to_ordered_dict(), out)
def main(args):
    """Count genes with an aggregated abundance of at least 1 and write QC json.

    Reads the tab-separated abundance table, passes it through
    ``remove_genomic_transcripts`` and ``filter_startswith_prefix`` (with
    ``args.idprefix``), aggregates by gene with
    ``calculate_abundances_aggregated_by_gene`` on ``args.counts_colname``,
    and writes the number of entries ``>= 1`` to ``args.outfile`` under the
    key ``number_of_genes_detected``.

    Args:
        args: parsed arguments; reads ``abundance``, ``idprefix``,
            ``counts_colname`` and ``outfile``.
    """
    abundance = pd.read_csv(args.abundance, sep="\t")
    abundance_filtered = filter_startswith_prefix(
        remove_genomic_transcripts(abundance), args.idprefix)
    gene_counts = calculate_abundances_aggregated_by_gene(
        abundance_filtered, args.counts_colname)
    # Vectorized count instead of a Python-level loop over the booleans.
    # int() makes the value a native int so json.dump can serialize it.
    # NOTE(review): assumes gene_counts is a pandas Series -- confirm.
    number_of_genes_detected = int((gene_counts >= 1).sum())
    number_of_genes_record = QCMetricRecord()
    number_of_genes_metric = QCMetric(
        "number_of_genes_detected",
        {"number_of_genes_detected": number_of_genes_detected},
    )
    number_of_genes_record.add(number_of_genes_metric)
    with open(args.outfile, "w") as fp:
        json.dump(number_of_genes_record.to_ordered_dict(), fp)
def main(args):
    """Collect miRNA expression and STAR alignment QC metrics into one json.

    The quantification table is read as a header-less, tab-separated file
    with the first four lines skipped; column 1 holds the counts. Three
    metrics are written to ``args.output_filename``:

    * ``expressed_mirnas``: number of rows whose counts-per-million is >= 2.
    * ``star_qc_metric``: content parsed from ``args.star_log`` by
      ``parse_starlog``.
    * ``aligned_reads``: sum of the STAR log fields
      "Uniquely mapped reads number" and
      "Number of reads mapped to multiple loci".

    Args:
        args: parsed arguments; reads ``quants``, ``star_log`` and
            ``output_filename``.
    """
    # Lazy logger arguments: formatting happens only if the record is emitted.
    logger.info("Reading input tsv: %s", args.quants)
    quants_tsv = pd.read_csv(args.quants, sep="\t", header=None, skiprows=4)
    # calculate number of mirnas expressed at cpm>=2
    per_million = quants_tsv[1].sum() / 1000000
    quants_tsv["cpm"] = quants_tsv[1] / per_million
    # Vectorized count; int() yields a native int for JSON serialization.
    cpm_gte2 = int((quants_tsv["cpm"] >= 2).sum())
    star_qc_record = QCMetricRecord()
    cpm_metric = QCMetric("expressed_mirnas", {"expressed_mirnas": cpm_gte2})
    # get metrics from star log
    star_qc = QCMetric("star_qc_metric", args.star_log, parse_starlog)
    star_qc_record.add_all([cpm_metric, star_qc])
    # calculate number of reads (unique + multimapping)
    reads_mapped = int(star_qc.content["Uniquely mapped reads number"]) + int(
        star_qc.content["Number of reads mapped to multiple loci"]
    )
    reads_mapped_qc = QCMetric("aligned_reads", {"aligned_reads": reads_mapped})
    star_qc_record.add(reads_mapped_qc)
    logger.info("Writing output json %s", args.output_filename)
    with open(args.output_filename, "w") as fp:
        json.dump(star_qc_record.to_ordered_dict(), fp)
def main(args):
    """Write the Spearman correlation of two replicates' gene counts as QC json.

    Each replicate's tab-separated abundance table is passed through
    ``remove_genomic_transcripts`` and ``filter_startswith_prefix`` (with
    that replicate's id prefix), then aggregated by gene on its last column.
    The two results are outer-aligned with missing entries filled with 0,
    and their Spearman correlation is written to ``args.outfile`` in a
    ``replicates_correlation`` metric.

    Args:
        args: parsed arguments; reads ``rep1_abundance``, ``rep2_abundance``,
            ``rep1_idprefix``, ``rep2_idprefix`` and ``outfile``.
    """
    abundance_1 = pd.read_csv(args.rep1_abundance, sep="\t")
    abundance_2 = pd.read_csv(args.rep2_abundance, sep="\t")

    filtered_1 = filter_startswith_prefix(
        remove_genomic_transcripts(abundance_1), args.rep1_idprefix)
    filtered_2 = filter_startswith_prefix(
        remove_genomic_transcripts(abundance_2), args.rep2_idprefix)
    # Release the raw tables once filtered (presumably to limit peak memory).
    del abundance_1, abundance_2

    counts_1 = calculate_abundances_aggregated_by_gene(
        filtered_1, filtered_1.columns[-1])
    counts_2 = calculate_abundances_aggregated_by_gene(
        filtered_2, filtered_2.columns[-1])
    del filtered_1, filtered_2

    aligned_1, aligned_2 = counts_1.align(
        counts_2, join="outer", fill_value=0)
    rho = aligned_1.corr(aligned_2, method="spearman")

    record = QCMetricRecord(
        [QCMetric("replicates_correlation", {"spearman_correlation": rho})])
    with open(args.outfile, "w") as out:
        json.dump(record.to_ordered_dict(), out)
Пример #9
0
def qc_record():
    """Return a new QCMetricRecord constructed with no arguments."""
    fresh_record = QCMetricRecord()
    return fresh_record
Пример #10
0
def test_QCMetricRecord_getname():
    """The ``name`` given at construction is readable via ``.name``."""
    expected_name = "dis_my_name"
    record = QCMetricRecord(name=expected_name)
    assert record.name == expected_name
Пример #11
0
def test_add_all_failure_because_not_unique(obj_a1, obj_a2, obj_b):
    """A rejected ``add_all`` must leave the record with its single metric."""
    record = QCMetricRecord([obj_a1])
    rejected_batch = [obj_b, obj_a2]
    with pytest.raises(AssertionError):
        record.add_all(rejected_batch)
    # Nothing from the rejected batch may remain in the record.
    assert len(record) == 1
Пример #12
0
def test_add_all_to_nonempty_success(qc_record, obj_a1, obj_b, obj_c, obj_d):
    """Adding two metrics to a two-metric record yields a length of four."""
    # ``qc_record`` is requested but not used by this test's body.
    record = QCMetricRecord([obj_a1, obj_b])
    extra_metrics = [obj_c, obj_d]
    record.add_all(extra_metrics)
    assert len(record) == 4
Пример #13
0
def test_init_from_list_success(obj_a1, obj_b):
    """A record built from a list holds the same objects in the same order."""
    record = QCMetricRecord([obj_a1, obj_b])
    for position, expected in enumerate((obj_a1, obj_b)):
        assert record.metrics[position] is expected
Пример #14
0
def test_init_from_list_not_unique(obj_a1, obj_a2):
    """Constructing a record from ``[obj_a1, obj_a2]`` raises AssertionError."""
    clashing_metrics = [obj_a1, obj_a2]
    with pytest.raises(AssertionError):
        QCMetricRecord(clashing_metrics)