def create_precision_report_row(
    classification: float, gt_conf: float = 0, sample: str = "sample1"
) -> pd.Series:
    """Build a single precision-report row as a ``pd.Series``.

    Args:
        classification: Value stored unchanged under ``"classification"``.
        gt_conf: Passed to the ``ProbeHeader`` used for the query probe.
        sample: Value stored under ``"sample"``.

    Returns:
        A series with the keys ``sample``, ``query_probe_header``,
        ``ref_probe_header`` and ``classification``. Both header fields hold
        the ``str()`` of a ``ProbeHeader``; the reference one is built with
        no arguments.
    """
    reference_header = ProbeHeader()
    query_header = ProbeHeader(gt_conf=gt_conf)
    return pd.Series(
        data={
            "sample": sample,
            "query_probe_header": str(query_header),
            "ref_probe_header": str(reference_header),
            "classification": classification,
        }
    )
def create_classifier_with_two_entries(cls: Type[Classifier]) -> Classifier:
    """Instantiate ``cls`` on a temporary SAM file holding two records.

    Both records use the same reference probe header as their reference
    name. The first has flag 0, CIGAR ``56M``, ``NM:i:0`` and MAPQ 60; the
    second has flag 2048, CIGAR ``43M``, ``NM:i:1`` / ``MD:Z:21T21`` and
    MAPQ 0.

    Args:
        cls: The classifier class to instantiate. It is called with the
            object returned by ``create_tmp_sam``.

    Returns:
        The instance produced by ``cls(sam)`` (an instance, not a class).
    """
    ref_header = ProbeHeader(
        chrom="GC00000422_2",
        sample="CFT073",
        pos=603,
        interval=ProbeInterval(25, 32),
        svtype="PH_SNPs",
        gt_conf=89.5987,
    )

    def build_record(query_header, flag, pos, mapq, cigar, sequence, nm, md, header):
        # Parse one tab-separated SAM line against ``header``.
        return pysam.AlignedSegment.fromstring(
            f"{query_header}\t{flag}\t{ref_header}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:43\tXS:i:32",
            header,
        )

    # This is the only header written to the file.
    header = create_sam_header(str(ref_header), 64)
    record1 = build_record(
        query_header=ProbeHeader(interval=ProbeInterval(12, 17)),
        flag=0,
        pos=1,
        mapq=60,
        cigar="56M",
        sequence="AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT",
        nm="NM:i:0",
        md="MD:Z:56",
        header=header,
    )

    # The second record is parsed against its own header (second argument 57
    # instead of 64); that header is never written to the file.
    # NOTE(review): presumably the second argument is the reference length -
    # confirm against create_sam_header.
    record2 = build_record(
        query_header=ProbeHeader(chrom="3", pos=14788, interval=ProbeInterval(21, 22)),
        flag=2048,
        pos=5,
        mapq=0,
        cigar="43M",
        sequence="CGCGAAAGCCCTGACCATCTGCACCGTGTCTGACCACATCCGC",
        nm="NM:i:1",
        md="MD:Z:21T21",
        header=create_sam_header(str(ref_header), 57),
    )

    contents = str(header) + "\n"
    contents += record1.to_string() + "\n"
    contents += record2.to_string() + "\n"

    sam = create_tmp_sam(contents)
    return cls(sam)
def create_incorrect_primary_sam_record() -> pysam.AlignedSegment:
    """Return a fixed SAM record with flag 0 and one recorded mismatch.

    The record is a 56-base alignment (CIGAR ``56M``) at position 1 with
    MAPQ 60, ``NM:i:1`` and ``MD:Z:12T43``. The query probe header carries
    ``ProbeInterval(12, 13)``.

    NOTE(review): the MD tag places the mismatch after 12 matching bases,
    which presumably falls inside the query probe interval and is what makes
    the record "incorrect" - confirm against the classifier.

    Returns:
        The ``pysam.AlignedSegment`` parsed from the SAM line.
    """
    # Reference side: this header string is also the reference name of the
    # SAM header the record is parsed against.
    ref_header = ProbeHeader(
        chrom="GC00000422_2",
        sample="CFT073",
        pos=603,
        interval=ProbeInterval(25, 32),
        svtype="PH_SNPs",
        gt_conf=89.5987,
    )
    header = create_sam_header(str(ref_header), 64)

    # Query side and alignment fields.
    query_header = ProbeHeader(interval=ProbeInterval(12, 13))
    flag, pos, mapq = 0, 1, 60
    cigar, nm, md = "56M", "NM:i:1", "MD:Z:12T43"
    sequence = "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT"

    sam_line = f"{query_header}\t{flag}\t{ref_header}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:43\tXS:i:32"
    return pysam.AlignedSegment.fromstring(sam_line, header)
def create_correct_supplementary_sam_record() -> pysam.AlignedSegment:
    """Return a fixed SAM record with flag 2048 (supplementary alignment).

    The record is a 43-base alignment (CIGAR ``43M``) at position 5 with
    MAPQ 0, ``NM:i:1`` and ``MD:Z:19T23``. The query probe header carries
    ``chrom="3"``, ``pos=14788`` and ``ProbeInterval(21, 22)``.

    NOTE(review): the MD tag places the single mismatch after 19 matching
    bases, which presumably lies outside the query probe interval and is why
    the record counts as "correct" - confirm against the classifier.

    Returns:
        The ``pysam.AlignedSegment`` parsed from the SAM line.
    """
    # Reference side: this header string is also the reference name of the
    # SAM header the record is parsed against.
    ref_header = ProbeHeader(
        chrom="GC00000422_2",
        sample="CFT073",
        pos=603,
        interval=ProbeInterval(25, 32),
        svtype="PH_SNPs",
        gt_conf=89.5987,
    )
    header = create_sam_header(str(ref_header), 57)

    # Query side and alignment fields.
    query_header = ProbeHeader(chrom="3", pos=14788, interval=ProbeInterval(21, 22))
    flag, pos, mapq = 2048, 5, 0
    cigar, nm, md = "43M", "NM:i:1", "MD:Z:19T23"
    sequence = "CGCGAAAGCCCTGACCATCTGCACCGTGTCTGACCACATCCGC"

    sam_line = f"{query_header}\t{flag}\t{ref_header}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*\t{nm}\t{md}\tAS:i:43\tXS:i:32"
    return pysam.AlignedSegment.fromstring(sam_line, header)
def create_recall_report_row(
    truth_probe_header: str,
    classification: AlignmentAssessment,
    gt_conf: float = 0,
    sample: str = "sample1",
    with_gt_conf=False,
) -> pd.Series:
    """Build a single recall-report row as a ``pd.Series``.

    Args:
        truth_probe_header: Stored (after ``str()``) as the query probe header.
        classification: Its ``.value`` is stored under ``"classification"``
            and decides ``"good_eval"``.
        gt_conf: Passed to the ``ProbeHeader`` used as the reference header;
            also stored under ``"GT_CONF"`` when ``with_gt_conf`` is true.
        sample: Value stored under ``"sample"``.
        with_gt_conf: When true, append a ``"GT_CONF"`` entry to the row.

    Returns:
        A series whose ``"good_eval"`` is ``True`` exactly when the
        classification value is ``primary_correct``, ``secondary_correct``
        or ``supplementary_correct``. The variant-annotation columns
        (``PVID`` through ``NB_OF_SAMPLES``) are all ``None``.
    """
    vcf_header = ProbeHeader(gt_conf=gt_conf)
    outcome = classification.value
    correct_outcomes = ("primary_correct", "secondary_correct", "supplementary_correct")

    row = {
        "sample": sample,
        "query_probe_header": str(truth_probe_header),
        "ref_probe_header": str(vcf_header),
        "classification": outcome,
        "good_eval": outcome in correct_outcomes,
    }
    # Placeholder columns, always empty in this fixture row.
    row.update(
        dict.fromkeys(
            (
                "PVID",
                "NB_ALL",
                "ALL_ID",
                "NB_DIFF_ALL_SEQ",
                "ALL_SEQ_ID",
                "NB_OF_SAMPLES",
            )
        )
    )
    if with_gt_conf:
        row["GT_CONF"] = gt_conf

    return pd.Series(data=row)