# Fragment: store one case's sequences in the OriginalSeqs table, then open the
# next locus database.  The enclosing per-case loop starts above this point.
#
# For each of 'PS1' and 'PS2' the HLATyping / Donor / Recipient entries of
# case_SeqInfo[PS] are combined with the case-level fields into a 9-field record
# and inserted through a parameterized INSERT (9 placeholders).
# The connection is then committed and closed, and available_records is saved
# with IMGTdbIO.save_dict2pickle under output + 'SG41_52_HLA_' + locus + '_paired'.
#
# "Class II" part: takes the 5th path returned by glob.glob over AvailDB/*.db,
# reads the locus from the 5th '_'-separated field of that path, connects to it
# and sets conn.row_factory to sql.Row.
#
# NOTE(review): statement boundaries of this fragment are collapsed onto one
# physical line; whether conn.commit() belongs inside the PS loop cannot be told
# from this text -- confirm against the original layout.
# NOTE(review): glob.glob returns paths in arbitrary, filesystem-dependent order,
# so all_DB_files[4] is not guaranteed to be the same locus on every machine;
# selecting the file by locus name would be safer -- confirm intended locus.
# NOTE(review): db_file.split('_')[4] also counts the underscore in the
# 'SG41_52' directory name, so it depends on the exact path layout.
# NOTE(review): `sql` is presumably the sqlite3 module imported under an alias.
QC = str(case_SeqInfo['QC']) # PS1 PS_set = ['PS1', 'PS2'] for PS in PS_set: HLATyping = case_SeqInfo[PS]['HLATyping'] Donor = case_SeqInfo[PS]['Donor'] Recipient = case_SeqInfo[PS]['Recipient'] record = (BMT_caseID, Audit, Active, Comment, QC, HLATyping, PS, Donor, Recipient, ) cursor.execute('INSERT INTO OriginalSeqs VALUES (?,?,?,?,?,?,?,?,?)', record) conn.commit() conn.close() fname = output + 'SG41_52_HLA_' + locus + '_paired' IMGTdbIO.save_dict2pickle(available_records, fname) #aa = IMGTdbIO.load_pickle2dict(fname, output) ################# # Class II ################# all_DB_files = glob.glob("../Output/SG41_52/2018/IMGTv3310/AvailDB/*.db") db_file = all_DB_files[4] ## 0: DPB1 2:DRB1 5:DQB1 locus = db_file.split('_')[4] conn = sql.connect(db_file) # automatically creates a file if doesn't exist conn.row_factory = sql.Row # Each row is a dictionary: {colNames: Value} cursor = conn.cursor()
# Fragment: begins with the tail of a `record` tuple whose opening is above this
# point.  The record is inserted into DR_pair_comparison through a parameterized
# INSERT with 11 placeholders, the connection is committed and closed, and
# DRpair_seqInfo is saved with IMGTdbIO.save_dict2pickle under
# '.../SG39_HLA_<locus>_wComparison'.
#
# "check GL-string match" part: DRpair_seqInfo is reset to an empty dict and
# refilled per locus ('A', 'B', 'C', 'DRB1', 'DQB1'; 'DPB1' is commented out)
# from the matching '_wComparison.pkl' files.  All_caseIDs accumulates the keys
# of every per-locus dict and is then de-duplicated through set(), so its order
# is not stable.  The Matching_cases_stats literal that follows continues past
# the end of this fragment.
#
# NOTE(review): the save path has no extension while the load path ends in
# '.pkl'; presumably save_dict2pickle appends '.pkl' itself -- confirm.
# NOTE(review): statement boundaries of this fragment are collapsed onto one
# physical line; restore the original layout before running.
PS2_HLATyping, PS2_GLstringM, PS2_SeqM, Active, Audit, Comment, ) cursor.execute( 'INSERT INTO DR_pair_comparison VALUES (?,?,?,?,?,?,?,?,?,?,?)', record) conn.commit() conn.close() fname = '../Output/SG39/2018/SG39_DRpairs/SG39_HLA_' + locus + '_wComparison' IMGTdbIO.save_dict2pickle(DRpair_seqInfo, fname) ########### check GL-string match, sequence matching DRpair_seqInfo = {} Loci = ['A', 'B', 'C', 'DRB1', 'DQB1'] #, 'DPB1'] All_caseIDs = [] for locus in Loci: fname = '../Output/SG39/2018/SG39_DRpairs/SG39_HLA_' + locus + '_wComparison.pkl' DRpair_seqInfo[locus] = IMGTdbIO.load_pickle2dict(fname) All_caseIDs += list(DRpair_seqInfo[locus].keys()) All_caseIDs = list(set(All_caseIDs)) # 3412 total Matching_cases_stats = { "ClassI_paired": [], "fiveLoci_paired": [],
if CaseStats[caseID][locus]['PS1']['ARS'] > 0: LocusStats[typing]['ARS'].append(caseID) if CaseStats[caseID][locus]['PS1']['Non_ARS_exon'] >0: LocusStats[typing]['Non_ARS_exon'].append(caseID) if CaseStats[caseID][locus]['PS1']['Intron'] > 0: LocusStats[typing]['Intron'].append(caseID) for key, item in mm_locus_stats['MMannotation'].items(): if key.isdigit(): if item in LocusStats[typing].keys(): LocusStats[typing][item].append(caseID) else: LocusStats[typing] = {item: [caseID]} ClassI_stats = {'CaseStats': CaseStats, 'LocusStats': LocusStats} IMGTdbIO.save_dict2pickle(ClassI_stats, '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/ClassI_Stats_0125_'+groupType) #1220_'+groupType) # Class II #Group_fname = '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/ClassI_Stats_0125_' + groupType + '.pkl' #Stats_Dict = IMGTdbIO.load_pickle2dict(Group_fname) #CaseStats = Stats_Dict['CaseStats'] #LocusStats = Stats_Dict['LocusStats'] for caseID in group_caseIDs: # for locus in ClassII_loci: bothMM_output = "../Output/SG41_52/2018/IMGTv3310/SG41_52_bothMisMatched_locus_" + locus + "_0125_TargetedAlignment/" singleMM_output = "../Output/SG41_52/2018/IMGTv3310/SG41_52_singleMisMatched_" + locus + "_0125_TargetedAlignment/" ### Cases where both sequences don't match
# Fragment: begins inside an if/elif chain whose head is above this point.
#
# Each formatted mismatch annotation (annReads) is filed under
# CaseMatchTable[key][locus][ps] in one of four lists, chosen from the text
# before the first '.' in annItem: 'Intron', 'UTR', or -- for exons -- 'ARS'
# when that exon name is listed in ARS_exon and 'non_ARS_exon' otherwise.
#
# CaseMatchTable is pickled twice through IMGTdbIO.save_dict2pickle: once to a
# file suffixed with the current locus and once to the combined
# 'FiveLoci_paired_case_MatchRecord' file.  "Count the cases" part: the combined
# table is loaded back from its '.pkl' file and Matching_cases_stats is loaded
# from SG39_pairedCases_Stats.pkl.
#
# NOTE(review): the save paths have no extension while the load path ends in
# '.pkl'; presumably save_dict2pickle appends '.pkl' itself -- confirm.
# NOTE(review): the bucket key here is 'non_ARS_exon' (lower-case n) whereas the
# CaseStats lookups in this file use 'Non_ARS_exon' -- confirm this is intended.
# NOTE(review): statement boundaries of this fragment are collapsed onto one
# physical line; restore the original layout before running.
CaseMatchTable[key][locus][ps][ 'Intron'].append(annReads) elif 'UTR' in annItem.split('.')[0]: CaseMatchTable[key][locus][ps][ 'UTR'].append(annReads) elif 'Exon' in annItem.split('.')[0]: if annItem.split('.')[0] in ARS_exon: CaseMatchTable[key][locus][ps][ 'ARS'].append(annReads) else: CaseMatchTable[key][locus][ps][ 'non_ARS_exon'].append( annReads) IMGTdbIO.save_dict2pickle( CaseMatchTable, '../Output/SG39/2018/SG39_Stats/FiveLoci_paired_case_MatchRecord_Locus_' + locus) IMGTdbIO.save_dict2pickle( CaseMatchTable, '../Output/SG39/2018/SG39_Stats/FiveLoci_paired_case_MatchRecord') ################ # Count the cases ################ CaseMatchTable = IMGTdbIO.load_pickle2dict( '../Output/SG39/2018/SG39_Stats/FiveLoci_paired_case_MatchRecord.pkl') ## : paired cases HLA typing stats fname = '../Output/SG39/2018/SG39_DRpairs/SG39_pairedCases_Stats.pkl' Matching_cases_stats = IMGTdbIO.load_pickle2dict(fname) ## Matching_cases_stats['fiveLoci_paired']
if CaseStats[caseID][locus]['PS1']['ARS'] > 0: LocusStats[typing]['ARS'].append(caseID) if CaseStats[caseID][locus]['PS1']['Non_ARS_exon'] >0: LocusStats[typing]['Non_ARS_exon'].append(caseID) if CaseStats[caseID][locus]['PS1']['Intron'] > 0: LocusStats[typing]['Intron'].append(caseID) for key, item in mm_locus_stats['MMannotation'].items(): if key.isdigit(): if item in LocusStats[typing].keys(): LocusStats[typing][item].append(caseID) else: LocusStats[typing] = {item: [caseID]} ClassI_stats = {'CaseStats': CaseStats, 'LocusStats': LocusStats} IMGTdbIO.save_dict2pickle(ClassI_stats, '../Output/SG39/2018/SG39_Stats/ClassI_Stats_0125_'+groupType) # Class II for caseID in group_caseIDs: # for locus in ClassII_loci: bothMM_output = "../Output/SG39/2018/SG39_bothMisMatched_locus_" + locus + "_0125_TargetedAlignment/" singleMM_output = "../Output/SG39/2018/SG39_singleMisMatched_" + locus + "_0125_TargetedAlignment/" ### Cases where both sequences don't match if caseID in Matching_cases_stats[locus+'_both_Seqmm']: mm_file_PS1 = glob.glob(bothMM_output+ 'CaseID_'+ caseID + '_Locus_' + locus + '_annotation_PS1*.pkl') for file_id in mm_file_PS1: mm_locus_stats_PS1 = IMGTdbIO.load_pickle2dict(file_id)
# Fragment: classify mismatch annotations for one key / locus / ps combination.
# The loops that bind those names start above this point.
#
# For every digit-keyed entry of seqAlgn_stats['MMannotation'] the annotation is
# formatted with IMGTdbIO.annotationFormat(annKey, annItem,
# seqAlgn_stats['alignment']) and filed under CaseMatchTable[key][locus][ps] in
# one of four lists, chosen from the text before the first '.' in annItem:
# 'Intron', 'UTR', or -- for exons -- 'ARS' when that exon name is listed in
# ARS_exon and 'non_ARS_exon' otherwise.  Annotations matching none of
# 'Intron' / 'UTR' / 'Exon' are not recorded.
#
# CaseMatchTable is pickled twice through IMGTdbIO.save_dict2pickle: once to a
# file suffixed with the current locus and once to the combined
# '<groupType>_case_MatchRecord' file.  "Count the cases" part: the combined
# table is loaded back from its '.pkl' file and Matching_cases_stats is loaded
# from SG41_52_pairedCases_Stats.pkl.
#
# NOTE(review): the save paths have no extension while the load path ends in
# '.pkl'; presumably save_dict2pickle appends '.pkl' itself -- confirm.
# NOTE(review): the bucket key here is 'non_ARS_exon' (lower-case n) whereas the
# CaseStats lookups in this file use 'Non_ARS_exon' -- confirm this is intended.
# NOTE(review): statement boundaries of this fragment are collapsed onto one
# physical line; restore the original layout before running.
#CaseStats[key][locus] for annKey, annItem in seqAlgn_stats['MMannotation'].items(): if annKey.isdigit(): annReads = IMGTdbIO.annotationFormat(annKey, annItem, seqAlgn_stats['alignment']) #annItem+'[D:'+seqAlgn_stats['alignment']['Donor-'+ps] if 'Intron' in annItem.split('.')[0]: CaseMatchTable[key][locus][ps]['Intron'].append(annReads) elif 'UTR' in annItem.split('.')[0]: CaseMatchTable[key][locus][ps]['UTR'].append(annReads) elif 'Exon' in annItem.split('.')[0]: if annItem.split('.')[0] in ARS_exon: CaseMatchTable[key][locus][ps]['ARS'].append(annReads) else: CaseMatchTable[key][locus][ps]['non_ARS_exon'].append(annReads) IMGTdbIO.save_dict2pickle(CaseMatchTable, '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/'+groupType+'_case_MatchRecord_Locus_'+locus) IMGTdbIO.save_dict2pickle(CaseMatchTable, '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/'+groupType+'_case_MatchRecord') ################ # Count the cases ################ CaseMatchTable = IMGTdbIO.load_pickle2dict('../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/'+groupType+'_case_MatchRecord.pkl') #caseID = '44107' #CaseMatchTable[caseID] ## : paired cases HLA typing stats fname = '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/SG41_52_pairedCases_Stats.pkl' Matching_cases_stats = IMGTdbIO.load_pickle2dict(fname)