def LoadFrame(ori_file, mode='b'):
    """Read a CSV table and run it through the SATA_PRETREAT pipeline.

    Every column is cast to ``str`` right after loading.  When ``mode`` is
    ``'b'`` the ``Reference_Allele`` column is overwritten with the string
    ``Reference_Allele + Tumor_Seq_Allele2`` and the six pairs whose first
    base is G or A are replaced by their base-complemented pair
    (e.g. ``'GT'`` becomes ``'CA'``).

    Args:
        ori_file: anything ``pd.read_csv`` accepts as its first argument.
        mode: ``'b'`` (default) enables the allele-pair rewrite described
            above; any other value skips it.

    Returns:
        The frame returned by ``SATA_PRETREAT.Classifier`` with the rows whose
        ``class_result`` equals 0 dropped, then passed through
        ``SATA_PRETREAT.DropEmptyData`` on ``to_last_known_alive``.
    """
    # Complemented form of each G*/A* pair.  The A>G entry maps to 'TC' so it
    # follows the same direction as the other five entries (the previous
    # 'TC' -> 'AG' entry went the opposite way and disagreed with VisiableSeq).
    complement_pairs = {
        'GT': 'CA',
        'GC': 'CG',
        'GA': 'CT',
        'AT': 'TA',
        'AG': 'TC',
        'AC': 'TG',
    }

    test_frame = pd.read_csv(ori_file).astype('str')
    if mode == 'b':
        allele_pair = (test_frame['Reference_Allele']
                       + test_frame['Tumor_Seq_Allele2'])
        # One whole-column assignment instead of chained `frame[col].loc[...] = v`,
        # which is not guaranteed to write back into the frame.
        test_frame['Reference_Allele'] = allele_pair.replace(complement_pairs)

    test_frame = SATA_PRETREAT.GetSurvTime(test_frame)
    test_frame = SATA_PRETREAT.NeuPreteate(test_frame)
    classed_frame = SATA_PRETREAT.Classifier(test_frame)
    unclassified = classed_frame[classed_frame['class_result'] == 0].index
    classed_frame.drop(unclassified, inplace=True)
    classed_frame = SATA_PRETREAT.DropEmptyData(
        classed_frame, column='to_last_known_alive')
    return classed_frame
def VisiableSeq(test_frame):
    """Classify a mutation frame and attach the sequence flanking each row.

    The frame is cast to ``str``; ``Reference_Allele`` is overwritten with
    ``Reference_Allele + Tumor_Seq_Allele2`` and the six pairs whose first
    base is G or A are replaced by their base-complemented pair.  The frame
    then goes through ``SATA_PRETREAT.GetSurvTime``, ``NeuPreteate`` and
    ``Classifier``; rows with ``class_result == 0`` are dropped and the index
    is reset.

    For every remaining row, ``WHOLE_SEQUENCE.get_seq`` is queried for the
    ``EXT`` positions before and the ``EXT`` positions after
    ``Start_Position``; the lower-cased results are stored in ``front_seq``
    and ``behind_seq``.  A ``ValueError`` during a lookup (including a
    non-integer ``Start_Position``) is reported on stdout and stored as the
    string ``'nan'``.

    Args:
        test_frame: DataFrame providing at least ``Reference_Allele``,
            ``Tumor_Seq_Allele2``, ``Chromosome`` and ``Start_Position``.

    Returns:
        The classified frame with ``front_seq`` and ``behind_seq`` added.
    """
    complement_pairs = {
        'GT': 'CA',
        'GC': 'CG',
        'GA': 'CT',
        'AT': 'TA',
        'AG': 'TC',
        'AC': 'TG',
    }

    print('File_Loaded')
    test_frame = test_frame.astype('str')
    allele_pair = (test_frame['Reference_Allele']
                   + test_frame['Tumor_Seq_Allele2'])
    # Whole-column assignment; chained `frame[col].loc[...] = v` is not
    # guaranteed to write back into the frame.
    test_frame['Reference_Allele'] = allele_pair.replace(complement_pairs)
    print("********************************")

    test_frame = SATA_PRETREAT.GetSurvTime(test_frame)
    test_frame = SATA_PRETREAT.NeuPreteate(test_frame)
    classed_frame = SATA_PRETREAT.Classifier(test_frame)
    unclassified = classed_frame[classed_frame['class_result'] == 0].index
    classed_frame.drop(unclassified, inplace=True)
    classed_frame.reset_index(drop=True, inplace=True)
    print('visiable frame loaded')

    pre_list = []
    post_list = []
    located = zip(classed_frame['Chromosome'], classed_frame['Start_Position'])
    for i, (chromosome, start_text) in enumerate(located):
        # int() stays inside each try so a malformed position is reported and
        # recorded as 'nan' instead of aborting the whole run.
        try:
            start = int(start_text)
            upstream = WHOLE_SEQUENCE.get_seq(chromosome, start - EXT, start - 1)
            pre_list.append(str(upstream).lower())
        except ValueError:
            print('!!!!!!!', i, ' wrong.')
            pre_list.append('nan')
        try:
            start = int(start_text)
            downstream = WHOLE_SEQUENCE.get_seq(chromosome, start + 1, start + EXT)
            post_list.append(str(downstream).lower())
        except ValueError:
            print('!!!!!!!', i, ' wrong.')
            post_list.append('nan')
        if i % 10000 == 0:
            print(i, ' has finished.')

    # The lists already hold one string per row, so they are assigned directly;
    # the former reshape(len, -1) round-trip raised on an empty frame.
    classed_frame['front_seq'] = pre_list
    classed_frame['behind_seq'] = post_list
    print(pd.crosstab(classed_frame['class_result'],
                      classed_frame['Reference_Allele']))
    return classed_frame
def _DropEmptyCol(self, drop_col=None):
    """Apply ``SATA_PRETREAT.DropEmptyData`` once per listed column.

    Args:
        drop_col: iterable of column names to clean, applied in order.
            ``None`` (default) or an empty iterable leaves the data untouched.

    Returns:
        ``self.dataframe`` after it has been passed through
        ``SATA_PRETREAT.DropEmptyData`` for each column; ``self.dataframe``
        itself when there is nothing to clean.
    """
    new_frame = self.dataframe
    # `None` replaces the former mutable `[]` default.
    for column in (drop_col or ()):
        new_frame = SATA_PRETREAT.DropEmptyData(new_frame, column=column)
    return new_frame
def ChiSeqTest(self, chisq_col='', recomb=None):
    """Run a chi-square contingency test of ``chisq_col`` against ``self.class_col``.

    The table is built by ``SATA_PRETREAT.ChiSquarePretreat`` and its missing
    cells are filled with 0 before ``scipy.stats.chi2_contingency`` is applied.

    Args:
        chisq_col: name of the column to test.
        recomb: optional column selection applied to the contingency table
            before testing; ``None`` (default) or an empty selection uses
            the whole table.

    Returns:
        The zero-filled contingency table with three extra columns, ``chi2``,
        ``P`` and ``df``, each holding the same test-wide value on every row.
    """
    # Feed the cleaned frame into the table builder.  Previously the result of
    # _DropEmptyCol was discarded and the uncleaned self.dataframe was used.
    cleaned_frame = self._DropEmptyCol(drop_col=[chisq_col])
    chiseq_frame = SATA_PRETREAT.ChiSquarePretreat(
        cleaned_frame, column=chisq_col, class_column=self.class_col)
    chiseq_frame = chiseq_frame.fillna(0)

    if recomb is None or len(recomb) == 0:
        chiseq_array = np.array(chiseq_frame)
    else:
        chiseq_array = np.array(chiseq_frame[recomb])
    print(chiseq_array)

    # The array is extracted above, before the result columns are appended,
    # so the statistics never feed back into the tested table.
    chi2, p_value, dof, _expected = scipy.stats.chi2_contingency(chiseq_array)
    chiseq_frame['chi2'] = chi2
    chiseq_frame['P'] = p_value
    chiseq_frame['df'] = dof
    return chiseq_frame
def CalSataItem(dataframe, outtxtfile, WITH_TYPICAL, output_f, label='base'):
    """Compute and report the configured statistic for every item in ``SATA_LIST``.

    ``cfg.clicfeat_dict[item]`` selects the treatment of each item:

    * ``'ori'``: chi-square of the item against ``class_result`` and of
      ``class_result`` against the item, on the values as they are.
    * ``'mid'``: optional error-bar figure (when ``DRAW_BOX`` is truthy),
      then ``Describe`` and ``CalTTest``; for ``to_last_known_alive`` a
      log-rank test is added.
    * anything else: treated as a mapping ``{new_value: [old_values]}``; the
      column is recoded, rows recoded to ``'delete'`` are dropped, and the
      two chi-square tests are run on the recoded data.

    Args:
        dataframe: input frame; passed to ``SATA_PRETREAT.DropEmptyData`` once
            per item.
        outtxtfile: writable text stream receiving the printed report.
        WITH_TYPICAL: value used (via ``str``) as the figure file-name prefix.
        output_f: output file name; the part before the first ``'.'`` names
            the output sub-directory.
        label: tag inserted into banners and output file names.

    Returns:
        None.  Results are printed and handed to ``OutputFrame``.

    NOTE(review): another ``CalSataItem`` with a different signature appears
    later in this source; if both live in one module the later one wins.
    """
    sata_dict = cfg.clicfeat_dict
    out_stem = output_f.split('.')[0]

    def report_chi_square(frame, item, class_col, chisq_col, suffix, loss_text):
        """Run one chi-square test, print the table and export it."""
        tester = SATA_METHODS.SataMethod(dataframe=frame, class_col=class_col)
        try:
            table = tester.ChiSeqTest(chisq_col=chisq_col)
            print(table, file=outtxtfile)
            OutputFrame(table, in_f=output_f,
                        filename=item + '_' + label + suffix)
        except UnboundLocalError:
            # Same exception type the original code treated as "no data".
            print(item, loss_text, file=outtxtfile)

    def draw_class_errorbars(frame, item):
        """Save a mean +/- std error-bar figure of ``item`` per class."""
        tick_labels = []
        means = []
        stds = []
        for cl in cfg.class_list_costom:
            values = frame[item][frame['class_result'] == cl]
            tick_labels.append(cl + ': Num=' + str(values.count()))
            magnitudes = np.abs(np.array(values).astype(float).astype(int))
            if magnitudes.size == 0:
                means.append(0)
                stds.append(0)
            else:
                means.append(np.mean(magnitudes))
                stds.append(np.std(magnitudes))
        print(stds)
        positions = range(len(tick_labels))
        plt.errorbar(positions, means, yerr=stds, fmt='o')
        plt.xticks(positions, tick_labels, rotation=90)
        # Create the directory up front.  The former retry path concatenated
        # WITH_TYPICAL without str() and failed for non-string values.
        figure_dir = OUTPUT_PATH + out_stem + '/'
        os.makedirs(figure_dir, exist_ok=True)
        plt.savefig(figure_dir + str(WITH_TYPICAL) + '-' + item + '.tif')
        plt.close()

    for cal_item in SATA_LIST:
        calcframe = SATA_PRETREAT.DropEmptyData(dataframe, column=cal_item)
        banner = '{:*^120}'.format(cal_item + '_' + label)
        print(banner, file=outtxtfile)
        print(banner)

        rule = sata_dict[cal_item]
        if rule == 'ori':
            report_chi_square(calcframe, cal_item, 'class_result', cal_item,
                              '_as_class', ' is loss')
            report_chi_square(calcframe, cal_item, cal_item, 'class_result',
                              '_as_item', ' is loss')
        elif rule == 'mid':
            if DRAW_BOX:
                draw_class_errorbars(calcframe, cal_item)
            sataasmid = SATA_METHODS.SataMethod(dataframe=calcframe)
            itemdescribe = sataasmid.Describe(descb_col=cal_item)
            itemttest = sataasmid.CalTTest(describeframe=itemdescribe)
            print(itemdescribe, file=outtxtfile)
            print(itemttest, file=outtxtfile)
            OutputFrame(itemdescribe, in_f=output_f,
                        filename=cal_item + '_' + label + '_describe')
            OutputFrame(itemttest, in_f=output_f,
                        filename=cal_item + '_' + label + '_ttest')
            if cal_item == 'to_last_known_alive':
                print('{:*^120}'.format('logrank'), file=outtxtfile)
                # NOTE(review): uses PATH while the figure uses OUTPUT_PATH;
                # confirm the two are meant to differ.
                lr_frame = sataasmid.LogRankTest(
                    PATH + out_stem + '/' + label + '_logrank')
                OutputFrame(lr_frame, in_f=output_f,
                            filename=label + '_logrank')
                print(lr_frame, file=outtxtfile)
                print(lr_frame)
        else:
            # Recode in the original group order so later groups still see the
            # values written by earlier ones.  `.loc` writes into the frame,
            # unlike chained `frame[col][mask] = v`.
            for new_value in rule:
                for old_value in rule[new_value]:
                    calcframe.loc[calcframe[cal_item] == old_value,
                                  cal_item] = new_value
            calcframe.drop(
                calcframe[calcframe[cal_item] == 'delete'].index, inplace=True)
            report_chi_square(calcframe, cal_item, 'class_result', cal_item,
                              '_as_class', ' is loss')
            report_chi_square(calcframe, cal_item, cal_item, 'class_result',
                              '_as_item', ' is loss.')

    print("{:=^120}".format('finish'))
    print("{:=^120}".format('finish'), file=outtxtfile)
def CalSataItem(dataframe, outtxtfile, output_f, label='base'):
    """Compute and report the configured statistic for every item in ``SATA_LIST``.

    ``cfg.clicfeat_dict[item]`` selects the treatment of each item:

    * ``'ori'``: chi-square of the item against ``class_result`` and of
      ``class_result`` against the item, on the values as they are.
    * ``'mid'``: ``Describe`` and ``CalTTest``; for ``to_last_known_alive``
      a log-rank test is added.
    * anything else: treated as a mapping ``{new_value: [old_values]}``; the
      column is recoded, rows recoded to ``'delete'`` are dropped, and the
      two chi-square tests are run on the recoded data.

    Args:
        dataframe: input frame; passed to ``SATA_PRETREAT.DropEmptyData`` once
            per item.
        outtxtfile: writable text stream receiving the printed report.  It is
            closed by this function once all items are processed.
        output_f: output file name; the part before the first ``'.'`` names
            the output sub-directory.
        label: tag inserted into banners and output file names.

    Returns:
        None.  Results are printed and handed to ``OutputFrame``.
    """
    sata_dict = cfg.clicfeat_dict

    def report_chi_square(frame, item, class_col, chisq_col, suffix, loss_text):
        """Run one chi-square test, print the table and export it."""
        tester = SATA_METHODS.SataMethod(dataframe=frame, class_col=class_col)
        try:
            table = tester.ChiSeqTest(chisq_col=chisq_col)
            print(table, file=outtxtfile)
            OutputFrame(table, in_f=output_f,
                        filename=item + '_' + label + suffix)
        except UnboundLocalError:
            # Same exception type the original code treated as "no data".
            print(item, loss_text, file=outtxtfile)

    for cal_item in SATA_LIST:
        calcframe = SATA_PRETREAT.DropEmptyData(dataframe, column=cal_item)
        banner = '{:*^120}'.format(cal_item + '_' + label)
        print(banner, file=outtxtfile)
        print(banner)

        rule = sata_dict[cal_item]
        if rule == 'ori':
            report_chi_square(calcframe, cal_item, 'class_result', cal_item,
                              '_as_class', ' is loss')
            report_chi_square(calcframe, cal_item, cal_item, 'class_result',
                              '_as_item', ' is loss')
        elif rule == 'mid':
            sataasmid = SATA_METHODS.SataMethod(dataframe=calcframe)
            itemdescribe = sataasmid.Describe(descb_col=cal_item)
            itemttest = sataasmid.CalTTest(describeframe=itemdescribe)
            print(itemdescribe, file=outtxtfile)
            print(itemttest, file=outtxtfile)
            OutputFrame(itemdescribe, in_f=output_f,
                        filename=cal_item + '_' + label + '_describe')
            OutputFrame(itemttest, in_f=output_f,
                        filename=cal_item + '_' + label + '_ttest')
            if cal_item == 'to_last_known_alive':
                print('{:*^120}'.format('logrank'), file=outtxtfile)
                lr_frame = sataasmid.LogRankTest(
                    PATH + output_f.split('.')[0] + '/' + label + '_logrank')
                OutputFrame(lr_frame, in_f=output_f,
                            filename=label + '_logrank')
                print(lr_frame, file=outtxtfile)
                print(lr_frame)
        else:
            # Recode in the original group order so later groups still see the
            # values written by earlier ones.  `.loc` writes into the frame,
            # unlike chained `frame[col][mask] = v`.
            for new_value in rule:
                for old_value in rule[new_value]:
                    calcframe.loc[calcframe[cal_item] == old_value,
                                  cal_item] = new_value
            calcframe.drop(
                calcframe[calcframe[cal_item] == 'delete'].index, inplace=True)
            report_chi_square(calcframe, cal_item, 'class_result', cal_item,
                              '_as_class', ' is loss')
            report_chi_square(calcframe, cal_item, cal_item, 'class_result',
                              '_as_item', ' is loss.')

    print("{:=^120}".format('finish'))
    print("{:=^120}".format('finish'), file=outtxtfile)
    outtxtfile.close()