def main():
    while True:
        word = input("Find a name in English dictionary: ").lower()
        if word == '/exit':
            break
        translations = functions.translate(word)
        if translations:
            print(' \n'.join(translations))
        else:
            searched = functions.similar_search(word)
            if searched:
                print('Did you mean? (Y/N):', searched)
                answer = input().lower()
                if answer == 'y':
                    print(' \n'.join(functions.translate(searched)))
                else:
                    print("Unknown word... Try again or type '/exit' to quit.")
            else:
                print("Unknown word... Try again or type '/exit' to quit.")
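# The loop above assumes a `functions` module providing `translate` and
# `similar_search`. A minimal sketch of what such helpers could look like,
# assuming a simple in-memory dictionary and difflib-based fuzzy matching
# (the data and matching strategy here are illustrative stand-ins, not the
# original implementation):
import difflib

DICTIONARY = {'apple': ['Apfel', 'pomme'], 'house': ['Haus', 'maison']}  # hypothetical data

def translate(word):
    """Return the list of translations for `word`, or None if unknown."""
    return DICTIONARY.get(word)

def similar_search(word):
    """Return the closest known word, or None if nothing is similar enough."""
    matches = difflib.get_close_matches(word, list(DICTIONARY), n=1, cutoff=0.8)
    return matches[0] if matches else None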
def insert_codons(seq1, seq2, pad_left, pad_right, homology_length,
                  triplets_to_insert):
    seqlen = len(seq1)
    for codon_position in range(0, len(seq1), 3):
        # note that seq2 is the wild type
        replaced_codon = seq2[codon_position:codon_position + 3]
        replaced_AA = f.translate(replaced_codon)
        left_padding_length = max(homology_length - codon_position, 0)
        if left_padding_length > 0:
            left_padding_seq = pad_left[-left_padding_length:].lower()
        else:
            left_padding_seq = ''
        right_padding_length = max(
            homology_length - (len(seq1) - codon_position), 0)
        if right_padding_length > 0:
            right_padding_seq = pad_right[:right_padding_length].lower()
        else:
            right_padding_seq = ''
        for i in triplets_to_insert:
            inserted_AA = f.translate(i)
            singleSNPmutation = inserted_AA in singleSNPcodons[replaced_codon]
            left_seq = seq1[max(0, codon_position - homology_length):codon_position]
            right_seq = seq2[codon_position + 3:
                             min(seqlen, codon_position + homology_length + 3)]
            yield ' '.join([
                replaced_codon, "(" + replaced_AA + ")", "to", inserted_AA,
                str(singleSNPmutation),
                left_padding_seq + left_seq, i, right_seq + right_padding_seq
            ])
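# A minimal runnable sketch of how insert_codons might be driven, with the
# module `f` and the `singleSNPcodons` lookup stubbed out. Both stubs are
# hypothetical stand-ins; the real script derives them from a full codon
# table.
import types

f = types.SimpleNamespace(
    translate=lambda codon: {'AAA': 'K', 'AAG': 'K', 'GCT': 'A', 'GCC': 'A'}.get(codon, 'X'))
# codon -> amino acids reachable by a single nucleotide change (toy subset)
singleSNPcodons = {'AAA': ['K', 'N', 'T', 'R', 'I', 'Q', 'E']}

seq = 'AAA'  # one-codon toy sequence; seq1 == seq2 here for simplicity
for oligo in insert_codons(seq, seq, pad_left='GGGGG', pad_right='CCCCC',
                           homology_length=4, triplets_to_insert=['GCT']):
    print(oligo)  # -> "AAA (K) to A False gggg GCT c"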
def __init__(self, master=None):
    """Init method for the MainFrame class that contains every widget used"""
    Frame.__init__(self, master)
    # Defining attributes:
    self.host_ip = str()
    self.host_socket = sk.socket(sk.AF_INET, sk.SOCK_STREAM)
    self.port = StringVar()
    self.max_connections = StringVar()
    self.users = dict()
    self.language = "EN-us"
    self.window = master
    self.pack(fill=BOTH)
    self.init_tabs()
    self.init_menu_bar()
    self.load_options()
    self.translation = translate()
def playGame(self):
    board = func.generateBoard()
    totalPieces = nn.BOARDSIZE - 4
    currPlayer = 1
    func.visualizeBoard(board)
    while totalPieces != 0:
        # Check that there are valid moves and switch if necessary
        moves = func.possibleMoves(currPlayer, board)
        if not moves:
            currPlayer = 0 - currPlayer
            moves = func.possibleMoves(currPlayer, board)
            if not moves:
                break
        choice = -1
        if currPlayer == 1:
            # Human playing
            print("Your move! (You are X)")
            while choice not in moves:
                x = int(input("Input x (Upper left is (0,0)): "))
                y = int(input("Input y (Upper left is (0,0)): "))
                choice = func.translate(x, y)
        else:
            # Follow the procedure to produce network output
            print("Opponent's move:")
            decisions = self.forwardpass(board, currPlayer)
            bestchoice = np.argmax(decisions)
            while bestchoice not in moves:
                decisions[bestchoice] = 0
                bestchoice = np.argmax(decisions)
            choice = bestchoice
        # Update board, current player, and number of pieces.
        # Then print the board
        board = func.placePiece(currPlayer, board, choice, moves[choice])
        func.visualizeBoard(board)
        currPlayer = 0 - currPlayer
        totalPieces -= 1
    return sum(board)  # func.visualizeBoard()
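# `func.translate` is defined elsewhere; as a hypothetical sketch, it likely
# flattens the (x, y) coordinates entered by the human into the flat board
# index that possibleMoves uses as a key. A sketch under the assumption of an
# 8x8 Othello board stored as a flat list (nn.BOARDSIZE == 64, minus the 4
# starting pieces above):
def translate(x, y, width=8):
    """Map (x, y), with (0, 0) at the upper left, onto a flat board index."""
    return y * width + x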
allAminoAcids = list(putativeCodonsTable['AA'].unique())

# retrieve rows with maximum frequency, grouped by amino acid
abundantCodons = putativeCodonsTable.loc[
    putativeCodonsTable.groupby('AA')['count'].idxmax()]
# f.sampleCodon(putativeCodonsTable, "F")

fasta_1 = f.read_fasta(args.fasta_1_filename)
fasta_2 = f.read_fasta(args.fasta_2_filename)

# Assert sequences are identical length
assert len(fasta_1) == len(fasta_2), "Sequences not same length!"

# Assert amino acid sequences are identical
aa_1 = f.translate(fasta_1)
aa_2 = f.translate(fasta_2)
assert aa_1 == aa_2, "Amino acid sequences not identical!"

# Load upstream and downstream seq
with open('upstream.dna', 'r') as infile:
    upstream = infile.read().strip()
with open('downstream.dna', 'r') as infile:
    downstream = infile.read().strip()

codons_1 = f.split_codons(fasta_1)
codons_2 = f.split_codons(fasta_2)

# create mutated codon dictionary
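# For reference, split_codons presumably chunks a nucleotide string into
# triplets; a minimal sketch of such a helper (hypothetical, the real one
# lives in the `f` module):
def split_codons(seq):
    """Split a nucleotide sequence into a list of 3-letter codons."""
    assert len(seq) % 3 == 0, "Sequence length must be a multiple of 3"
    return [seq[i:i + 3] for i in range(0, len(seq), 3)]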
def encaps_RF():
    args = parser.parse_args()
    data_size = args.size - 1
    if 0 < args.step < 4:
        data = func.reading_data(args.diffuse, data_size)
        print("Finished with reading Data ... \n")
        if args.sv:
            # calculate the mean scaled width and length
            data = func.calc_scaled_width_and_length(data)
            data = data.drop(['scaled_width', 'scaled_length'], axis=1)
            print("Finished with calculating Mean Scaled Values ... \n")
        plt.plot(data["telescope_type_id"], data["mc_energy"], ".")
        plt.xlabel("telescope_type")
        plt.ylabel("mc_energy")
        plt.savefig("plots/telescope_type.jpg")
        plt.close()
    if 0 < args.step < 4:
        data = shuffle(data)
        # drop unimportant data
        data, dropped_data = func.drop_data(data)
        # data['weight'] = dropped_data['telescope_type_name']
        truth = data[['mc_energy', 'array_event_id', 'run_id']]
        data = data.drop('mc_energy', axis=1)
        # fit and predict
        RFr = RandomForestRegressor(max_depth=10, n_jobs=-1, n_estimators=100,
                                    oob_score=True, max_features='sqrt')
        train_i, test_i = train_test_split(data[['array_event_id', 'run_id']],
                                           test_size=0.66)
        # keep only rows whose (array_event_id, run_id) pair belongs to the split
        X_train = data.loc[data[['array_event_id', 'run_id']].isin(train_i)[
            data[['array_event_id', 'run_id']].isin(train_i) == True].dropna().index]
        X_test = data.loc[data[['array_event_id', 'run_id']].isin(test_i)[
            data[['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]
        y_train = truth.loc[truth[['array_event_id', 'run_id']].isin(train_i)[
            truth[['array_event_id', 'run_id']].isin(train_i) == True].dropna().index]
        y_test = truth.loc[truth[['array_event_id', 'run_id']].isin(test_i)[
            truth[['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]
        if args.sv:
            # calculate the mean scaled width and length
            X_train = func.calc_scaled_width_and_length(X_train)
            X_test = func.calc_scaled_width_and_length(X_test)
        X1 = X_train.drop(['array_event_id', 'run_id'], axis=1).values
        X2 = X_test.drop(['array_event_id', 'run_id'], axis=1).values
        y1 = y_train.drop(['array_event_id', 'run_id'], axis=1).values
        y2 = y_test.drop(['array_event_id', 'run_id'], axis=1).values
        print("We use these attributes for the first RF: \n ",
              list(X_train.drop(['array_event_id', 'run_id'], axis=1)))
        RFr.fit(X1, y1)

        ############### overfitting ####################
        print("The oob_score is: ", RFr.oob_score_)

        ############# feature importance ################
        feature = RFr.feature_importances_
        indices = np.argsort(feature)[::-1]
        names = list(X_train.drop(['array_event_id', 'run_id'], axis=1))
        names = func.translate(names, 1)
        # Print the feature ranking
        print("Feature ranking:")
        for f in range(X1.shape[1]):
            print("%d. feature %s (%f)"
                  % (f + 1, names[indices[f]], feature[indices[f]]))
        data1 = np.array([tree.feature_importances_ for tree in RFr.estimators_])
        data1 = data1[:, indices]
        position_ticks = np.arange(0, X1.shape[1]) + 1
        plt.boxplot(data1, notch=False)
        plt.xticks(position_ticks, [names[i] for i in indices], rotation=90)
        plt.ylabel('Importance')
        plt.tight_layout()
        plt.savefig("plots/feautureimportance_boxplot_firstForest.pdf")
        plt.close()

        ################# prediction ###############
        predictions = RFr.predict(X2)
        print("Trained with:", y_train.shape[0], " \t Tested with: ", y_test.shape[0])
        z = np.array([predictions, y2[:, 0]])
        np.savetxt("data/encaps_pred_data.txt", z.T)
        pred = pd.DataFrame({'prediction': predictions,
                             'mc_energy': y_test['mc_energy'],
                             'array_event_id': y_test['array_event_id'],
                             'run_id': y_test['run_id']})
        print('RandomForestRegressor:\n\t Coefficient of determination: %.2f \n'
              % r2_score(predictions, y2[:, 0]),
              '\texplained_variance score: %.2f \n'
              % explained_variance_score(predictions, y2[:, 0]),
              '\tmean squared error: %.2f \n'
              % mean_squared_error(predictions, y2[:, 0]),
              "Finished with the first prediction ... \n")
        '''
        ############## Treeinterpreter #################
        test_dat = X2[:2, :]
        prediction, bias, contributions = ti.predict(RFr, test_dat)
        for i in range(len(test_dat)):
            print("Instance: ", i)
            print("Bias: ", bias[i])
            print("Feature contributions: ")
            for c, feature_name in sorted(zip(contributions[i], names),
                                          key=lambda x: -abs(x[0])):
                print(feature_name, round(c, 2))
            print("-" * 20)
        '''
        if args.step > 1:
            ######### weighted and unweighted mean ######################
            X_test_w = X_test.set_index(['run_id', 'array_event_id'])
            pred_w = pred.set_index(['run_id', 'array_event_id'])
            data_w = pd.concat([X_test_w, pred_w], axis=1).reset_index()
            truth_grouped = y_test.drop_duplicates().set_index(
                ['run_id', 'array_event_id'])

            ################ intensity as a weight? #################
            rel_err = (data_w["prediction"] - data_w["mc_energy"]) / data_w["mc_energy"]
            plt.plot(rel_err, data_w["intensity"], 'b.')
            plt.xlabel("relative error")
            plt.ylabel("intensity")
            # plt.xscale("log")
            plt.yscale("log")
            plt.tight_layout()
            plt.savefig("plots/intensity.jpg")
            plt.close()
def test_translate():
    print("Testing translate function...")
    assert isinstance(translate(test_rna_strand), str)
    assert translate(test_rna_strand) == 'MAMAPRTEINSTRING'
    print("All tests passed!\n")
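# The fixture and the function under test are defined elsewhere. The expected
# protein matches the classic Rosalind PROT sample, so a hypothetical fixture
# and a Biopython-backed translate that would satisfy this test could look
# like the following (an illustrative sketch, not the original code):
from Bio.Seq import Seq

# Rosalind PROT sample dataset (assumed fixture)
test_rna_strand = 'AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA'

def translate(rna):
    """Translate an RNA string into a protein, stopping at the first stop codon."""
    return str(Seq(rna).translate(to_stop=True))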
def RF_trafo():
    args = parser.parse_args()
    data_size = args.size - 1
    data = func.reading_data(args.diffuse, data_size)
    print("Finished with reading Data ... \n")
    if args.sv:
        # calculate the mean scaled width and length
        data = func.calc_scaled_width_and_length(data)
        data = data.drop(['scaled_width', 'scaled_length'], axis=1)
        print("Finished with calculating Mean Scaled Values ... \n")
    data = shuffle(data)
    # drop unimportant data
    data, dropped_data = func.drop_data(data)
    # data['weight'] = dropped_data['telescope_type_name']
    truth = data[['mc_energy', 'array_event_id', 'run_id']]
    data = data.drop('mc_energy', axis=1)
    # fit and predict
    RFr = RandomForestRegressor(max_depth=10, n_jobs=-1, n_estimators=100,
                                oob_score=True, max_features='sqrt')
    train_i, test_i = train_test_split(data[['array_event_id', 'run_id']],
                                       test_size=0.66)
    # keep only rows whose (array_event_id, run_id) pair belongs to the split
    X_train = data.loc[data[['array_event_id', 'run_id']].isin(train_i)[
        data[['array_event_id', 'run_id']].isin(train_i) == True].dropna().index]
    X_test = data.loc[data[['array_event_id', 'run_id']].isin(test_i)[
        data[['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]
    y_train = truth.loc[truth[['array_event_id', 'run_id']].isin(train_i)[
        truth[['array_event_id', 'run_id']].isin(train_i) == True].dropna().index]
    y_test = truth.loc[truth[['array_event_id', 'run_id']].isin(test_i)[
        truth[['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]
    if args.sv:
        # calculate the mean scaled width and length
        X_train = func.calc_scaled_width_and_length(X_train)
        X_test = func.calc_scaled_width_and_length(X_test)
    X1 = X_train.drop(['array_event_id', 'run_id'], axis=1).values
    X2 = X_test.drop(['array_event_id', 'run_id'], axis=1).values
    y1 = y_train.drop(['array_event_id', 'run_id'], axis=1).values
    y2 = y_test.drop(['array_event_id', 'run_id'], axis=1).values
    print("We use these attributes for the first RF: \n ",
          list(X_train.drop(['array_event_id', 'run_id'], axis=1)))

    ##################### transformation #############################
    y1 = np.log(y1 + 3)
    RFr.fit(X1, y1)

    ############### overfitting ####################
    print("The oob_score is: ", RFr.oob_score_)

    ############# feature importance ################
    feature = RFr.feature_importances_
    indices = np.argsort(feature)[::-1]
    names = list(X_train.drop(['array_event_id', 'run_id'], axis=1))
    names = func.translate(names, 1)
    # Print the feature ranking
    print("Feature ranking:")
    for f in range(X1.shape[1]):
        print("%d. feature %s (%f)"
              % (f + 1, names[indices[f]], feature[indices[f]]))
    data1 = np.array([tree.feature_importances_ for tree in RFr.estimators_])
    data1 = data1[:, indices]
    position_ticks = np.arange(0, X1.shape[1]) + 1
    plt.boxplot(data1, notch=False)
    plt.xticks(position_ticks, [names[i] for i in indices], rotation=90)
    plt.ylabel('Importance')
    plt.tight_layout()
    plt.savefig("plots/feautureimportance_boxplot_trafo_firstForest.pdf")
    plt.close()

    ################# prediction ###############
    predictions = RFr.predict(X2)
    print("Trained with:", y_train.shape[0], " \t Tested with: ", y_test.shape[0])
    pred = pd.DataFrame({
        'prediction': predictions,
        'mc_energy': y_test['mc_energy'],
        'array_event_id': y_test['array_event_id'],
        'run_id': y_test['run_id']
    })

    ######## inverse transformation ###########
    predictions_rück = np.exp(predictions) - 3
    z = np.array([predictions_rück, y2[:, 0]])
    np.savetxt("data/trafo_pred_data.txt", z.T)
    print('RandomForestRegressor:\n\t Coefficient of determination: %.2f \n'
          % r2_score(predictions_rück, y2[:, 0]),
          '\texplained_variance score: %.2f \n'
          % explained_variance_score(predictions_rück, y2[:, 0]),
          '\tmean squared error: %.2f \n'
          % mean_squared_error(predictions_rück, y2[:, 0]),
          "Finished with the first prediction ... \n")

    ######### weighted and unweighted mean ######################
    X_test_w = X_test.set_index(['run_id', 'array_event_id'])
    pred_w = pred.set_index(['run_id', 'array_event_id'])
    data_w = pd.concat([X_test_w, pred_w], axis=1).reset_index()
    truth_grouped = y_test.drop_duplicates().set_index(
        ['run_id', 'array_event_id'])

    ################ mean and median ##################
    x_grouped = data_w[['prediction', 'array_event_id', 'run_id']].groupby(
        by=['run_id', 'array_event_id'])
    pred_mean = x_grouped.mean()
    pred_mean.columns = ['mean_prediction']
    pred_mean = pd.concat([pred_mean, truth_grouped], axis=1)

    #### inverse transformation ######
    pred_mean_rück = np.exp(pred_mean['mean_prediction']) - 3
    pred_mean_rück = pd.concat([pred_mean_rück, truth_grouped], axis=1)
    z = np.array([pred_mean_rück['mean_prediction'].values,
                  pred_mean_rück['mc_energy'].values])
    np.savetxt("data/trafo_pred_mean_data.txt", z.T)
    print('RF with mean:\n\t Coefficient of determination: %.2f \n'
          % r2_score(pred_mean_rück['mean_prediction'].values,
                     pred_mean_rück['mc_energy'].values),
          '\texplained_variance score: %.2f \n'
          % explained_variance_score(pred_mean_rück['mean_prediction'].values,
                                     pred_mean_rück['mc_energy'].values),
          '\tmean squared error: %.2f \n'
          % mean_squared_error(pred_mean_rück['mean_prediction'].values,
                               pred_mean_rück['mc_energy'].values))

    # use the mean prediction for another RF
    encaps_info = ['width', 'length', 'num_triggered_telescopes',
                   'num_triggered_lst', 'num_triggered_mst', 'num_triggered_sst',
                   'total_intensity', 'array_event_id', 'run_id']
    data_encaps = X_test[encaps_info]
    pred_mean = pred_mean.reset_index()
    data_encaps = data_encaps.merge(pred_mean, on=['run_id', 'array_event_id'])
    train_i2, test_i2 = train_test_split(
        data_encaps[['array_event_id', 'run_id']], test_size=0.5)
    X2_train = data_encaps.loc[data_encaps[['array_event_id', 'run_id']].isin(train_i2)[
        data_encaps[['array_event_id', 'run_id']].isin(train_i2) == True].dropna().index]
    X2_test = data_encaps.loc[data_encaps[['array_event_id', 'run_id']].isin(test_i2)[
        data_encaps[['array_event_id', 'run_id']].isin(test_i2) == True].dropna().index]
    if args.sv:
        X2_train = func.calc_mean_scaled_width_and_length(X2_train)
        X2_test = func.calc_mean_scaled_width_and_length(X2_test)

    ######## compute new attributes #########
    ######### mean/std/min/max of the predicted energies per telescope type ###########
    pred = data_w[['prediction', 'array_event_id', 'run_id', 'telescope_type_id']]
    telescope_type = pred['telescope_type_id'].copy(deep=True)
    pred = pred.drop('telescope_type_id', axis=1)
    telescope_aggs = []
    for type_id, name in [(1, 'lst'), (2, 'mst'), (3, 'sst')]:
        pred_t = pred[telescope_type == type_id]
        grouped = pred_t.groupby(by=['run_id', 'array_event_id'])
        for agg, prefix in [(grouped.mean(), 'mean'), (grouped.std(), 'std'),
                            (grouped.min(), 'min'), (grouped.max(), 'max')]:
            telescope_aggs.append(agg.reset_index().rename(
                columns={'prediction': '%s_%s_pred' % (prefix, name)}))
    X2_train = X2_train.reset_index()
    X2_test = X2_test.reset_index()
    for agg in telescope_aggs:
        X2_train = X2_train.merge(agg, how='left', on=['run_id', 'array_event_id'])
        X2_test = X2_test.merge(agg, how='left', on=['run_id', 'array_event_id'])
    # if no LST, MST or SST saw this event, set mean and std to 0;
    # std is NaN if there is just one prediction
    X2_test = X2_test.fillna(0)
    X2_test = shuffle(X2_test)
    y2_test = X2_test[['mc_energy']].copy(deep=True)
    X2_test = X2_test.drop(['mc_energy', 'array_event_id', 'run_id'], axis=1)
    X2_train = X2_train.fillna(0)
    X2_train = shuffle(X2_train)
    y2_train = X2_train[['mc_energy']].copy(deep=True)
    X2_train = X2_train.drop(['mc_energy', 'array_event_id', 'run_id'], axis=1)

    # fit and predict
    RFr2 = RandomForestRegressor(max_depth=10, n_jobs=-1, n_estimators=100,
                                 oob_score=True, max_features='sqrt')
    print("We use these attributes for the second RF: \n ", list(X2_train))

    ########### transformation ############
    y2_train = np.log(y2_train + 3)
    RFr2.fit(X2_train.values, y2_train.values)

    ############### overfitting ####################
    print("The oob_score is: ", RFr2.oob_score_)

    ############# feature importance ################
    feature = RFr2.feature_importances_
    std = np.std([tree.feature_importances_ for tree in RFr2.estimators_],
                 axis=0)
    indices = np.argsort(feature)[::-1]
    names = list(X2_train)
    names = func.translate(names, 2)
    # Print the feature ranking
    print("Feature ranking:")
    for f in range(X2_train.shape[1]):
        print("%d. feature %s (%f)"
              % (f + 1, names[indices[f]], feature[indices[f]]))
    data2 = np.array([tree.feature_importances_ for tree in RFr2.estimators_])
    data2 = data2[:, indices]
    position_ticks = np.arange(0, X2_train.shape[1]) + 1
    plt.boxplot(data2, notch=False)
    plt.xticks(position_ticks, [names[i] for i in indices], rotation=90)
    plt.ylabel('Importance')
    plt.tight_layout()
    plt.savefig("plots/feautureimportance_boxplot_trafo_secondForest.pdf")
    plt.close()

    ####### predictions ################
    prediction_encaps = RFr2.predict(X2_test.values)

    ############ inverse transformation ###########
    prediction_encaps = np.exp(prediction_encaps) - 3
    print("Trained with:", y2_train.shape[0], " \t Tested with: ", y2_test.shape[0])
    z = np.array([prediction_encaps, y2_test['mc_energy'].values])
    np.savetxt("data/trafo_encaps_pred_data.txt", z.T)
    print('encapsulated with mean_prediction RF:\n\t Coefficient of determination: %.2f\n'
          % r2_score(prediction_encaps, y2_test.values),
          '\texplained_variance score: %.2f \n'
          % explained_variance_score(prediction_encaps, y2_test.values),
          '\tmean squared error: %.2f \n'
          % mean_squared_error(prediction_encaps, y2_test.values),
          "Finished with the encapsulated prediction \n")
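# Both forests above are trained on log(E + 3) and their predictions mapped
# back with exp(.) - 3; a quick numeric sanity check that this inverse
# transform round-trips exactly:
import numpy as np

y = np.array([0.1, 1.0, 100.0])  # energies in the unit used by mc_energy
assert np.allclose(np.exp(np.log(y + 3)) - 3, y)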
import os

import functions

FILE_AFNOR = "181017151004_2119332 - Anonyme.AFN.txt"
FILE_AFNOR_2 = "181017151004_2119332 - Anonyme_666.AFN.txt"

functions.recuperation(FILE_AFNOR, "temp1.csv")
functions.translate("temp1.csv", "temp2.csv")
functions.analysis("temp2.csv", "prescription.obj")

functions.recuperation(FILE_AFNOR_2, "temp3.csv")
functions.translate("temp3.csv", "temp4.csv")
functions.analysis("temp4.csv", "prescription_666.obj")

try:
    os.remove("temp1.csv")
    os.remove("temp2.csv")
    os.remove("temp3.csv")
    os.remove("temp4.csv")
except FileNotFoundError:
    pass

OBJ = functions.load_object("prescription.obj")
OBJ2 = functions.load_object("prescription_666.obj")
functions.compare_prescription(OBJ, OBJ2)