def evalAlign(file, references, LM, AM):
    """Translate a French file line by line and print BLEU scores per line.

    Each line of ``file`` is preprocessed as French, decoded with ``LM`` and
    ``AM``, and scored with n = 1, 2, 3 against the line at the same position
    in every reference file. The three scores are printed per sentence.

    Args:
        file: Path of the French input file, one sentence per line.
        references: Sequence of paths to reference files read in lockstep
            with ``file``.
        LM: Language model forwarded to ``decode``.
        AM: Alignment model forwarded to ``decode``.
    """
    from contextlib import ExitStack

    # ExitStack closes every reference file even when decoding or scoring
    # raises part-way through; a trailing manual close loop would be skipped.
    with ExitStack() as stack:
        ref_files = [
            stack.enter_context(open(path, "r")) for path in references
        ]
        with open(file, "r") as f:
            for line in f:
                procFrench = preprocess(line, "f")
                english = decode(procFrench, LM, AM)
                # readline() yields "" once a reference file is exhausted.
                blueRef = [ref.readline() for ref in ref_files]
                blue1 = BLEU_score(english, blueRef, 1)
                blue2 = BLEU_score(english, blueRef, 2)
                blue3 = BLEU_score(english, blueRef, 3)
                print(blue1, blue2, blue3)
def main():
    """Train alignment models of several sizes and print BLEU per sentence.

    Reads the module-level settings ``train_dir``, ``max_iter``, ``testF``,
    ``teste`` and ``google_teste``. For every French test sentence it prints
    the two references, then the translation and the BLEU scores
    (n = 1, 2, 3) obtained with each alignment model.
    """
    lme = lm_train(train_dir, "e", "lm_eng")
    vocab_size = len(lme)  # computed but not used by the visible code
    data_size = [1000, 10000, 15000, 30000]
    AMs = [align_ibm1(train_dir, size, max_iter) for size in data_size]

    f_sens = read_file(testF, "f")
    e_sens = read_file(teste, "e")
    google_e_sens = read_file(google_teste, "e")

    for idx, fre in enumerate(f_sens):
        print("processing:{}".format(fre))
        ref_1 = e_sens[idx]
        ref_2 = google_e_sens[idx]
        print("ref1:{} ref2:{}".format(ref_1, ref_2))
        # One translation per alignment model, in training-size order.
        for model_idx in range(len(data_size)):
            eng = decode(fre, lme, AMs[model_idx])
            print("AM{} translation:{}".format(model_idx, eng))
            for n in (1, 2, 3):
                bleu = BLEU_score(eng, [ref_1, ref_2], n)
                print("{} bleu:{}".format(n, bleu))
def insertpending(txhex):
    """Attempt to decode a pending transaction given as a hex string.

    Args:
        txhex: Hex-encoded transaction handed to ``decode``.

    Returns:
        None. A decode failure is reported on stdout and swallowed.
    """
    try:
        rawtx = decode(txhex)  # result is not used in the visible code
    except Exception as e:
        # Python 3 spelling of ``except Exception, e`` and the print
        # statement; the emitted text (items joined by spaces) is unchanged.
        print("Error: ", e, "\n Could not decode PendingTx: ", txhex)
        return
def main():
    """Run CTC on ten random TIMIT samples and print label vs. decoded text.

    Loads ``TIMIT_data_prepared_for_CTC.pkl`` from the current directory,
    draws ``num_test`` random indices out of the first 100 samples, runs
    ``runCTC`` on them and prints, for each sample, its ``y_char`` label
    followed by the output of ``decode``.
    """
    TIMIT_pkl_file = os.path.join(os.getcwd(),
                                  'TIMIT_data_prepared_for_CTC.pkl')
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # trusted files here.
    with open(TIMIT_pkl_file, 'rb') as f:
        data = pickle.load(f)
    list_of_alphabets = data['chars']

    num_test = 10
    print("Model loaded")

    perm = np.random.permutation(100)  # 1st 100 only
    test_vals = perm[:num_test]
    xtest = np.array([data['x'][i] for i in test_vals])
    xtest = np.transpose(xtest, [1, 0, 2])
    pred = runCTC(xtest)[0]
    print(pred)

    sess = tf.Session()  # need new session
    try:
        model, chars, vocab = loadCLM(sess)
        print(pred.shape)
        pred = np.transpose(pred, [1, 0, 2])
        for row, sample_idx in enumerate(test_vals):
            # Row `row` of pred was built from data['x'][sample_idx], so the
            # matching label is y_char[sample_idx], not y_char[row].
            print(data['y_char'][sample_idx])
            print(decode(pred[row], sess, model, chars, vocab,
                         list_of_alphabets))
            print('\n\n')
    finally:
        # Release the session even if loading or decoding fails.
        sess.close()
def main():
    """Run CTC on ten random TIMIT samples and print label vs. decoded text.

    Loads ``TIMIT_data_prepared_for_CTC.pkl`` from the current directory,
    draws ``num_test`` random indices out of the first 100 samples, runs
    ``runCTC`` on them and prints, for each sample, its ``y_char`` label
    followed by the output of ``decode``.
    """
    TIMIT_pkl_file = os.path.join(os.getcwd(),
                                  'TIMIT_data_prepared_for_CTC.pkl')
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # trusted files here.
    with open(TIMIT_pkl_file, 'rb') as f:
        data = pickle.load(f)
    list_of_alphabets = data['chars']

    num_test = 10
    print("Model loaded")

    perm = np.random.permutation(100)  # 1st 100 only
    test_vals = perm[:num_test]
    xtest = np.array([data['x'][i] for i in test_vals])
    xtest = np.transpose(xtest, [1, 0, 2])
    pred = runCTC(xtest)[0]
    print(pred)

    sess = tf.Session()  # need new session
    try:
        model, chars, vocab = loadCLM(sess)
        print(pred.shape)
        pred = np.transpose(pred, [1, 0, 2])
        for row, sample_idx in enumerate(test_vals):
            # Row `row` of pred was built from data['x'][sample_idx], so the
            # matching label is y_char[sample_idx], not y_char[row].
            print(data['y_char'][sample_idx])
            print(decode(pred[row], sess, model, chars, vocab,
                         list_of_alphabets))
            print('\n\n')
    finally:
        # Release the session even if loading or decoding fails.
        sess.close()
def insertpending(txhex):
    """Attempt to decode a pending transaction given as a hex string.

    Args:
        txhex: Hex-encoded transaction handed to ``decode``.

    Returns:
        None. A decode failure is reported on stdout and swallowed.
    """
    try:
        rawtx = decode(txhex)  # result is not used in the visible code
    except Exception as e:
        # Python 3 spelling of ``except Exception, e`` and the print
        # statement; the emitted text (items joined by spaces) is unchanged.
        print("Error: ", e, "\n Could not decode PendingTx: ", txhex)
        return
def checkpendingpaymentduplicate(txhex):
    """Try to decode an unsigned pre-transaction given as a hex string.

    Args:
        txhex: Hex-encoded transaction handed to ``decode``.

    Returns:
        False when ``decode`` raises (after logging via ``print_debug`` at
        level 2). The visible code falls through and returns None otherwise.
    """
    ret = False
    try:
        rawtx = decode(txhex)  # result is not used in the visible code
    except Exception as e:
        # ``as e`` replaces the Python 2-only ``except Exception, e`` form.
        print_debug(("Error: ", e,
                     "\n Could not decode unsignedpretx: ", txhex), 2)
        return ret
def main(args):
    """Evaluate four alignment models on the Task5 set and write Task5.txt.

    The report starts with the module-level ``discussion`` text, followed by
    one section per alignment model (trained on 1000, 10000, 15000 and 30000
    sentences) holding the result of ``_get_BLEU_scores`` for n = 1, 2, 3.

    Args:
        args: Parsed command-line arguments; not read by this function.
    """
    indir = "/u/cs401/A2_SMT/data/Hansard/"
    alignment_size = [1000, 10000, 15000, 30000]

    with open(indir + "Testing/Task5.f", "r") as french_file:
        lines = french_file.readlines()

    # decode() ranks English candidates, so it needs the English language
    # model; the original passed "f" here.
    LM = _getLM(indir + "Training/", "e", "./lm", False)

    # Context managers close the reference files instead of leaving the
    # handles to the garbage collector.
    with open(indir + "Testing/Task5.e", "r") as ref_file:
        eng_ref = [preprocess(i, 'e') for i in ref_file.readlines()]
    with open(indir + "Testing/Task5.google.e", "r") as ref_file:
        goo_ref = [preprocess(j, 'e') for j in ref_file.readlines()]

    with open("Task5.txt", 'w+') as f:
        f.write(discussion)
        f.write("\n\n")
        f.write("-" * 10 + "Evaluation START" + "-" * 10 + "\n")

        print("AM training")
        AMs = [
            _getAM(indir + "Training/", size, 20, "./am" + str(size), False)
            for size in alignment_size
        ]

        for i, size in enumerate(alignment_size):
            f.write(f"\n### Evaluating AM model: AM{i} ### \n")
            f.write("Training size " + str(size) + ":\n")
            AM = AMs[i]
            print(i)
            decoded_eng = [
                decode(preprocess(line, "f"), LM, AM) for line in lines
            ]
            for n in range(1, 4):
                bleu = _get_BLEU_scores(decoded_eng, eng_ref, goo_ref, n)
                f.write("BLEU_score with n:" + str(n) + "\n")
                f.write("bleu: " + str(bleu) + "\n\n")
            f.write("\n\n")

        f.write("-" * 10 + "Evaluation END" + "-" * 10 + "\n")
def parse(data):
    """Handle one incoming message whose first character is its command.

    ``"c<id>"`` stores ``<id>`` in the module-level ``self_id``.
    ``"t<id>,<payload>"`` decodes and prints the payload unless ``<id>``
    equals ``self_id``. Any other command, or an empty message, is ignored.

    Args:
        data: The raw message string.
    """
    # self_id lives at module level so that later calls can compare to it.
    global self_id

    # Nothing to dispatch on; indexing data[0] would raise IndexError.
    if not data:
        return

    cmd = data[0]
    msg = data[1:]

    if cmd == "c":
        # Identifier assignment (presumably issued by the server - confirm).
        self_id = int(msg)
    elif cmd == "t":
        # Text message: "<sender id>,<payload>".
        params = msg.split(",", 1)
        sender_id = int(params[0])
        # Only show messages that come from somebody else.
        if sender_id != self_id:
            decode(params[1])
            # tmpCode is presumably set by decode() - verify against it.
            print(tmpCode)
def __init__(self):
    """Initialise the class."""
    super(krl, self).__init__()
    # Fixed settings for this instance; units are not visible here
    # (presumably Hz for frequency - TODO confirm).
    self.frequency = 525
    self.code = 0x2C
    # Processing stages, each built by a helper defined outside this block.
    self.limiter = limiter(2, 2)
    self.det = fsk_det(19.55)
    #self.det_iir = det_iir()
    self.comp_det = comparator(-0.1, 0.1, 1)
    self.sem_pll = pll2(1)
    self.decode = decode()
def eval_candidates(test_dir, filename, LM_filename, AM_filename, n):
    """Decode every candidate sentence and score it with BLEU.

    Args:
        test_dir: Directory forwarded to the candidate/reference readers.
        filename: File name forwarded to the candidate/reference readers.
        LM_filename: Path of the pickled language model.
        AM_filename: Path of the pickled alignment model.
        n: n-gram order passed to ``BLEU_score``.

    Returns:
        A list with one BLEU score per candidate, in candidate order.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files from a trusted source.
    # ``with`` closes both model files; pickle.load(open(...)) leaked them.
    with open(LM_filename, "rb") as lm_file:
        LM = pickle.load(lm_file)
    with open(AM_filename, "rb") as am_file:
        AM = pickle.load(am_file)

    cans = read_in_candidates(test_dir, filename)
    refs = read_in_references(test_dir, filename)

    # refs is indexed by position so a short refs list still raises.
    return [
        BLEU_score(decode(candidate, LM, AM), refs[i], n)
        for i, candidate in enumerate(cans)
    ]
def main():
    """Write BLEU scores of decoded Task5 sentences to Task5.txt.

    For each reference file and each alignment-model size, every French test
    sentence is decoded and its translation is scored (n = 1, 2, 3) against
    the reference sentence at the same position.
    """
    fre_path = '../data/Hansard/Testing/Task5.f'
    reference_paths = [
        "../data/Hansard/Testing/Task5.e",
        "../data/Hansard/Testing/Task5.google.e"
    ]
    analysis = (
        "TLDR ANALYSIS:\n\nIt is clear that the two references differ from "
        "one anther simply by looking at the bleue_score results. In the "
        "Task5.e testing file, there are rarely any non-zero outputs beyond "
        "those associated with n=1. However, in the Task5.google.e testing "
        "file, there are significantly more non-zero outputs for n=1 and "
        "even some non-zero bleue_score scores for n=2. This is an "
        "indication that translations can vary among the machine "
        "translation source.\n\nComparing to a variety of references may be "
        "better because there is no longer dependency on one particular "
        "translator and its specific word choices. Furthermore, comparing to "
        "multiple references keeps semantics under consideration. What is "
        "meant by this is that it is possible to translate a sentence "
        "accurately using two different sets of words, as the meaning is "
        "more important than the words and their ordering.\n\nExample: "
        "Translating “ce travail est difficile”.\nIt is arguable that both "
        "english sentences below are correct translations of this "
        "sentence:\n1. this job is difficult\n2. this work is hard\n\n"
        "However, there is not one matching bigram.\n\n"
    )
    # (sample_size handed to align_ibm1, thousands of sentences it stands for)
    model_sizes = ((2, 1), (25, 10), (35, 15), (72, 30))

    # The context manager flushes and closes the report even on failure.
    with open("Task5.txt", "w+") as f:
        f.write("################################################## \r\n")
        f.write("# SUMMARY OF BLEU SCORES ON TRANSLATED SENTENCES # \r\n")
        f.write("################################################## \r\n\n")

        with open(fre_path, 'r') as fre_file:
            fre_sentences = fre_file.readlines()
        e_LM = load_LMs("../models/e_language_model.pickle")  # English LM

        f.write(analysis)

        for ref_path in reference_paths:
            # Read each reference file once instead of reopening it for
            # every sentence without closing it.
            with open(ref_path, 'r') as ref_file:
                references = ref_file.readlines()

            f.write("==================================================\n")
            f.write(" REFERENCE: {} \n".format(ref_path))
            f.write("==================================================\n")

            for sample_size, num_sentences in model_sizes:
                AM = align_ibm1("../data/Hansard/Training/", sample_size, 15,
                                "AM")
                f.write("\n---------------------------------------\n")
                f.write("Using AM Model trained on {}K sentences\n".format(
                    num_sentences))
                f.write("---------------------------------------\n")

                for sent_num, sentence in enumerate(fre_sentences, start=1):
                    french = preprocess(sentence, "f")
                    english = decode(french, e_LM, AM)
                    # Score the decoded English against the reference for
                    # this same sentence; the original scored the raw French
                    # line against every line of the reference file.
                    reference = preprocess(references[sent_num - 1], "e")
                    f.write("Sentence {}:\n".format(sent_num))
                    for n in [1, 2, 3]:
                        bleu_score = BLEU_score(english, [reference], n)
                        f.write("n = {}: {}\n".format(n, bleu_score))
                    f.write("\n")
def revise():
    """Revise the posted text and return the result as a JSON string.

    A POST request must carry a JSON body with the key ``original_text``.
    The reply holds the decoded text under ``revised`` and the output of
    ``evaluate`` under ``evaluate``. Any other method gets a static HTML
    heading that serves as a connectivity check.
    """
    if request.method != 'POST':
        return '<h1>这是revise连通性测试</h1>'

    payload = request.get_json()
    source_text = payload['original_text']
    print({'original': source_text})

    revised_text = decode(source_text)
    result = {
        'revised': revised_text,
        'evaluate': evaluate(source_text, revised_text),
    }
    print(result)
    return json.dumps(result)
def evalAlign():
    """Train IBM-1 models of four sizes and write a BLEU report to 'Temp'.

    For every model and every French test sentence the translation is scored
    (n = 1, 2, 3) against the Hansard reference and against the Google
    reference for that same sentence.

    :return: None
    """
    max_iteration = 10
    train_dir = '/u/cs401/A2_SMT/data/Hansard/Training'
    corpus_size = [1000, 10000, 15000, 30000]

    # ``with`` guarantees the report is flushed and closed; the original
    # never closed any of the four files it opened.
    with open('Temp', 'w') as report:
        LM = lm_train(train_dir, "e", "englishLM")
        vocab_size = len(LM["uni"])
        report.write("Vocab size = " + str(vocab_size) + "\n")
        report.write("\n")

        with open('/u/cs401/A2_SMT/data/Hansard/Testing/Task5.e',
                  'r') as ref_file1:
            ref_file1_ref = ref_file1.readlines()
        with open('/u/cs401/A2_SMT/data/Hansard/Testing/Task5.google.e',
                  'r') as ref_file2:
            ref_file2_ref = ref_file2.readlines()
        with open('/u/cs401/A2_SMT/data/Hansard/Testing/Task5.f',
                  'r') as test_file:
            test_lines = test_file.readlines()

        # train IBM
        for i in corpus_size:
            f = "".join(['am_hansard_', str(i)])
            AM = align_ibm1(train_dir, i, max_iteration, f)
            report.write("Model with " + str(i) + " sentences. \n")
            for idx, line in enumerate(test_lines):
                report.write("The sentence is: " + line + '\n')
                line = preprocess(line, "f")
                trans = decode(line, LM, AM)
                report.write('\n')

                # Score against the reference for this sentence only; the
                # original passed every line of the reference file.
                hansard_refs = [preprocess(ref_file1_ref[idx], "e")]
                google_refs = [preprocess(ref_file2_ref[idx], "e")]
                bl1 = BLEU_score(trans, hansard_refs, 1)
                bl2 = BLEU_score(trans, hansard_refs, 2)
                bl3 = BLEU_score(trans, hansard_refs, 3)
                bl_google_1 = BLEU_score(trans, google_refs, 1)
                bl_google_2 = BLEU_score(trans, google_refs, 2)
                bl_google_3 = BLEU_score(trans, google_refs, 3)

                report.write("BLEU SCORE: \n")
                report.write("GOOGLE || SELF || i \n")
                report.write(str(bl_google_1) + " || " + str(bl1) + "|| 1 \n")
                report.write(str(bl_google_2) + " || " + str(bl2) + " || 2 \n")
                report.write(str(bl_google_3) + " || " + str(bl3) + " || 3 \n")
                report.write(
                    "====================================================================================\n"
                )
                report.write("\n")
def evalAlign(max_iter):
    """Score the Task5 French sentences with four alignment models.

    Each sentence of Task5.f is decoded with models trained on 1K, 10K, 15K
    and 30K sentences and compared, via BLEU with n = 1, 2, 3, against the
    Hansard reference (Task5.e) and the Google reference (Task5.google.e).

    Args:
        max_iter: Iteration count forwarded to ``align_ibm1``.

    Returns:
        An array of shape (25, 4, 3) indexed as
        ``[sentence, model, n - 1]``.
    """
    train_dir = "/u/cs401/A2_SMT/data/Hansard/Training/"
    bleu = np.zeros(shape=(25, 4, 3))
    LM = lm_train(train_dir, "e", "fn_LM_e")
    num_sentences = [1000, 10000, 15000, 30000]

    for model_idx, n_s in enumerate(num_sentences):
        AM = align_ibm1(train_dir, n_s, max_iter, "fm_AM_e_{}".format(n_s))

        # The test files are re-read for every model.
        with open("/u/cs401/A2_SMT/data/Hansard/Testing/Task5.f"
                  ) as french_file, open(
                      "/u/cs401/A2_SMT/data/Hansard/Testing/Task5.e"
                  ) as hansard_file, open(
                      "/u/cs401/A2_SMT/data/Hansard/Testing/Task5.google.e"
                  ) as google_file:
            french_lines = french_file.readlines()
            hansard_lines = hansard_file.readlines()
            google_lines = google_file.readlines()

            for row, raw_french in enumerate(french_lines):
                sentence = preprocess(raw_french.strip(), "f")
                ref_1_sentence = preprocess(hansard_lines[row].strip(), "e")
                ref_2_sentence = preprocess(google_lines[row].strip(), "e")
                english = decode(sentence, LM, AM)
                for col, order in enumerate((1, 2, 3)):
                    bleu[row][model_idx][col] = BLEU_score(
                        english, [ref_1_sentence, ref_2_sentence], order)
    return bleu
def main(args):
    """Decode the Task5 French sentences and compute their BLEU scores.

    Builds a language model and a 1000-sentence alignment model, decodes
    every line of Task5.f, preprocesses the Hansard and Google references at
    the same positions and passes all three lists to ``_get_BLEU_scores``
    with n = 1.

    Args:
        args: Parsed command-line arguments; not read by this function.
    """
    max_iter = 100
    num_sent = 1000
    data_dir = "../data/Hansard/Training/"
    language = 'e'
    fn_LM = 'e_temp_lm'
    fn_AM = 'e_temp_am'
    bleu_n = 1

    LM = _getLM(data_dir, language, fn_LM)
    AM = _getAM(data_dir, num_sent, max_iter, fn_AM)

    def _read_sentences(path):
        # Iterating the file drops the phantom empty sentence that
        # read().split('\n') appends after a trailing newline, and the
        # context manager closes the handle.
        with open(path, 'r') as handle:
            return [line.rstrip('\n') for line in handle]

    data_f = _read_sentences('../data/Hansard/Testing/Task5.f')
    data_e = _read_sentences('../data/Hansard/Testing/Task5.e')
    data_er = _read_sentences('../data/Hansard/Testing/Task5.google.e')

    e = []
    ref1 = []
    ref2 = []
    for i, french in enumerate(data_f):
        f_prep = preprocess(french, 'f')
        e.append(decode(f_prep, LM, AM))
        # Indexed access keeps the IndexError if a reference file is short.
        ref1.append(preprocess(data_e[i], 'e'))
        ref2.append(preprocess(data_er[i], 'e'))

    # The scores are computed but not reported by the visible code.
    scores = _get_BLEU_scores(e, ref1, ref2, bleu_n)
    print('done')
def evalAlign(hansard_english, hansard_french, google_english, LM_PATH,
              train_dir, fn_AM, report_path):
    """Evaluate IBM-1 alignment models with BLEU and write a report.

    For alignment models trained on 1000, 10000, 15000 and 30000 sentences,
    the first 25 French test sentences are decoded and scored (n = 1, 2, 3)
    against the Hansard and Google references at the same position.

    hansard_english: /Hansard/Testing/Task5.e
    hansard_french: /Hansard/Testing/Task5.f
    google_english: /Hansard/Testing/Task5.google.e
    LM_PATH: English language_model path
    train_dir: directory for training set
    fn_AM: path to save AM
    report_path: path to save Task5.txt report
    """

    def _read_sentences(path):
        # Close the input file once its content is in memory.
        with open(path) as handle:
            return handle.read().split('\n')

    # The context manager closes the report even if training or decoding
    # raises half-way through.
    with open(report_path, 'w') as report_file:
        hansard_english = _read_sentences(hansard_english)
        hansard_french = _read_sentences(hansard_french)
        google_english = _read_sentences(google_english)

        LME = lm_train(train_dir, "e", LM_PATH)

        for num_sentences in [1000, 10000, 15000, 30000]:
            AM = align_ibm1(train_dir, num_sentences, 5, fn_AM)
            for f in range(25):
                fproc = preprocess(hansard_french[f], 'f')
                # Decoding and reference preprocessing do not depend on n,
                # so they run once per sentence rather than once per n.
                my_english = decode(fproc, LME, AM)
                hans_ref = preprocess(hansard_english[f], 'e')
                ggle_ref = preprocess(google_english[f], 'e')
                references = [hans_ref, ggle_ref]
                for n in range(1, 4):
                    print('MY-CANDIDATE: {}\nHANS-REF: {}\nGOOGLE-REF: {}'.
                          format(my_english, hans_ref, ggle_ref))
                    score = BLEU_score(my_english, references, n)
                    report_string = 'french-sentence: {}, my-english-sentence: {}, num-sentences-for-AM: {}, n-grams: {}, BLEU-score: {}'.format(
                        fproc, my_english, num_sentences, n, score)
                    report_file.write(report_string + '\n')
def listen(key_file=None):
    """Record from the microphone until ESC is pressed, then decode the audio.

    The recording is written to ``WAVE_OUTPUT_FILENAME`` and that path is
    passed to ``decode`` together with ``key_file``.

    Args:
        key_file: Forwarded unchanged to ``decode``.

    Returns:
        Whatever ``decode(WAVE_OUTPUT_FILENAME, key_file)`` returns.
    """
    p = pyaudio.PyAudio()
    try:
        # Query the sample width before PyAudio is terminated.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            frames = []
            old_settings = termios.tcgetattr(sys.stdin)
            try:
                # cbreak mode delivers single key presses without Enter.
                tty.setcbreak(sys.stdin.fileno())
                print("* recording, press ESC to stop")
                while True:
                    frames.append(stream.read(CHUNK))
                    if isData() and sys.stdin.read(1) == '\x1b':
                        print("* done recording")
                        break
            finally:
                termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
        finally:
            # Release the audio device on every exit path, not only on ESC.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    return decode(WAVE_OUTPUT_FILENAME, key_file)
def evalAlign(train_dir, fn_AM, fn_LM, test_dir, max_iter, num_sentences,
              bleu_n):
    """Train the models and decode every ``Task5.f`` found under ``test_dir``.

    The translations are currently discarded; no BLEU score is computed and
    nothing is returned.

    :param train_dir: training data directory for both models
    :param fn_AM: file name passed to ``align_ibm1``
    :param fn_LM: file name passed to ``lm_train``
    :param test_dir: directory searched recursively for ``Task5.f``
    :param max_iter: iteration count passed to ``align_ibm1``
    :param num_sentences: training sentence count passed to ``align_ibm1``
    :param bleu_n: unused by the visible code
    :return: None
    """
    # generate LM and AM
    LM = lm_train(train_dir, "e", fn_LM)
    print("LM generated")
    AM = align_ibm1(train_dir, num_sentences, max_iter, fn_AM)
    print("AM generated")

    if not os.path.exists(test_dir):
        print("Path " + test_dir + " does not exist ...")
        return

    print("Correct path...")
    for subdir, dirs, files in os.walk(test_dir):
        for file_name in files:
            # test file name must be "Task5.f"
            if file_name != "Task5.f":
                continue
            # Join with the directory os.walk is visiting: test_dir +
            # file_name breaks for nested files and for a test_dir without
            # a trailing separator.
            with open(os.path.join(subdir, file_name), "r") as f:
                for line in f:
                    french = preprocess(line, "f")
                    # this is the candidate
                    english = decode(french, LM, AM)
def run_test(data, code_type, count_results):
    """Check ``encode`` or ``decode`` against a list of expected pairs.

    Args:
        data: Iterable of ``(plain, coded)`` pairs.
        code_type: ``'encode'`` checks ``encode(plain) == coded``;
            ``'decode'`` checks ``decode(coded) == plain``; any other value
            checks nothing.
        count_results: When equal to ``True`` the pair lists in the result
            are replaced by their lengths.

    Returns:
        ``[code_type, result]`` where ``result`` maps ``'True'`` and
        ``'False'`` to the passing and failing pairs (or to their counts).
    """
    outcome = {'True': [], 'False': []}

    if code_type == 'encode':
        for pair in data:
            bucket = 'True' if encode(pair[0]) == pair[1] else 'False'
            outcome[bucket].append(pair)
    elif code_type == 'decode':
        for pair in data:
            bucket = 'True' if decode(pair[1]) == pair[0] else 'False'
            outcome[bucket].append(pair)

    # Equality with True (not mere truthiness) is what the caller relies on.
    if count_results == True:
        for bucket in ('True', 'False'):
            outcome[bucket] = len(outcome[bucket])

    return [code_type, outcome]
def evalAlign(fre_dir, LM_path, AM_path):
    """Decode the French test file and print BLEU scores for n = 1, 2, 3.

    The input paths come from the module-level names ``french_path``,
    ``human_ref`` and ``google_ref``.

    Args:
        fre_dir: Not read by this function.
            NOTE(review): possibly meant in place of ``french_path`` -
            confirm.
        LM_path: Path of the pickled language model.
        AM_path: Path of the pickled alignment model.
    """
    with open(LM_path, "rb") as lm_handle:
        LM = pickle.load(lm_handle)
    with open(AM_path, "rb") as am_handle:
        AM = pickle.load(am_handle)

    english_candidates = []
    with open(french_path) as french_file:
        for raw_sentence in french_file:
            # The French sentence must be preprocessed before decoding.
            translation = decode(preprocess(raw_sentence, "f"), LM, AM)
            english_candidates.append(translation)
            print(translation)

    with open(human_ref) as english_file:
        e_refs = [preprocess(raw, "e") for raw in english_file]
    with open(google_ref) as english_file:
        google_e_refs = [preprocess(raw, "e") for raw in english_file]

    for n_grams_level in (1, 2, 3):
        print("n_grams_level=%d" % n_grams_level)
        # One BLEU score per candidate, against both references.
        for idx, candidate in enumerate(english_candidates):
            print(BLEU_score(candidate, [e_refs[idx], google_e_refs[idx]],
                             n_grams_level))
temp.append(i['read_bytes']) else: temp.append(0) data.append(temp) data.sort(key=(lambda x: x[0])) area = [0 for i in range(area_num)] num = [0 for i in range(area_num)] hot_data_index = [0 for i in range(area_num)] for i in range(len(data)): key = '' if i == 0: # 因为第一个region的start_key为‘’,所以使用end_key代替 key = data[i][1] else: key = data[i][0] type, tableid, rowid, indexvalue = decode(key) if (tablelist.count(tableid)): #把数据的读写都加上 area[tablelist.index(tableid)] = area[tablelist.index( tableid)] + data[i][2] + data[i][3] num[tablelist.index(tableid)] += 1 for i in range(area_num): area[i] = area[i] // num[i] #注释了是总负载,不注释是平均负载 #hot_data_index[i] = find_hot_data(tablelist[i]) #补上缺少的时间,缺少的直接使用前一刻的数据 #nowtime = int(files[k][-7:-5]) print(k, int(files[k][0:-4])) nowtime = time.localtime(int(files[k][0:-4])).tm_min if k == 0: print(files[k], nowtime, area)
# Set-up for an FSK receiver run: signal sources, processing stages and the
# buffers that collect each stage's samples.
# NOTE(review): all helpers below, `h_bpf_525` and `fs` are assumed to come
# from the star import - confirm.
from decode import *

noise1 = white_noise(0)
fsk1 = fsk_gen(525,235*1,0x2c) # source fsk signal
fsk2 = fsk_gen(475,235*2,0x3c) # interference fsk signal 1
fsk3 = fsk_gen(575,235*2,0x5a) # interference fsk signal 2
limiter0_in = limiter (-200,200)
limiter_in = limiter (-2,2) # input limiter
chan_fir = fir(h_bpf_525) #.channel filter
det = fsk_det(19.55) # fsk detector #19.55@525
det_iir = IIR2Filter(4, [10], 'low',design='cheby1',rs = 2, fs=fs)
comp_det = comparator(-0.1,0.1, 1)# comparator after fsk detector filter
sem_pll = pll2(1)
decoder1 = decode()

# One list per tap point in the processing chain.
noise_buf = []
signal_buf = []
signal2_buf = []
limiter0_buf = []
filter_buf = []
limiter_buf = []
fsk_det_buf = []
fsk_det_flt_buf = []
comp_buf = []
sem_pll_buf = []
sem_pll_err_buf = []
input_signal_buf = []
# Locate the data area in an image, decode it and print the message.
from model import *
from decode import *

img = './result.jpg'
# NOTE(review): `model` is used as a name although only a star import of the
# module is visible; this works only if that module exports `model` - confirm.
img_data, module_size, x_locations_list, y_locations_list = model.getDataArea(img)
# The detected grid positions are discarded and replaced by a fixed grid of
# 30 boundaries spaced 16 apart (0, 16, ..., 464) on both axes.
x_locations_list = list(range(0, 29 * 16 + 1, 16))
y_locations_list = list(range(0, 29 * 16 + 1, 16))
message = decode(img_data, module_size, x_locations_list, y_locations_list)
print(message)
def main(args):
    """Evaluate four alignment models on Task5 and write Task5.txt.

    Loads or trains the language model and four alignment models (1000,
    10000, 15000 and 30000 sentences), decodes the first 25 French test
    sentences with each model and writes the values returned by
    ``_get_BLEU_scores`` for n = 1, 2, 3 to ``Task5.txt``.

    Args:
        args: Parsed command-line arguments; not read by this function.
    """
    train_dir = "/u/cs401/A2_SMT/data/Hansard/Training/"
    test_dir = "/u/cs401/A2_SMT/data/Hansard/Testing/"

    LM = _getLM(train_dir, "e", "task2", use_cached=True)
    # Keyed by training size; insertion order is the evaluation order.
    AMs = {
        1000: _getAM(train_dir, 1000, 100, "am", use_cached=False),
        10000: _getAM(train_dir, 10000, 100, "am_10k", use_cached=True),
        15000: _getAM(train_dir, 15000, 100, "am_15k", use_cached=True),
        30000: _getAM(train_dir, 30000, 100, "am_30k", use_cached=True),
    }

    candidates = []
    hansard = []
    google = []
    # Context managers close the three inputs; they were left open before.
    with open(test_dir + "Task5.f") as f_candidate, \
            open(test_dir + "Task5.e") as f_hansard, \
            open(test_dir + "Task5.google.e") as f_google:
        for _ in range(25):
            candidates.append(preprocess(f_candidate.readline(), 'f'))
            hansard.append(preprocess(f_hansard.readline(), 'e'))
            google.append(preprocess(f_google.readline(), 'e'))

    with open("Task5.txt", 'w+') as f:
        f.write("-" * 10 + "Evaluation START" + "-" * 10 + "\n")

        for i in AMs:
            AM = AMs[i]
            f.write(
                "\n### Evaluating AM model: number of sentenses = %d ### \n"
                % i)
            # Decode using AM, then evaluate with three n-gram orders.
            decoded_sen = [decode(candidates[j], LM, AM) for j in range(25)]
            for n in range(1, 4):
                f.write("\nBLEU scores with N-gram (n) = %d: " % n)
                evals = _get_BLEU_scores(decoded_sen, hansard, google, n)
                for v in evals:
                    f.write("\t{%1.4f}" % v)
            f.write("\n\n")

        f.write("-" * 10 + "Evaluation END" + "-" * 10 + "\n")
# Smoke run: feed random inputs through decode() and print the output shape.
import tensorflow as tf
from decode import *
# import tensorflow.contrib.eager as tfe
# tfe.enable_eager_execution()

batch = 4
max_len = 10
dim = 2
hidden_size = 3
# Random input tensor created with shape [max_len, batch, dim].
inputs = tf.random_normal([max_len, batch, dim])
# One length per batch entry.
lengths = tf.convert_to_tensor([4,5,6,7])
# Zero-filled initial state tensors, each of shape [batch, hidden_size].
c = tf.zeros([batch, hidden_size])
h = tf.zeros([batch, hidden_size])
# NOTE(review): TagLSTMStateTuple presumably comes from the star import - confirm.
state = TagLSTMStateTuple(c, h, tf.zeros_like(c))
output = decode(inputs, state, lengths, hidden_size)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    t = sess.run(output)
    print(t.shape)
    # print(t)
def _first_lines_for_eval(path, limit, transform=None):
    """Return up to *limit* leading lines of *path*, optionally transformed."""
    collected = []
    with open(path, 'r') as fp:
        for line in fp:
            collected.append(line if transform is None else transform(line))
            if len(collected) >= limit:
                break
    return collected


def _bleu_rows_for_model(sentences, reference_sets, LM, AM, count):
    """Return ``[bleu1, bleu2, bleu3]`` (rounded to 4 places) per sentence."""
    rows = []
    for x in range(0, count):
        candidate = decode(preprocess(sentences[x], 'f'), LM, AM)
        # Reference sentences for this French sentence
        refs = [ref_set[x] for ref_set in reference_sets]
        rows.append(
            [round(BLEU_score(candidate, refs, n), 4) for n in range(1, 4)])
    return rows


def evalAlign(iterations):
    """Print BLEU scores of the 25 Task5 sentences for four model sizes.

    Trains a language model and an IBM-1 alignment model for 1k, 10k, 15k
    and 30k training sentences, decodes the first 25 French test sentences
    with each pair and prints, per sentence, the n = 1, 2, 3 BLEU scores of
    all four models.

    Args:
        iterations: Iteration count forwarded to ``align_ibm1``.
    """
    num_sentences = 25
    test_dir = '/u/cs401/A2_SMT/data/Hansard/Testing/'
    train_dir = '/u/cs401/A2_SMT/data/Hansard/Training/'

    # One (label, LM, AM) triple per training size. Each size trains its own
    # LM, saved under its own name (LM1..LM4), exactly as before.
    models = []
    sizes = (('1k', 1000), ('10k', 10000), ('15k', 15000), ('30k', 30000))
    for number, (label, size) in enumerate(sizes, start=1):
        print("Generating LM and AM for " + label)
        print("Generating LM")
        LM = lm_train(train_dir, 'e', 'LM' + str(number))
        print("Generating AM")
        AM = align_ibm1(train_dir, size, iterations, 'AM' + str(number))
        models.append((label, LM, AM))

    # Read the 25 French sentences (left raw until decoding).
    sens_f = _first_lines_for_eval(test_dir + 'Task5.f', num_sentences)

    def to_english(line):
        return preprocess(line, 'e')

    # Three reference sets per French sentence.
    # NOTE(review): 'Task5.e~' looks like an editor backup file - confirm it
    # is an intended third reference.
    reference_sets = [
        _first_lines_for_eval(test_dir + name, num_sentences, to_english)
        for name in ('Task5.google.e', 'Task5.e', 'Task5.e~')
    ]

    all_scores = []
    for label, LM, AM in models:
        print("Calculating scores for " + label)
        all_scores.append(
            _bleu_rows_for_model(sens_f, reference_sets, LM, AM,
                                 num_sentences))

    for x in range(0, num_sentences):
        print("Sentence " + str(x + 1) + " :",
              *[model_scores[x] for model_scores in all_scores])
AM = align_ibm1("/u/cs401/A2_SMT/data/Hansard/Training/", num_sent, 40, "~") for n in [1, 2, 3]: print("\n\n-----------------") print("EVALUATING WITH: ", num_sent, " TOTAL TRAINING SENTENCES") print("VALUE OF BLEU SCORE N: ", n) results.write("\n\n\n-----------------") results.write("\nEVALUATING WITH: " + str(num_sent) + " TOTAL TRAINING SENTENCES") results.write("\nVALUE OF BLEU SCORE N: " + str(n) + "\n") with open("/u/cs401/A2_SMT/data/Hansard/Testing/Task5.f") as freFile, \ open("/u/cs401/A2_SMT/data/Hansard/Testing/Task5.google.e") as groundTruth, \ open("/u/cs401/A2_SMT/data/Hansard/Testing/Task5.e") as groundTruthHansard: accum = 0.0 for freLine, engTruth1, engTruth2 in zip(freFile, groundTruth, groundTruthHansard): engSentence = decode(preprocess(freLine, 'f', add_null=True), LM, AM) engTruth1 = preprocess(engTruth1, 'e', add_null=True) engTruth2 = preprocess(engTruth2, 'e', add_null=True) dist1 = abs(len(engTruth1.split()) - len(engSentence.split())) dist2 = abs(len(engTruth2.split()) - len(engSentence.split())) if( dist1 > dist2 ): bleuSent = engTruth2 dist = dist2 else: bleuSent = engTruth1 dist = dist1 BP = min(1, math.exp(1 - (len(bleuSent.split())/len(engSentence.split())))) engSentence = engSentence.split() engTruth1 = engTruth1.split() engTruth2 = engTruth2.split()
def decodeNTimes(input, n):
    """Apply ``decode`` to ``input`` ``n`` times in a row.

    Args:
        input: The value to decode.
        n: Number of passes; zero or a negative value returns ``input``
            unchanged.

    Returns:
        The result of the last ``decode`` pass.
    """
    current = input
    for _ in range(n):
        current = decode(current)
    return current
File "<pyshell#16>", line 1, in <module> import tflearn ImportError: No module named tflearn >>> import theano >>> import zbar Traceback (most recent call last): File "<pyshell#18>", line 1, in <module> import zbar ImportError: No module named zbar >>> def dec(path): image = cv2.imread('pyzbar/tests/code128.png') >>> height, width = image.shape[:2] >>> # 8 bpp by considering just the blue channel >>> decode((image[:, :, 0].astype('uint8').tobytes(), width, height)) SyntaxError: invalid syntax >>> def dec(path): image = cv2.imread('pyzbar/tests/code128.png') height, width = image.shape[:2] # 8 bpp by considering just the blue channel decode((image[:, :, 0].astype('uint8').tobytes(), width, height)) >>> def dec(path): image = cv2.imread('pyzbar/tests/code128.png') height, width = image.shape[:2] # 8 bpp by considering just the blue channel print decode((image[:, :, 0].astype('uint8').tobytes(), width, height))
import sys
from encode import *
from decode import *

# Command-line entry point: ``<script> encode TEXT`` encodes TEXT; any other
# first argument decodes it. The result is printed to stdout.
if __name__ == '__main__':
    # Fail with a usage message instead of a bare IndexError when the mode
    # or the text is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: {} (encode|decode) TEXT".format(sys.argv[0]))

    result = ''
    if sys.argv[1] == 'encode':
        result = encode(sys.argv[2])
    else:
        result = decode(sys.argv[2])
    print(result)
#flt_fir2 = FIR2Filter(255, 1,15, fs=fs) #*--ФАПЧ канала 0------------------------------------------------------------- pll0 = pll2() pll0.scale_fs = 70 pll0.sign_moment = 4.5 pll0.Fs = Fs #*--ФАПЧ канала 90------------------------------------------------------------- pll90 = pll2() pll90.scale_fs = 70 pll90.sign_moment = 4.5 pll90.Fs = Fs #*--Декодер канала 0---------------------------------------------------------- decoder0 = decode() #*--Декодер канала 90 -------------------------------------------------------- decoder90 = decode() #*-------------------------------------------------------------------------- #*--конец инициализации узлов приемника--------------------------------------- to_plot = True start_time2 = time.clock() for i in range (len(t)): # главный цикл приемника #*--локальный генератор sin и cos------------------------------------------ gen0, gen90 = rx.local_gen(t)
print('Data memory: ', reg[0], '\tValue: ', reg[1]) print('\n') i = 0 while i in range(len(mem_list)): full_text = fetch_instruction(IMEM[i]) # Fetch instruction text_split = full_text.split() last_entry = text_split[-1] opcode_text = text_split[0] last_entry = int(last_entry) if text_split[0] in ['B']: i = i - int(text_split[1]) else: data_list = decode(RMEM, full_text) # Decode instruction and fetch registers # Instantiate register values reg1 = data_list[0] if len(data_list) == 2: reg2 = data_list[1] else: if text_split[0] in ['LDUR', 'STUR']: reg2 = 0 else: reg2 = int(text_split[1]) result_alu = alu(text_split[0], reg1, reg2) # Input registers into ALU if text_split[0] in ['CBZ', 'CBNZ']: if result_alu == False: