def DCN(trainpath,testpath,modelpath,dstl = False, target = True):
    """Run the detect-then-correct pipeline on one train/test pair.

    Trains a binary detector, flags suspicious test inputs, runs the
    Corrector over them, and prints clean-input accuracy, attack-success
    rate, and the correction wall-clock time.
    """
    train_set = load_data(trainpath)
    test_set = load_data(testpath)
    #good = load_data('data/mnist5kgood55k.pkl')
    if dstl:
        # train.dstl()
        test_set.dstl()
    # Normalization factor for the attack-success rate under targeted attacks.
    para = 9 if target else 1
    detector, adv_accu = binary_model(train_set, test_set)
    flagged = detect(detector, test_set)
    print('**********corrector************')
    region_model = CIFARModel(modelpath)
    started = time.time()
    fixer = Corrector(region_model, testpath, flagged, target = target,r=0.02, n = 50)
    error = fixer.correct()
    finished = time.time()
    accuracy_good = (test_set.num - error[0])/test_set.num
    attack_success = (1-adv_accu)+error[1]/test_set.num/para
    print('accuracy_good:',accuracy_good)
    print('attack_success:',attack_success)
    print('time:', finished - started)
def execute(self):
    """Check a submission (authors file + packaging) and return its score."""
    # Connection to the grading server.
    checker = Corrector(self.sd)
    # Check and parse the autores.txt file.
    practica, fp = checker.extractFicheroPractica('autores.txt')
    # Check the packaged tarball named after the parsed value.
    checker.checkEmpaquetado(practica.rstrip('\r\n ') + '.tar.gz')
    # Everything went fine.
    return self.getScore()
def __init__(
        self,
        datapath,
):
    """Load every persisted artifact the retrieval model depends on.

    datapath -- CSV file read into ``self.data`` (presumably the QA corpus
    the other artifacts were built from — TODO confirm against callers).
    """
    print('Initializing retrival model...')
    self.data = pd.read_csv(datapath)
    # Pickled TF-IDF model and the per-document TF-IDF vectors.
    self.tfidf = unpickle_file('model/tfidf.model')
    self.tfidf_vec = unpickle_file('data/doc_tfidf_vec.bin')
    # Inverted index for term lookup.
    self.inverse_idx = unpickle_file('data/inverse_idx_table.bin')
    # word->id mapping plus the reversed id->word mapping derived from it.
    self.word_2_id = unpickle_file('data/full_word2id.bin')
    self.id_2_word = {d: w for w, d in self.word_2_id.items()}
    self.word_2_id_for_filter = unpickle_file('data/tfidf_word2vec.bin')
    # IDF table and average document length (BM25-style statistics —
    # NOTE(review): presumed; confirm where idf/avg_len are consumed).
    self.idf, self.avg_len = unpickle_file('data/idf_and_avglen.bin')
    # Word embeddings and an annoy index over sentence vectors.
    self.word_vec = load_gensim_vec('data/wordvec_fasttext_300.txt')
    self.annoy = load_annoy_index('data/sentvec.ann')
    # LTP pipeline with custom segmentation/POS lexicons, plus text
    # preprocessing helpers and stopword lists.
    self.ltp = nlp.Ltp(seg=True, pos=True, seg_lexicon_path='data/lexicon_seg.txt', pos_lexicon_path='data/lexicon.txt')
    self.text_precess = nlp.ProcessText()
    self.stopwords = nlp.load_stop( ['data/chinese_stopwords.txt', 'data/哈工大停用词表.txt'])
    # Filler words stripped from queries.
    self.nonsense_word = ['请问', '想知道']
    self.corrector = Corrector()
    print('Retrival model established.')
class CorrectHandler(tornado.web.RequestHandler):
    """Tornado handler that returns spelling-correction candidates for a query."""

    def __init__(self, application, request, **kwargs):
        super(CorrectHandler, self).__init__(application, request, **kwargs)
        # One Corrector instance per handler object.
        self.corrector = Corrector()

    def get(self):
        # Response payload; 'correct_result' carries the candidate list.
        ret_data = {'correct_result': []}
        try:
            query = self.get_argument('query', '')
            logging.info('Starting query: %s, port: %s', query, options.port)
            if not query:
                pass
            else:
                query = urllib.unquote(query)
                # Only attempt correction on short queries: pure ASCII
                # alphanumerics up to 20 chars, or anything up to 8 chars.
                if (re.match(r'^[a-zA-Z0-9]+$', query) and len(query) <= 20) or len(query) <= 8:
                    correct_result = self.corrector.get_correct_words(query)
                    candidate = correct_result['candidate']
                    logging.info(u'query: {query}, status: {status}, msg: {msg}, candidate: {candidate}'\
                        .format(query = query, status = correct_result['status'], msg = correct_result['msg'], candidate = u','.join(candidate)))
                    ret_data['correct_result'] = candidate
                else:
                    ret_data['correct_result'] = []
        except Exception, e:
            # On any failure, report an error code/message instead of candidates.
            ret_data['error_code'] = -1
            ret_data['error_msg'] = str(e)
            logging.error(e)
        finally:  # NOTE(review): chunk truncated here — the finally body (presumably the response write) is not visible in this view
def error_detection(self):
    """Render the input text with detected error spans highlighted in red."""
    error_sentences = self.textEdit.toPlainText()
    # NOTE(review): `c` is a module-level Corrector instance — confirm.
    corrected_sent, err = Corrector.correct(c, error_sentences)
    pieces = []
    cursor = 0
    for span in err:
        # span[2]:span[3] delimits the erroneous slice; preceding text stays black.
        pieces.append("<font color='black'>" + error_sentences[cursor:span[2]])
        pieces.append("<font color='red'>" + error_sentences[span[2]:span[3]])
        cursor = span[3]
    # Remainder after the last error span.
    pieces.append("<font color='black'>" + error_sentences[cursor:])
    self.textBrowser.setText(''.join(pieces))
def __init__(self):
    """Initialize the corrector engine and the UI/session state."""
    # Word corrector used by the command-processing methods.
    self.main = Corrector()
    # Last matched word and geographic match (empty until set elsewhere).
    self.match = ""
    self.matchgeo = ""
    self.commands = []
    # Current option menu presented to the user.
    self.option = Options()
    # Chunked display buffer for paged output, plus the previous display.
    self.display = Chunks("")
    self.display.create_Chunks()
    self.last_Display = ""
    # Separate chunk buffer used for find/search results.
    self.find = Chunks("")
    self.find.create_Chunks()
    self.table = Outline()
    # History of previously resolved locations.
    self.location_History = {}
def load(self): print "reading index" input = open('invertedIndex.pkl', 'rb') self.invertedIndex = cPickle.load(input) print "reading wordSet" input = open("wordSet.pkl", 'rb') self.wordSet = cPickle.load(input) print "reading dictionary" print "reading fileDictionary" input = open('fileName.pkl', 'rb') self.filedict = cPickle.load(input) print "loading vsm" self.vsm = VSM(self.invertedIndex, len(self.filedict)) self.corrector = Corrector('trainer')
from corrector import Corrector

# Which dataset reader to train the correction model on.
reader = 'Conll'

if reader == 'Conll':
    train_path, test_path, model_path = (
        'dataset/CONLL/train.txt', 'dataset/CONLL/test.txt', 'conll_correcter_model')
elif reader == 'MovieDialog':
    train_path, test_path, model_path = (
        'dataset/movie_lines.txt', 'dataset/test.txt', 'dialog_correcter_model')

corrector = Corrector(train_path, test_path, model_path, reader=reader)
corrector.train()
def __init__(self,name,chain,outputfile,mode) :
    """Build the electron-trigger efficiency reducer for one sample.

    Hooks up the input branches on `chain`, books an output TTree plus
    histograms and efficiency graphs, runs the event loop applying the
    selection cuts, fits the efficiencies with constants, and writes
    everything into `outputfile`.

    name       -- sample name; a name containing 'data' marks a data sample
    chain      -- input chain carrying the 'eltrig_*' branches
    outputfile -- ROOT file the tree/histograms/graphs are written into
    mode       -- analysis mode; 't' enables the trigger-selection cuts
    """
    self.name = name
    # Data samples are identified by substring match on the name.
    isdata = name.find('data')!=-1
    self.chain = chain
    self.mode=mode
    # Event-weight corrector; the pileup histogram passed here is freshly
    # constructed (NOTE(review): looks like a placeholder — confirm).
    self.corrector = Corrector(isdata,'no',TH1F('pileup','pileup',100,0.,100.),'B',{'alpha':1.0,'epsilon':1.0})
    #Get relevant branches
    #Physics objects: each dict entry maps a short key to (buffer, reset_value)
    self.phyobjbs = {}
    themuon_pt = array('f',[-1.0]); self.chain.SetBranchAddress('eltrig_themuon_pt',themuon_pt); self.phyobjbs['themuon_pt'] = (themuon_pt,-1.0)
    themuon_eta = array('f',[100.0]); self.chain.SetBranchAddress('eltrig_themuon_eta',themuon_eta); self.phyobjbs['themuon_eta'] = (themuon_eta,100.0)
    theele_pt = array('f',[-1.0]); self.chain.SetBranchAddress('eltrig_theelectron_pt',theele_pt); self.phyobjbs['theele_pt'] = (theele_pt,-1.0)
    theele_eta = array('f',[100.0]); self.chain.SetBranchAddress('eltrig_theelectron_eta',theele_eta); self.phyobjbs['theele_eta'] = (theele_eta,100.0)
    evt_pu = array('i',[-1]); self.chain.SetBranchAddress('ngoodvtx',evt_pu); self.phyobjbs['evt_pu'] = (evt_pu,-1)
    evt_top = array('i',[0]); self.chain.SetBranchAddress('eventTopology',evt_top); self.phyobjbs['evt_top'] = (evt_top,0)
    nbTags = array('i',[0]); self.chain.SetBranchAddress('nbTags',nbTags); self.phyobjbs['nbTags'] = (nbTags,0)
    lepflavor = array('I',[0]); self.chain.SetBranchAddress('lepflavor',lepflavor); self.phyobjbs['lepflavor'] = (lepflavor,0)
    #selection and trigger variables
    self.selecpassbs = {}
    selectiontrig = array('I',[2]); self.chain.SetBranchAddress('eltrig_fullselection',selectiontrig); self.selecpassbs['selectiontrig'] = (selectiontrig,2)
    # The individual-cut branches below are kept for reference but disabled.
    #mutrig = array('I',[2]); self.chain.SetBranchAddress('eltrig_mutrigger',mutrig); self.selecpassbs['mutrig'] = (mutrig,2)
    #isomu = array('I',[2]); self.chain.SetBranchAddress('eltrig_isomu',isomu); self.selecpassbs['isomu'] = (isomu,2)
    #isoel = array('I',[2]); self.chain.SetBranchAddress('eltrig_isoel',isoel); self.selecpassbs['isoel'] = (isoel,2)
    #twoleptons = array('I',[2]); self.chain.SetBranchAddress('eltrig_twoleptons',twoleptons); self.selecpassbs['twoleptons'] = (twoleptons,2)
    #opplepcharge = array('I',[2]); self.chain.SetBranchAddress('eltrig_opplepcharge',opplepcharge); self.selecpassbs['opplepcharge'] = (opplepcharge,2)
    #btags = array('I',[2]); self.chain.SetBranchAddress('eltrig_btags',btags); self.selecpassbs['btags'] = (btags,2)
    #lepWpT = array('I',[2]); self.chain.SetBranchAddress('eltrig_lepWpT',lepWpT); self.selecpassbs['lepWpT'] = (lepWpT,2)
    passtrig = array('I',[2]); self.chain.SetBranchAddress('eltrig_eltrigger',passtrig); self.selecpassbs['passtrig'] = (passtrig,2)
    #reweighting factors we can read from the files without recalculating
    self.rwbs = {}
    weight = array('f',[1.0]); self.chain.SetBranchAddress('weight',weight); self.rwbs['weight'] = (weight,1.0)
    sf_pileup = array('f',[1.0]); self.chain.SetBranchAddress('sf_pileup',sf_pileup); self.rwbs['sf_pileup'] = (sf_pileup,1.0)
    sf_btag_eff = array('f',[1.0]); self.chain.SetBranchAddress('sf_btag_eff',sf_btag_eff); self.rwbs['sf_btag_eff'] = (sf_btag_eff,1.0)
    sf_mu_R = array('f',[1.0]); self.chain.SetBranchAddress('sf_mu_R',sf_mu_R); self.rwbs['sf_mu_R'] = (sf_mu_R,1.0)
    sf_mu_F = array('f',[1.0]); self.chain.SetBranchAddress('sf_mu_F',sf_mu_F); self.rwbs['sf_mu_F'] = (sf_mu_F,1.0)
    sf_scale_comb = array('f',[1.0]); self.chain.SetBranchAddress('sf_scale_comb',sf_scale_comb); self.rwbs['sf_scale_comb'] = (sf_scale_comb,1.0)
    sf_pdf_alphas = array('f',[1.0]); self.chain.SetBranchAddress('sf_pdf_alphas',sf_pdf_alphas); self.rwbs['sf_pdf_alphas'] = (sf_pdf_alphas,1.0)
    #Set up output TTree
    tname = 'data_tree' if isdata else 'MC_tree'
    self.tree = TTree(tname,tname)
    self.otbs = {}
    el_pt = array('f',[-1.0]); self.tree.Branch('el_pt',el_pt,'el_pt/F'); self.otbs['el_pt'] = (el_pt,-1.0)
    el_eta = array('f',[100.]); self.tree.Branch('el_eta',el_eta,'el_eta/F'); self.otbs['el_eta'] = (el_eta,100.)
    pu = array('i',[-1]); self.tree.Branch('pu',pu,'pu/I'); self.otbs['pu'] = (pu,-1)
    topology = array('i',[0]); self.tree.Branch('topology',topology,'topology/I'); self.otbs['topology'] = (topology,0)
    pass_trig = array('I',[2]); self.tree.Branch('pass_trig',pass_trig,'pass_trig/i'); self.otbs['pass_trig'] = (pass_trig,2)
    evt_weight = array('f',[1.0]); self.tree.Branch('evt_weight',evt_weight,'evt_weight/F'); self.otbs['evt_weight'] = (evt_weight,1.0)
    # All four branch dicts, iterated each event to reset the buffers.
    self.allbranchdicts = [self.phyobjbs,self.selecpassbs,self.rwbs,self.otbs]
    #Set up histograms and efficiency graphs
    self.histos_and_graphs = []
    self.ele_pt_all = TH1D(self.name+'_ele_pt_all',self.name+' electron p_{T} for all events; p_{T} (GeV)',n_ele_pt_bins,ele_pt_bins)
    self.histos_and_graphs.append(self.ele_pt_all)
    self.ele_eta_all = TH1D(self.name+'_ele_eta_all',self.name+' electron #eta for all events; #eta',n_eta_bins,eta_bins)
    self.histos_and_graphs.append(self.ele_eta_all)
    self.evt_pu_all = TH1D(self.name+'_evt_pu_all',self.name+' pileup for all events; # vertices',n_pu_bins,pu_bins)
    self.histos_and_graphs.append(self.evt_pu_all)
    self.ele_pt_pass = TH1D(self.name+'_ele_pt_pass',self.name+' electron p_{T} for passing events; p_{T} (GeV)',n_ele_pt_bins,ele_pt_bins)
    self.histos_and_graphs.append(self.ele_pt_pass)
    self.ele_eta_pass = TH1D(self.name+'_ele_eta_pass',self.name+' electron #eta for passing events; #eta',n_eta_bins,eta_bins)
    self.histos_and_graphs.append(self.ele_eta_pass)
    self.evt_pu_pass = TH1D(self.name+'_evt_pu_pass',self.name+' pileup for passing events; # vertices',n_pu_bins,pu_bins)
    self.histos_and_graphs.append(self.evt_pu_pass)
    self.histo_2d_all = TH2D(self.name+'_histo_2D_all','',n_ele_pt_bins_2D,ele_pt_bins_2D,n_eta_bins_2D,eta_bins_2D); self.histos_and_graphs.append(self.histo_2d_all)
    self.histo_2d_pass = TH2D(self.name+'_histo_2D_pass','',n_ele_pt_bins_2D,ele_pt_bins_2D,n_eta_bins_2D,eta_bins_2D); self.histos_and_graphs.append(self.histo_2d_pass)
    # Scratch arrays for the efficiency graphs (x, x-error, y, y-error).
    ele_pt_x = array('d',n_ele_pt_bins*[0.])
    ele_pt_xe = array('d',n_ele_pt_bins*[0.])
    ele_pt_y = array('d',n_ele_pt_bins*[0.])
    ele_pt_ye = array('d',n_ele_pt_bins*[0.])
    ele_eta_x = array('d',n_eta_bins*[0.])
    ele_eta_xe = array('d',n_eta_bins*[0.])
    ele_eta_y = array('d',n_eta_bins*[0.])
    ele_eta_ye = array('d',n_eta_bins*[0.])
    pu_x = array('d',n_pu_bins*[0.])
    pu_xe = array('d',n_pu_bins*[0.])
    pu_y = array('d',n_pu_bins*[0.])
    pu_ye = array('d',n_pu_bins*[0.])
    self.ele_pt_gr = TGraphErrors(n_ele_pt_bins,ele_pt_x,ele_pt_y,ele_pt_xe,ele_pt_ye); self.histos_and_graphs.append(self.ele_pt_gr)
    self.ele_pt_gr.SetName(self.name+'ele_pt_gr'); self.ele_pt_gr.SetTitle(self.name+' probe efficiency vs. electron p_{T}'); self.ele_pt_gr.GetXaxis().SetName('electron p_{T} (GeV)'); self.ele_pt_gr.GetYaxis().SetName('Probe efficiency')
    self.ele_eta_gr = TGraphErrors(n_eta_bins,ele_eta_x,ele_eta_y,ele_eta_xe,ele_eta_ye); self.histos_and_graphs.append(self.ele_eta_gr)
    self.ele_eta_gr.SetName(self.name+'ele_eta_gr'); self.ele_eta_gr.SetTitle(self.name+' probe efficiency vs. electron #eta'); self.ele_eta_gr.GetXaxis().SetName('#eta'); self.ele_eta_gr.GetYaxis().SetName('Probe efficiency')
    self.pu_gr = TGraphErrors(n_pu_bins,pu_x,pu_y,pu_xe,pu_ye); self.histos_and_graphs.append(self.pu_gr)
    self.pu_gr.SetName(self.name+'pu_gr'); self.pu_gr.SetTitle(self.name+' probe efficiency vs.pileup'); self.pu_gr.GetXaxis().SetName('pileup'); self.pu_gr.GetYaxis().SetName('Probe efficiency')
    #Counter
    count = 0
    ########## Main Event Loop ##########
    print 'Filling trees for '+self.name+'. . .'
    nEntries = chain.GetEntries()
    for entry in range(nEntries) :
        #check the max events
        count+=1
        if count == options.max_events+1 :
            print 'Processed event number '+str(count-1)+', exiting'
            break
        #print progress
        if count % options.print_every == 0 or count == 1:
            print 'Count at '+str(count)+' out of '+str(nEntries)+', (%.4f%% complete)'%(float(count) / float(nEntries) * 100.0)
        #reset all of the arrays holding tree branches
        for branchdict in self.allbranchdicts :
            for branchtuple in branchdict.values() :
                branchtuple[0][0] = branchtuple[1]
        chain.GetEntry(entry)
        ##readjust the trigger selection cuts (for now, until I fix how they're hardcoded)
        #selectiontrig[0] = 1 if (mutrig[0]==1 and isomu[0]==1 and isoel[0]==1 and twoleptons[0]==1 and opplepcharge[0]==1 and btags[0]==1 and lepWpT[0]==1) else 0
        cuts = []
        # MC events must carry a nontrivial weight; data passes automatically.
        cuts.append(isdata or weight[0]!=1.0)
        #cuts only dependent on topology
        if options.topology=='boosted' :
            cuts.append(evt_top[0]<3)
        elif options.topology=='resolved' :
            cuts.append(evt_top[0]==3)
        #cuts for trigger analysis
        if mode=='t' :
            cuts.append(selectiontrig[0]==1)
            cuts.append(lepflavor[0]==2)
        #check all cuts
        if cuts.count(False) > 0 :
            continue
        #fill the tree
        el_pt[0] = theele_pt[0]
        el_eta[0] = theele_eta[0]
        pu[0] = evt_pu[0]
        topology[0] = evt_top[0]
        pass_trig[0] = passtrig[0]
        evt_weight[0] = self.__getEvtWeight__()
        self.tree.Fill()
        #fill the histograms (bring parameters in range)
        self.ele_pt_all.Fill(el_pt[0],evt_weight[0])
        self.ele_eta_all.Fill(el_eta[0],evt_weight[0])
        self.evt_pu_all.Fill(pu[0],evt_weight[0])
        self.histo_2d_all.Fill(el_pt[0],abs(el_eta[0]),evt_weight[0])
        if (mode=='t' and pass_trig[0]==1) :
            self.ele_pt_pass.Fill(el_pt[0],evt_weight[0])
            self.ele_eta_pass.Fill(el_eta[0],evt_weight[0])
            self.evt_pu_pass.Fill(pu[0],evt_weight[0])
            self.histo_2d_pass.Fill(el_pt[0],abs(el_eta[0]),evt_weight[0])
    print 'Done'
    #Make the graph y-values and errors
    # Efficiency per bin = passing/all, with binomial-style error propagation.
    for i in range(n_ele_pt_bins) :
        x_value = (ele_pt_bins[i+1]+ele_pt_bins[i])/2
        x_err = (ele_pt_bins[i+1]-ele_pt_bins[i])/2
        passing_events = self.ele_pt_pass.GetBinContent(self.ele_pt_pass.FindBin(x_value))
        all_events = self.ele_pt_all.GetBinContent(self.ele_pt_all.FindBin(x_value))
        y_value = 0.; y_err = 0.
        if all_events > 0. :
            y_value = passing_events/all_events
            if passing_events>0. :
                y_err = y_value*sqrt(1./passing_events+1./all_events)
        self.ele_pt_gr.SetPoint(i,x_value,y_value)
        self.ele_pt_gr.SetPointError(i,x_err,y_err)
    for i in range(n_eta_bins) :
        x_value = (eta_bins[i+1]+eta_bins[i])/2
        x_err = (eta_bins[i+1]-eta_bins[i])/2
        passing_events = self.ele_eta_pass.GetBinContent(self.ele_eta_pass.FindBin(x_value))
        all_events = self.ele_eta_all.GetBinContent(self.ele_eta_all.FindBin(x_value))
        y_value = 0.; y_err = 0.
        if all_events > 0. :
            y_value = passing_events/all_events
            if passing_events>0. :
                y_err = y_value*sqrt(1./passing_events+1./all_events)
        self.ele_eta_gr.SetPoint(i,x_value,y_value)
        self.ele_eta_gr.SetPointError(i,x_err,y_err)
    for i in range(n_pu_bins) :
        x_value = (pu_bins[i+1]+pu_bins[i])/2
        x_err = (pu_bins[i+1]-pu_bins[i])/2
        passing_events = self.evt_pu_pass.GetBinContent(self.evt_pu_pass.FindBin(x_value))
        all_events = self.evt_pu_all.GetBinContent(self.evt_pu_all.FindBin(x_value))
        y_value = 0.; y_err = 0.
        if all_events > 0. :
            y_value = passing_events/all_events
            if passing_events>0. :
                y_err = y_value*sqrt(1./passing_events+1./all_events)
        self.pu_gr.SetPoint(i,x_value,y_value)
        self.pu_gr.SetPointError(i,x_err,y_err)
    #Fit the graphs with constants, then save the fit functions as graphs
    self.ele_pt_const = TF1('ele_pt_const','[0]',ele_pt_bins[0],ele_pt_bins[n_ele_pt_bins])
    self.ele_eta_const = TF1('ele_eta_const','[0]',eta_bins[0],eta_bins[n_eta_bins])
    self.pu_const = TF1('pu_const','[0]',pu_bins[0],pu_bins[n_pu_bins])
    self.ele_pt_gr.Fit('ele_pt_const')
    self.ele_eta_gr.Fit('ele_eta_const')
    self.pu_gr.Fit('pu_const')
    self.ele_pt_fit_value = self.ele_pt_const.GetParameter(0)
    self.ele_pt_fit_err = self.ele_pt_const.GetParError(0)
    self.ele_eta_fit_value = self.ele_eta_const.GetParameter(0)
    self.ele_eta_fit_err = self.ele_eta_const.GetParError(0)
    self.pu_fit_value = self.pu_const.GetParameter(0)
    self.pu_fit_err = self.pu_const.GetParError(0)
    # Constant-fit results replicated as flat graphs over the same binning.
    self.ele_pt_fit_gr = TGraphErrors(n_ele_pt_bins,ele_pt_x,ele_pt_y,ele_pt_xe,ele_pt_ye)
    self.ele_eta_fit_gr = TGraphErrors(n_eta_bins,ele_eta_x,ele_eta_y,ele_eta_xe,ele_eta_ye)
    self.pu_fit_gr = TGraphErrors(n_pu_bins,pu_x,pu_y,pu_xe,pu_ye)
    for i in range(n_ele_pt_bins) :
        x_value = (ele_pt_bins[i+1]+ele_pt_bins[i])/2
        x_err = (ele_pt_bins[i+1]-ele_pt_bins[i])/2
        self.ele_pt_fit_gr.SetPoint(i,x_value,self.ele_pt_fit_value)
        self.ele_pt_fit_gr.SetPointError(i,x_err,self.ele_pt_fit_err)
    for i in range(n_eta_bins) :
        x_value = (eta_bins[i+1]+eta_bins[i])/2
        x_err = (eta_bins[i+1]-eta_bins[i])/2
        self.ele_eta_fit_gr.SetPoint(i,x_value,self.ele_eta_fit_value)
        self.ele_eta_fit_gr.SetPointError(i,x_err,self.ele_eta_fit_err)
    for i in range(n_pu_bins) :
        x_value = (pu_bins[i+1]+pu_bins[i])/2
        x_err = (pu_bins[i+1]-pu_bins[i])/2
        self.pu_fit_gr.SetPoint(i,x_value,self.pu_fit_value)
        self.pu_fit_gr.SetPointError(i,x_err,self.pu_fit_err)
    #Write the tree, histograms, and graphs
    outputfile.cd()
    self.tree.Write()
    for thing in self.histos_and_graphs :
        thing.Write()
#!/usr/bin/env python #coding=utf-8 import json from corrector import Corrector if __name__ == '__main__': corrector = Corrector('example.txt') print 'words:', json.dumps(corrector.nwords, ensure_ascii=False, indent=4) print '\n' for word in [u'刘弱英', u'陈牵强', u'刘若华', u'hel', u'helle']: print 'word:', word print 'corrected:', corrector.correct(word) print 'possibilities:', json.dumps(corrector.possibilities(word), ensure_ascii=False, indent=4) print '\n'
class Main(object):
    """Interactive SMS-style command loop: correction, directory lookup,
    outlines, and paged display of results."""

    def __init__(self):
        """Initialize the corrector engine and the UI/session state."""
        # Word corrector backing process().
        self.main = Corrector()
        # Last matched word and geographic match (empty until set).
        self.match = ""
        self.matchgeo = ""
        self.commands = []
        # Current option menu presented to the user.
        self.option = Options()
        # Chunked display buffer for paged output, plus the previous display.
        self.display = Chunks("")
        self.display.create_Chunks()
        self.last_Display = ""
        # Separate chunk buffer for find/search results.
        self.find = Chunks("")
        self.find.create_Chunks()
        self.table = Outline()
        self.location_History = {}

    def run(self):
        """Main REPL: read a command, correct it, dispatch to the handlers.

        NOTE(review): indentation below is reconstructed from a flattened
        source — verify nesting against the original before relying on it.
        """
        hlp = Helper();br=True;results={};resultsort=[];correcting = False
        while br:
            # Skip prompting when re-processing a corrected command or a
            # pending geographic match.
            if correcting or self.matchgeo:
                pass
            else:
                command = raw_input("SMS in: ")
            if not self.matchgeo:
                t,option,word = self.process(unicode(command))
                words = t[0];disting = t[1][-1]
                # Ambiguous word: present "did you mean" options and loop.
                if option and len(resultsort)==0:
                    self.option = option;self.match = word
                    if len(self.display) > 0 :
                        self.last_Display = self.display
                    self.display = Chunks(self.option.print_Options())
                    self.display.create_Chunks(heading="By ({0}) Did you mean: ".format(word), footing="Please choose.")
                    print self.display.goto_Chunk(); continue
            # Single-character reply while a menu is open: option selection.
            if len(self.option)>0 and len(words[0])==1:
                ch = self.option.select_Option(words[0])
                if ch!=None and len(resultsort)==0:
                    # Record the chosen correction and splice it into the
                    # tokenized command, then re-run the corrected command.
                    self.main.correctedHistory[self.match]=self.option[ch][2]
                    disting = t[1][-2]; k = disting['tokens'].index(self.match)
                    disting['tokens'][k] = self.option[ch][2]
                    try:
                        k = disting['words'].index(self.match)
                        disting['words'][k] = self.option[ch][2]
                        words = disting['words']
                    except:
                        disting['words'].append(self.option[ch][2])
                        words = disting['words']
                    if self.option[ch][2] == "find":
                        word = "find";self.option = Options()
                    else:
                        self.option = Options();correcting = True
                        command = " ".join(disting['tokens']); continue
                # Selection from a place-results menu: show details.
                if ch!=None and type(resultsort)!=unicode:
                    text = "{0} - Ratings : {1}".format(resultsort[ch][0],resultsort[ch][1])
                    text += "\n" +" Telephone Number: " + results[resultsort[ch][0]][1]
                    text += "\n Address: "+results[resultsort[ch][0]][0]
                    self.display = Chunks(text)
                    self.display.create_Chunks()
                    print self.display.goto_Chunk()
                    self.option = Options();continue
                # Selection from a location-disambiguation menu.
                if ch!=None and type(resultsort)==unicode:
                    self.matchgeo = [results[ch]];
                    self.location_History[resultsort] = [results[ch]]
                    disting = t[1][-2]; words = disting['words'];self.option = Options()
                    continue
            correcting = False
            # "find" searches within the current (or previous) display.
            if word == "find" or "find" in words:
                if word == "find":
                    self.find = self.last_Display
                else:
                    self.find = self.display
                expand = self.find.find_Chunks(" ".join([i for i in disting['tokens'] if i!="find"]))
                if expand!=False:
                    self.find.chunk_list = expand
                    print self.find.goto_Chunk()
                else:
                    print "No results found"
                continue
            # Dispatch each word against the command patterns.
            for k in range(len(words)):
                dirfin = finder(r'directory|places',words[k])
                if dirfin.found():
                    # "list": show the known place categories.
                    if "list" in words:
                        self.display = Chunks(",".join(hlp.dirtypes()).replace("_"," "))
                        self.display.create_Chunks(heading="List of types of places:")
                        print self.display.goto_Chunk(); break
                    direc=Directory([i for i in words if i!="directory" or i!="places"])
                    if len(self.matchgeo) > 0:
                        direc.locs=self.matchgeo; self.matchgeo = ""
                    results,resultsort = direc.run(disting,self.location_History)
                    if results == None:
                        break
                    # A unicode resultsort signals an ambiguous location.
                    elif type(resultsort) == unicode:
                        for i in results:
                            self.option.add_Option(content="{0}".format(i[0].encode('utf-8')))
                        self.display = Chunks(self.option.print_Options())
                        self.display.create_Chunks(heading="By {0} did you mean:".format(str(resultsort)), footing="Please choose a location. ")
                        print self.display.goto_Chunk(); break
                    for i in resultsort:
                        self.option.add_Option(content="{0} - Ratings : {1}".format(i[0].encode('utf-8'),str(i[1])))
                    self.display = Chunks(self.option.print_Options())
                    self.display.create_Chunks(heading="Nearby places:", footing="Choose for more details. ")
                    print self.display.goto_Chunk(); break
                outfin = finder(r'outline',words[k])
                if outfin.found():
                    with open("textblock.txt","rb") as f:
                        textblock= f.read().decode("string_escape")
                        # Decode literal \uXXXX escapes left in the text.
                        for i in range(len(textblock)):
                            if textblock[i:i+2] == "\u":
                                textblock= textblock[:i]+unichr(int(textblock[i+2:i+6],base=16)).encode("utf-8") +textblock[i+6:]
                    f.close()
                    self.table.add_TxtBlock(textblock)
                    if self.table.run([i for i in words if i !="outline"],disting['numbers']) != False:
                        self.display = self.table.run([i for i in words if i !="outline"],disting['numbers'])
                        print self.display.goto_Chunk(); break
                exifin = finder(r'exit',words[k])
                if exifin.found():
                    br=False;break
                helpfin = finder(r'help',words[k])
                if helpfin.found():
                    print hlp;break
                if "next" in words[k]:
                    print self.display.next_Chunk(); break
                if "goto" in words[k]:
                    # Jump to a numbered chunk; -1 on a malformed number.
                    try:
                        n = disting['numbers'][0]
                        num = int(n)
                    except:
                        num = -1
                    print self.display.goto_Chunk(num); break

    def process(self,command):
        """Run the corrector over the command's words.

        Returns ([corrected_words, history], options_or_empty_list, word),
        where `word` is the last ambiguous word when options were produced.
        """
        opts = [];self.main.disting(command)
        for i in self.main.cur()['words']+self.main.cur()['splits']:
            for j in self.main.match(i):
                if j[0]=='None':
                    continue
                # Exact/confident match (distance 0): substitute in place.
                if j[1]==0:
                    try:
                        k = self.main.cur()['words'].index(i)
                        self.main.cur()['words'][k] = j[0]
                    except:
                        self.main.cur()['words'].append(j[0])
                    continue
                # Otherwise accumulate candidates as user-facing options.
                if opts == []:
                    opts = Options()
                opts.add_Option(content="{0}".format(j[0]))
        if opts:
            return [self.main.cur()['words'],self.main.history],opts,i
        return [self.main.cur()['words'],self.main.history],opts,""
def setUp(self):
    """Create a fresh Corrector before each test."""
    self.corrector = Corrector()
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
import numpy as np
from nlp import Ui_Form
from utils.langconv import *
from set_custom import Ui_widget
from playsound import playsound
from test import speech_recognition
from corrector import Corrector

# Module-level corrector shared by the UI handlers, loaded with a
# user-defined confusion dictionary.
c = Corrector()
c.set_custom_confusion_dict('./my_custom.txt')


class MainWindow(QMainWindow, Ui_Form):
    """Main application window wiring the UI buttons to the NLP actions."""

    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        self.setupUi(self)
        # Background image scaled to the label's size.
        pix = QPixmap('index.jpg').scaled(self.label_3.width(), self.label_3.height())
        self.label_3.setPixmap(pix)
        # Button wiring: detection, correction, custom dictionary, speech
        # input, simplified->traditional conversion, audio playback.
        self.pushButton_1.clicked.connect(self.error_detection)
        self.pushButton_2.clicked.connect(self.error_correction)
        self.pushButton_3.clicked.connect(self.set_custom)
        self.pushButton_4.clicked.connect(self.add_speech)
        self.pushButton_5.clicked.connect(self.simplified2traditional)
        self.pushButton_6.clicked.connect(self.play)

    def error_detection(self):
        # NOTE(review): this method is truncated in this chunk — only the
        # first two statements are visible here.
        error_sentences = self.textEdit.toPlainText()
        corrected_sent, err = Corrector.correct(c, error_sentences)
from flask import Flask, render_template, request
from corrector import Corrector
import json
import logging

app = Flask(__name__)
corrector = Corrector()
if app.debug:
    corrector.logger.setLevel(logging.DEBUG)


@app.route("/")
def home():
    # Landing page listing every available exercise guide.
    return render_template("home.html", guias = corrector.nombres_guias())


@app.route("/guia/<titulo>")
def mostrar_guia(titulo):
    """Render one guide page, with input-parameter names per exercise."""
    assert titulo in corrector.nombres_guias()
    # Define the data parameters for the exercises.
    ejercicios = corrector.ejercicios_de(titulo)
    params = []
    for ejercicio in ejercicios:
        # One "datos" field per input file declared for the exercise.
        nparams = len(ejercicio["archivos_entrada"].split(","))
        if nparams == 1:
            params.append("datos")
        else:
            # "datos1, datos2, ..."
            params.append(", ".join(["datos"+str(i+1) for i in range(nparams)]))
    # NOTE(review): the render_template call below is truncated in this chunk.
    return render_template("guia.html", guias = corrector.nombres_guias(),
from corrector import Corrector

# Dataset selection for the correction model.
reader = 'Conll'
if reader == 'MovieDialog':
    train_path = 'dataset/movie_lines.txt'
    test_path = 'dataset/test.txt'
    model_path = 'dialog_correcter_model'
elif reader == 'Conll':
    train_path = 'dataset/CONLL/train.txt'
    test_path = 'dataset/CONLL/test.txt'
    model_path = 'conll_correcter_model'
    example_path = 'dataset/example.txt'

corrector = Corrector(train_path, test_path, model_path, reader)
corrector.corrector_init()


def process(sent):
    """Correct one sentence and print a per-token add/remove/replace report."""
    # NOTE(review): this first call's result is immediately overwritten by
    # the unpacked call below — likely redundant; confirm before removing.
    result = corrector.correct(sent)
    result, alt, err = corrector.correct(sent)
    print('Output:', result)
    # print('-'*30)
    for r, e in zip(alt, err):
        # r[0]/r[1] hold the original/replacement token lists for span e.
        x = r[0][0] if r[0] else None
        y = r[1][0] if r[1] else None
        if not x:
            print('{:>13} ERROR Add: {} '.format(e, y))
        elif not y:
            print('{:>13} ERROR Remove: {} '.format(e, x))
        else:  # NOTE(review): chunk truncated here — the replace branch is not visible in this view
def __init__(self, application, request, **kwargs):
    """Handler init: delegate to the base class and build a Corrector."""
    super(CorrectHandler, self).__init__(application, request, **kwargs)
    # One Corrector instance per handler object.
    self.corrector = Corrector()
class CorrectTest(unittest.TestCase):
    """Test cases for Corrector"""

    def setUp(self):
        self.corrector = Corrector()

    def test_equals(self):
        # (misspelling, expected correction) pairs; edit kind noted inline.
        cases = [
            ('somthing', 'something'),             # insert
            ('speling', 'seeing'),                 # insert
            ('korrectud', 'corrected'),            # replace 2
            ('coryright', 'copyright'),            # replace
            ('aventure', 'adventure'),             # insert 2
            ('additonel', 'additional'),           # delete
            ('peotry', 'poetry'),                  # transpose
            ('peotryy', 'poetry'),                 # transpose + delete
            ('word', 'word'),                      # known
            ('quintessential', 'quintessential'),  # unknown
            ('moring', 'morning'),                 # insert
        ]
        for wrong, expected in cases:
            self.assertEqual(self.corrector.correction(wrong), expected)