Example #1
def DCN(trainpath,testpath,modelpath,dstl = False, target = True):

	train = load_data(trainpath)
	test = load_data(testpath)
	#good = load_data('data/mnist5kgood55k.pkl')

	if dstl:
#		train.dstl()
		test.dstl()

	if target:
		para = 9
	else:
		para = 1

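	# Presumably: train the binary detector, collect the test inputs it flags ('false'),
	# then hand them to the Corrector built on the CIFAR region model.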
	model, adv_accu = binary_model(train, test)
	false = detect(model,test)
	print('**********corrector************')
	region_model = CIFARModel(modelpath)
	t5 = time.time()
	c = Corrector(region_model, testpath, false, target = target,r=0.02, n = 50)
	error = c.correct()
	t6 = time.time()

	accuracy_good = (test.num - error[0])/test.num
	attack_success = (1-adv_accu)+error[1]/test.num/para
	print('accuracy_good:',accuracy_good)
	print('attack_success:',attack_success)
	print('time:', t6 -t5)
Example #2
    def execute(self):

        # Connect to the server
        corrector = Corrector(self.sd)

        # Check and parse the autores.txt file
        ficheroPractica, fp = corrector.extractFicheroPractica('autores.txt')

        # Check the packaging
        corrector.checkEmpaquetado(ficheroPractica.rstrip('\r\n ') + '.tar.gz')

        # Everything went fine
        return self.getScore()
Example #4
    def __init__(
        self,
        datapath,
    ):
        print('Initializing retrieval model...')
        self.data = pd.read_csv(datapath)
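        # Pre-computed retrieval artifacts: TF-IDF model, per-document TF-IDF vectors,
        # inverted index, vocabulary maps, and IDF / average-length statistics.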
        self.tfidf = unpickle_file('model/tfidf.model')

        self.tfidf_vec = unpickle_file('data/doc_tfidf_vec.bin')
        self.inverse_idx = unpickle_file('data/inverse_idx_table.bin')

        self.word_2_id = unpickle_file('data/full_word2id.bin')
        self.id_2_word = {d: w for w, d in self.word_2_id.items()}
        self.word_2_id_for_filter = unpickle_file('data/tfidf_word2vec.bin')
        self.idf, self.avg_len = unpickle_file('data/idf_and_avglen.bin')

        self.word_vec = load_gensim_vec('data/wordvec_fasttext_300.txt')
        self.annoy = load_annoy_index('data/sentvec.ann')

        self.ltp = nlp.Ltp(seg=True,
                           pos=True,
                           seg_lexicon_path='data/lexicon_seg.txt',
                           pos_lexicon_path='data/lexicon.txt')
        self.text_precess = nlp.ProcessText()

        self.stopwords = nlp.load_stop(
            ['data/chinese_stopwords.txt', 'data/哈工大停用词表.txt'])
        self.nonsense_word = ['请问', '想知道']

        self.corrector = Corrector()
        print('Retrieval model established.')
Example #5
class CorrectHandler(tornado.web.RequestHandler):
    def __init__(self, application, request, **kwargs):
        super(CorrectHandler, self).__init__(application, request, **kwargs)
        self.corrector = Corrector()

    def get(self):
        ret_data = {'correct_result': []}
        try:
            query = self.get_argument('query', '')
            logging.info('Starting query: %s, port: %s', query, options.port)
            if not query:
                pass
            else:
                query = urllib.unquote(query)
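                # Only attempt correction for short queries: pure ASCII alphanumerics
                # up to 20 characters, or any string up to 8 characters.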
                if (re.match(r'^[a-zA-Z0-9]+$', query)
                        and len(query) <= 20) or len(query) <= 8:
                    correct_result = self.corrector.get_correct_words(query)
                    candidate = correct_result['candidate']
                    logging.info(u'query: {query}, status: {status}, msg: {msg}, candidate: {candidate}'\
                        .format(query = query,
                            status = correct_result['status'],
                            msg = correct_result['msg'],
                            candidate = u','.join(candidate)))
                    ret_data['correct_result'] = candidate
                else:
                    ret_data['correct_result'] = []
        except Exception, e:
            ret_data['error_code'] = -1
            ret_data['error_msg'] = str(e)
            logging.error(e)
        finally:
Example #6
    def error_detection(self):
        error_sentences = self.textEdit.toPlainText()
        corrected_sent, err = Corrector.correct(c, error_sentences)
        # Rebuild the text, wrapping each detected error span (err entries carry
        # start/end offsets at indices 2 and 3) in red; everything else stays black.
        text = ''
        last = 0
        for i in err:
            text = text + "<font color='black'>" + error_sentences[last:i[2]]
            text = text + "<font color='red'>" + error_sentences[i[2]:i[3]]
            last = i[3]
        text = text + "<font color='black'>" + error_sentences[last:]
        self.textBrowser.setText(text)
Example #7
    def __init__(self):
        self.main = Corrector()
        self.match = ""
        self.matchgeo = ""
        self.commands = []
        self.option = Options()
        self.display = Chunks("")
        self.display.create_Chunks()
        self.last_Display = ""
        self.find = Chunks("")
        self.find.create_Chunks()
        self.table = Outline()
        self.location_History = {}
Example #8
    def load(self):
        print "reading index"
        input = open('invertedIndex.pkl', 'rb')
        self.invertedIndex = cPickle.load(input)
        print "reading wordSet"
        input = open("wordSet.pkl", 'rb')
        self.wordSet = cPickle.load(input)
        print "reading dictionary"
        print "reading fileDictionary"
        input = open('fileName.pkl', 'rb')
        self.filedict = cPickle.load(input)

        print "loading vsm"
        self.vsm = VSM(self.invertedIndex, len(self.filedict))
        self.corrector = Corrector('trainer')
Example #9
from corrector import Corrector

reader = 'Conll'

if reader == 'MovieDialog':
    train_path = 'dataset/movie_lines.txt'
    test_path = 'dataset/test.txt'
    model_path = 'dialog_correcter_model'
elif reader == 'Conll':
    train_path = 'dataset/CONLL/train.txt'
    test_path = 'dataset/CONLL/test.txt'
    model_path = 'conll_correcter_model'

corrector = Corrector(train_path, test_path, model_path, reader=reader)
corrector.train()
Example #10
	def __init__(self,name,chain,outputfile,mode) :
		self.name = name
		isdata = name.find('data')!=-1
		self.chain = chain
		self.mode=mode
		self.corrector = Corrector(isdata,'no',TH1F('pileup','pileup',100,0.,100.),'B',{'alpha':1.0,'epsilon':1.0})
		#Get relevant branches
		#Physics objects
		self.phyobjbs = {}
		themuon_pt 	  = array('f',[-1.0]);  self.chain.SetBranchAddress('eltrig_themuon_pt',themuon_pt); 	  self.phyobjbs['themuon_pt'] = (themuon_pt,-1.0)
		themuon_eta   = array('f',[100.0]); self.chain.SetBranchAddress('eltrig_themuon_eta',themuon_eta); 	  self.phyobjbs['themuon_eta'] = (themuon_eta,100.0)
		theele_pt 	  = array('f',[-1.0]);  self.chain.SetBranchAddress('eltrig_theelectron_pt',theele_pt);   self.phyobjbs['theele_pt'] = (theele_pt,-1.0)
		theele_eta 	  = array('f',[100.0]); self.chain.SetBranchAddress('eltrig_theelectron_eta',theele_eta); self.phyobjbs['theele_eta'] = (theele_eta,100.0)
		evt_pu 		  = array('i',[-1]); 	self.chain.SetBranchAddress('ngoodvtx',evt_pu); 				  self.phyobjbs['evt_pu'] = (evt_pu,-1)
		evt_top 	  = array('i',[0]); 	self.chain.SetBranchAddress('eventTopology',evt_top); 			  self.phyobjbs['evt_top'] = (evt_top,0)
		nbTags 		  = array('i',[0]); 	self.chain.SetBranchAddress('nbTags',nbTags); 					  self.phyobjbs['nbTags'] = (nbTags,0)
		lepflavor 	  = array('I',[0]); 	self.chain.SetBranchAddress('lepflavor',lepflavor); 			  self.phyobjbs['lepflavor'] = (lepflavor,0)
		#selection and trigger variables
		self.selecpassbs = {}
		selectiontrig = array('I',[2]); self.chain.SetBranchAddress('eltrig_fullselection',selectiontrig); self.selecpassbs['selectiontrig'] = (selectiontrig,2)
		#mutrig 		  = array('I',[2]); self.chain.SetBranchAddress('eltrig_mutrigger',mutrig); 		   self.selecpassbs['mutrig'] = (mutrig,2)
		#isomu 		  = array('I',[2]); self.chain.SetBranchAddress('eltrig_isomu',isomu); 				   self.selecpassbs['isomu'] = (isomu,2)
		#isoel 		  = array('I',[2]); self.chain.SetBranchAddress('eltrig_isoel',isoel); 				   self.selecpassbs['isoel'] = (isoel,2)
		#twoleptons 	  = array('I',[2]); self.chain.SetBranchAddress('eltrig_twoleptons',twoleptons); 	   self.selecpassbs['twoleptons'] = (twoleptons,2)
		#opplepcharge  = array('I',[2]); self.chain.SetBranchAddress('eltrig_opplepcharge',opplepcharge);   self.selecpassbs['opplepcharge'] = (opplepcharge,2)
		#btags 		  = array('I',[2]); self.chain.SetBranchAddress('eltrig_btags',btags); 				   self.selecpassbs['btags'] = (btags,2)
		#lepWpT 		  = array('I',[2]); self.chain.SetBranchAddress('eltrig_lepWpT',lepWpT); 			   self.selecpassbs['lepWpT'] = (lepWpT,2)
		passtrig 	  = array('I',[2]); self.chain.SetBranchAddress('eltrig_eltrigger',passtrig); 		   self.selecpassbs['passtrig'] = (passtrig,2)
		#reweighting factors we can read from the files without recalculating
		self.rwbs = {}
		weight 		  = array('f',[1.0]);  self.chain.SetBranchAddress('weight',weight); 			   self.rwbs['weight'] = (weight,1.0)
		sf_pileup 	  = array('f',[1.0]);  self.chain.SetBranchAddress('sf_pileup',sf_pileup); 		   self.rwbs['sf_pileup'] = (sf_pileup,1.0)
		sf_btag_eff   = array('f',[1.0]);  self.chain.SetBranchAddress('sf_btag_eff',sf_btag_eff); 	   self.rwbs['sf_btag_eff'] = (sf_btag_eff,1.0)
		sf_mu_R 	  = array('f',[1.0]);  self.chain.SetBranchAddress('sf_mu_R',sf_mu_R);			   self.rwbs['sf_mu_R'] = (sf_mu_R,1.0)
		sf_mu_F 	  = array('f',[1.0]);  self.chain.SetBranchAddress('sf_mu_F',sf_mu_F); 			   self.rwbs['sf_mu_F'] = (sf_mu_F,1.0)
		sf_scale_comb = array('f',[1.0]);  self.chain.SetBranchAddress('sf_scale_comb',sf_scale_comb); self.rwbs['sf_scale_comb'] = (sf_scale_comb,1.0)
		sf_pdf_alphas = array('f',[1.0]);  self.chain.SetBranchAddress('sf_pdf_alphas',sf_pdf_alphas); self.rwbs['sf_pdf_alphas'] = (sf_pdf_alphas,1.0)
		#Set up output TTree
		tname = 'data_tree' if isdata else 'MC_tree'
		self.tree  = TTree(tname,tname)
		self.otbs = {}
		el_pt 	   = array('f',[-1.0]); self.tree.Branch('el_pt',el_pt,'el_pt/F'); self.otbs['el_pt'] = (el_pt,-1.0)
		el_eta 	   = array('f',[100.]); self.tree.Branch('el_eta',el_eta,'el_eta/F'); self.otbs['el_eta'] = (el_eta,100.)
		pu 		   = array('i',[-1]);   self.tree.Branch('pu',pu,'pu/I'); self.otbs['pu'] = (pu,-1)
		topology   = array('i',[0]); 	self.tree.Branch('topology',topology,'topology/I'); self.otbs['topology'] = (topology,0)
		pass_trig  = array('I',[2]);    self.tree.Branch('pass_trig',pass_trig,'pass_trig/i'); self.otbs['pass_trig'] = (pass_trig,2)
		evt_weight = array('f',[1.0]); 	self.tree.Branch('evt_weight',evt_weight,'evt_weight/F'); self.otbs['evt_weight'] = (evt_weight,1.0)
		self.allbranchdicts = [self.phyobjbs,self.selecpassbs,self.rwbs,self.otbs]
		#Set up histograms and efficiency graphs
		self.histos_and_graphs = []
		self.ele_pt_all   = TH1D(self.name+'_ele_pt_all',self.name+' electron p_{T} for all events; p_{T} (GeV)',n_ele_pt_bins,ele_pt_bins) 
		self.histos_and_graphs.append(self.ele_pt_all)
		self.ele_eta_all  = TH1D(self.name+'_ele_eta_all',self.name+' electron #eta for all events; #eta',n_eta_bins,eta_bins) 
		self.histos_and_graphs.append(self.ele_eta_all)
		self.evt_pu_all   = TH1D(self.name+'_evt_pu_all',self.name+' pileup for all events; # vertices',n_pu_bins,pu_bins) 
		self.histos_and_graphs.append(self.evt_pu_all)
		self.ele_pt_pass  = TH1D(self.name+'_ele_pt_pass',self.name+' electron p_{T} for passing events; p_{T} (GeV)',n_ele_pt_bins,ele_pt_bins) 
		self.histos_and_graphs.append(self.ele_pt_pass)
		self.ele_eta_pass = TH1D(self.name+'_ele_eta_pass',self.name+' electron #eta for passing events; #eta',n_eta_bins,eta_bins) 
		self.histos_and_graphs.append(self.ele_eta_pass)
		self.evt_pu_pass  = TH1D(self.name+'_evt_pu_pass',self.name+' pileup for passing events; # vertices',n_pu_bins,pu_bins) 
		self.histos_and_graphs.append(self.evt_pu_pass)
		self.histo_2d_all  = TH2D(self.name+'_histo_2D_all','',n_ele_pt_bins_2D,ele_pt_bins_2D,n_eta_bins_2D,eta_bins_2D); self.histos_and_graphs.append(self.histo_2d_all)
		self.histo_2d_pass = TH2D(self.name+'_histo_2D_pass','',n_ele_pt_bins_2D,ele_pt_bins_2D,n_eta_bins_2D,eta_bins_2D); self.histos_and_graphs.append(self.histo_2d_pass)
		ele_pt_x   = array('d',n_ele_pt_bins*[0.])
		ele_pt_xe  = array('d',n_ele_pt_bins*[0.])
		ele_pt_y   = array('d',n_ele_pt_bins*[0.])
		ele_pt_ye  = array('d',n_ele_pt_bins*[0.])
		ele_eta_x  = array('d',n_eta_bins*[0.])
		ele_eta_xe = array('d',n_eta_bins*[0.])
		ele_eta_y  = array('d',n_eta_bins*[0.])
		ele_eta_ye = array('d',n_eta_bins*[0.])
		pu_x 	   = array('d',n_pu_bins*[0.])
		pu_xe 	   = array('d',n_pu_bins*[0.])
		pu_y 	   = array('d',n_pu_bins*[0.])
		pu_ye 	   = array('d',n_pu_bins*[0.])
		self.ele_pt_gr    = TGraphErrors(n_ele_pt_bins,ele_pt_x,ele_pt_y,ele_pt_xe,ele_pt_ye); self.histos_and_graphs.append(self.ele_pt_gr)
		self.ele_pt_gr.SetName(self.name+'ele_pt_gr'); self.ele_pt_gr.SetTitle(self.name+' probe efficiency vs. electron p_{T}'); 
		self.ele_pt_gr.GetXaxis().SetName('electron p_{T} (GeV)'); self.ele_pt_gr.GetYaxis().SetName('Probe efficiency')
		self.ele_eta_gr   = TGraphErrors(n_eta_bins,ele_eta_x,ele_eta_y,ele_eta_xe,ele_eta_ye); self.histos_and_graphs.append(self.ele_eta_gr)
		self.ele_eta_gr.SetName(self.name+'ele_eta_gr'); self.ele_eta_gr.SetTitle(self.name+' probe efficiency vs. electron #eta'); 
		self.ele_eta_gr.GetXaxis().SetName('#eta'); self.ele_eta_gr.GetYaxis().SetName('Probe efficiency')
		self.pu_gr   = TGraphErrors(n_pu_bins,pu_x,pu_y,pu_xe,pu_ye); self.histos_and_graphs.append(self.pu_gr)
		self.pu_gr.SetName(self.name+'pu_gr'); self.pu_gr.SetTitle(self.name+' probe efficiency vs. pileup');
		self.pu_gr.GetXaxis().SetName('pileup'); self.pu_gr.GetYaxis().SetName('Probe efficiency')
		#Counter
		count = 0
		##########								Main Event Loop								##########
		print 'Filling trees for '+self.name+'. . .'
		nEntries = chain.GetEntries()
		for entry in range(nEntries) :
			#check the max events
			count+=1
			if count == options.max_events+1 :
				print 'Processed event number '+str(count-1)+', exiting'
				break
			#print progress
			if count % options.print_every == 0 or count == 1:
				print 'Count at '+str(count)+' out of '+str(nEntries)+', (%.4f%% complete)'%(float(count) / float(nEntries) * 100.0)

			#reset all of the arrays holding tree branches
			for branchdict in self.allbranchdicts :
				for branchtuple in branchdict.values() :
					branchtuple[0][0] = branchtuple[1]
			
			chain.GetEntry(entry)

			##readjust the trigger selection cuts (for now, until I fix how they're hardcoded)
			#selectiontrig[0] = 1 if (mutrig[0]==1 and isomu[0]==1 and isoel[0]==1 and twoleptons[0]==1 and opplepcharge[0]==1 and btags[0]==1 and lepWpT[0]==1) else 0 		  
			
			cuts = []
			cuts.append(isdata or weight[0]!=1.0)
			#cuts only dependent on topology
			if options.topology=='boosted' :
				cuts.append(evt_top[0]<3)
			elif options.topology=='resolved' :
				cuts.append(evt_top[0]==3)
			#cuts for trigger analysis
			if mode=='t' :
				cuts.append(selectiontrig[0]==1)
				cuts.append(lepflavor[0]==2)
			#check all cuts
			if cuts.count(False) > 0 :
				continue
			
			#fill the tree
			el_pt[0] = theele_pt[0] 
			el_eta[0] = theele_eta[0]
			pu[0] = evt_pu[0]
			topology[0] = evt_top[0]
			pass_trig[0] = passtrig[0]
			evt_weight[0] = self.__getEvtWeight__()
			self.tree.Fill()

			#fill the histograms (bring parameters in range)
			self.ele_pt_all.Fill(el_pt[0],evt_weight[0])
			self.ele_eta_all.Fill(el_eta[0],evt_weight[0])
			self.evt_pu_all.Fill(pu[0],evt_weight[0])
			self.histo_2d_all.Fill(el_pt[0],abs(el_eta[0]),evt_weight[0])
			if (mode=='t' and pass_trig[0]==1) :
				self.ele_pt_pass.Fill(el_pt[0],evt_weight[0])
				self.ele_eta_pass.Fill(el_eta[0],evt_weight[0])
				self.evt_pu_pass.Fill(pu[0],evt_weight[0])
				self.histo_2d_pass.Fill(el_pt[0],abs(el_eta[0]),evt_weight[0])

		print 'Done'
		
		#Make the graph y-values and errors
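		# Per-bin efficiency = passing/all; the error below adds the two counts' relative
		# Poisson uncertainties in quadrature (an approximation that ignores their correlation).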
		for i in range(n_ele_pt_bins) :
			x_value = (ele_pt_bins[i+1]+ele_pt_bins[i])/2
			x_err   = (ele_pt_bins[i+1]-ele_pt_bins[i])/2
			passing_events = self.ele_pt_pass.GetBinContent(self.ele_pt_pass.FindBin(x_value))
			all_events = self.ele_pt_all.GetBinContent(self.ele_pt_all.FindBin(x_value))
			y_value = 0.; y_err = 0.
			if all_events > 0. :
				y_value = passing_events/all_events
				if passing_events>0. :
					y_err = y_value*sqrt(1./passing_events+1./all_events)
			self.ele_pt_gr.SetPoint(i,x_value,y_value)
			self.ele_pt_gr.SetPointError(i,x_err,y_err)
		for i in range(n_eta_bins) :
			x_value = (eta_bins[i+1]+eta_bins[i])/2
			x_err   = (eta_bins[i+1]-eta_bins[i])/2
			passing_events = self.ele_eta_pass.GetBinContent(self.ele_eta_pass.FindBin(x_value))
			all_events = self.ele_eta_all.GetBinContent(self.ele_eta_all.FindBin(x_value))
			y_value = 0.; y_err = 0.
			if all_events > 0. :
				y_value = passing_events/all_events
				if passing_events>0. :
					y_err = y_value*sqrt(1./passing_events+1./all_events)
			self.ele_eta_gr.SetPoint(i,x_value,y_value)
			self.ele_eta_gr.SetPointError(i,x_err,y_err)
		for i in range(n_pu_bins) :
			x_value = (pu_bins[i+1]+pu_bins[i])/2
			x_err   = (pu_bins[i+1]-pu_bins[i])/2
			passing_events = self.evt_pu_pass.GetBinContent(self.evt_pu_pass.FindBin(x_value))
			all_events = self.evt_pu_all.GetBinContent(self.evt_pu_all.FindBin(x_value))
			y_value = 0.; y_err = 0.
			if all_events > 0. :
				y_value = passing_events/all_events
				if passing_events>0. :
					y_err = y_value*sqrt(1./passing_events+1./all_events)
			self.pu_gr.SetPoint(i,x_value,y_value)
			self.pu_gr.SetPointError(i,x_err,y_err)
		#Fit the graphs with constants, then save the fit functions as graphs
		self.ele_pt_const = TF1('ele_pt_const','[0]',ele_pt_bins[0],ele_pt_bins[n_ele_pt_bins])
		self.ele_eta_const = TF1('ele_eta_const','[0]',eta_bins[0],eta_bins[n_eta_bins])
		self.pu_const = TF1('pu_const','[0]',pu_bins[0],pu_bins[n_pu_bins])
		self.ele_pt_gr.Fit('ele_pt_const')
		self.ele_eta_gr.Fit('ele_eta_const')
		self.pu_gr.Fit('pu_const')
		self.ele_pt_fit_value  = self.ele_pt_const.GetParameter(0)
		self.ele_pt_fit_err    = self.ele_pt_const.GetParError(0)
		self.ele_eta_fit_value = self.ele_eta_const.GetParameter(0)
		self.ele_eta_fit_err   = self.ele_eta_const.GetParError(0)
		self.pu_fit_value 	   = self.pu_const.GetParameter(0)
		self.pu_fit_err 	   = self.pu_const.GetParError(0)
		self.ele_pt_fit_gr 	= TGraphErrors(n_ele_pt_bins,ele_pt_x,ele_pt_y,ele_pt_xe,ele_pt_ye)
		self.ele_eta_fit_gr = TGraphErrors(n_eta_bins,ele_eta_x,ele_eta_y,ele_eta_xe,ele_eta_ye)
		self.pu_fit_gr 		= TGraphErrors(n_pu_bins,pu_x,pu_y,pu_xe,pu_ye)
		for i in range(n_ele_pt_bins) :
			x_value = (ele_pt_bins[i+1]+ele_pt_bins[i])/2
			x_err   = (ele_pt_bins[i+1]-ele_pt_bins[i])/2
			self.ele_pt_fit_gr.SetPoint(i,x_value,self.ele_pt_fit_value)
			self.ele_pt_fit_gr.SetPointError(i,x_err,self.ele_pt_fit_err)
		for i in range(n_eta_bins) :
			x_value = (eta_bins[i+1]+eta_bins[i])/2
			x_err   = (eta_bins[i+1]-eta_bins[i])/2
			self.ele_eta_fit_gr.SetPoint(i,x_value,self.ele_eta_fit_value)
			self.ele_eta_fit_gr.SetPointError(i,x_err,self.ele_eta_fit_err)
		for i in range(n_pu_bins) :
			x_value = (pu_bins[i+1]+pu_bins[i])/2
			x_err   = (pu_bins[i+1]-pu_bins[i])/2
			self.pu_fit_gr.SetPoint(i,x_value,self.pu_fit_value)
			self.pu_fit_gr.SetPointError(i,x_err,self.pu_fit_err)
		#Write the tree, histograms, and graphs
		outputfile.cd()
		self.tree.Write()
		for thing in self.histos_and_graphs :
			thing.Write()	
Example #11
#!/usr/bin/env python
#coding=utf-8

import json
from corrector import Corrector

if __name__ == '__main__':
    corrector = Corrector('example.txt')
    print 'words:', json.dumps(corrector.nwords, ensure_ascii=False, indent=4)

    print '\n'

    for word in [u'刘弱英', u'陈牵强', u'刘若华', u'hel', u'helle']:
        print 'word:', word
        print 'corrected:', corrector.correct(word)
        print 'possibilities:', json.dumps(corrector.possibilities(word),
                                           ensure_ascii=False, indent=4)
        print '\n'
Example #12
class Main(object):
    def __init__(self):
        self.main = Corrector()
        self.match = ""
        self.matchgeo = ""
        self.commands = []
        self.option = Options()
        self.display = Chunks("")
        self.display.create_Chunks()
        self.last_Display = ""
        self.find = Chunks("")
        self.find.create_Chunks()
        self.table = Outline()
        self.location_History = {}

    def run(self):
        hlp = Helper();br=True;results={};resultsort=[];correcting = False
        while br:  
            if correcting or self.matchgeo:
                pass
            else:
                command = raw_input("SMS in: ")
            if not self.matchgeo:
                t,option,word = self.process(unicode(command))
                words = t[0];disting = t[1][-1]
            if option and len(resultsort)==0:
                self.option = option;self.match = word
                if len(self.display) > 0 :
                    self.last_Display = self.display
                self.display = Chunks(self.option.print_Options())
                self.display.create_Chunks(heading="By ({0}) Did you mean: ".format(word),
                           footing="Please choose.")
                print self.display.goto_Chunk(); continue
            
            if len(self.option)>0 and len(words[0])==1:
                ch = self.option.select_Option(words[0])
                if ch!=None and len(resultsort)==0:
                    self.main.correctedHistory[self.match]=self.option[ch][2]
                    disting = t[1][-2]; 
                    k = disting['tokens'].index(self.match)
                    disting['tokens'][k] = self.option[ch][2]
                    try:
                        k = disting['words'].index(self.match)
                        disting['words'][k] = self.option[ch][2]
                        words = disting['words']
                    except:
                        disting['words'].append(self.option[ch][2])
                        words = disting['words']
                    if self.option[ch][2] == "find":
                        word = "find";self.option = Options()
                    else:
                        self.option = Options();correcting = True
                        command = " ".join(disting['tokens']); continue
                if ch!=None and type(resultsort)!=unicode:
                    text = "{0} - Ratings : {1}".format(resultsort[ch][0],resultsort[ch][1])
                    text += "\n" +" Telephone Number: " + results[resultsort[ch][0]][1]
                    text += "\n Address: "+results[resultsort[ch][0]][0]
                    self.display = Chunks(text)
                    self.display.create_Chunks()
                    print self.display.goto_Chunk()
                    self.option = Options();continue
                if ch!=None and type(resultsort)==unicode:
                    self.matchgeo = [results[ch]]; self.location_History[resultsort] = [results[ch]]
                    disting = t[1][-2]; words = disting['words'];self.option = Options()
                    continue

            correcting = False
                               
            if word == "find" or "find" in words:
                if word == "find":
                    self.find = self.last_Display
                else:
                    self.find = self.display
                expand = self.find.find_Chunks(" ".join([i for i in disting['tokens'] if i!="find"]))
                if expand!=False:
                    self.find.chunk_list = expand
                    print self.find.goto_Chunk()
                else:
                    print "No results found"
                continue
            
            for k in range(len(words)):
                dirfin = finder(r'directory|places',words[k])   
                if dirfin.found():
                    if "list" in words:
                        self.display = Chunks(",".join(hlp.dirtypes()).replace("_"," "))
                        self.display.create_Chunks(heading="List of types of places:")
                        print self.display.goto_Chunk(); break
                    # 'and' is needed here: 'i!="directory" or i!="places"' is always true
                    direc=Directory([i for i in words if i!="directory" and i!="places"])
                    if len(self.matchgeo) > 0:
                        direc.locs=self.matchgeo; self.matchgeo = ""
                    results,resultsort = direc.run(disting,self.location_History)
                    if results == None:
                        break
                    elif type(resultsort) == unicode:
                        for i in results:
                            self.option.add_Option(content="{0}".format(i[0].encode('utf-8')))
                        self.display = Chunks(self.option.print_Options())
                        self.display.create_Chunks(heading="By {0} did you mean:".format(str(resultsort)),
                              footing="Please choose a location. ")
                        print self.display.goto_Chunk(); break                        
                            
                    for i in resultsort:
                        self.option.add_Option(content="{0} - Ratings : {1}".format(i[0].encode('utf-8'),str(i[1])))
                    self.display = Chunks(self.option.print_Options())
                    self.display.create_Chunks(heading="Nearby places:",
                              footing="Choose for more details. ")
                    print self.display.goto_Chunk(); break     
                        
                outfin = finder(r'outline',words[k])
                if outfin.found():
                    with open("textblock.txt","rb") as f:
                        textblock= f.read().decode("string_escape")
                    for i in range(len(textblock)):
                        if textblock[i:i+2] == "\u":   
                            textblock= textblock[:i]+unichr(int(textblock[i+2:i+6],base=16)).encode("utf-8")  +textblock[i+6:]
                    f.close()
                    self.table.add_TxtBlock(textblock)
                    if self.table.run([i for i in words if i !="outline"],disting['numbers']) != False:
                        self.display = self.table.run([i for i in words if i !="outline"],disting['numbers'])
                        print self.display.goto_Chunk(); break
                
                exifin = finder(r'exit',words[k])
                if exifin.found():
                    br=False;break   
                helpfin = finder(r'help',words[k])
                if helpfin.found():
                    print hlp;break
                
                if "next" in words[k]:
                    print self.display.next_Chunk(); break
                
                if "goto" in words[k]:
                    try:
                        n = disting['numbers'][0]
                        num = int(n)
                    except:
                        num = -1
                    print self.display.goto_Chunk(num); break

    def process(self,command):
        opts = [];self.main.disting(command)
        for i in self.main.cur()['words']+self.main.cur()['splits']:
            for j in self.main.match(i):
                if j[0]=='None':
                    continue
                if j[1]==0:
                    try:
                        k = self.main.cur()['words'].index(i)
                        self.main.cur()['words'][k] = j[0]
                    except:
                        self.main.cur()['words'].append(j[0])
                    continue
                        
                if opts == []:
                    opts = Options()
                opts.add_Option(content="{0}".format(j[0]))
            
            if opts:
                return [self.main.cur()['words'],self.main.history],opts,i
                
        return [self.main.cur()['words'],self.main.history],opts,""
Example #13
    def setUp(self):
        self.corrector = Corrector()
Example #14
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
import numpy as np
from nlp import Ui_Form
from utils.langconv import *
from set_custom import Ui_widget
from playsound import playsound
from test import speech_recognition
from corrector import Corrector
c = Corrector()
c.set_custom_confusion_dict('./my_custom.txt')


class MainWindow(QMainWindow, Ui_Form):
    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        self.setupUi(self)
        pix = QPixmap('index.jpg').scaled(self.label_3.width(),
                                          self.label_3.height())
        self.label_3.setPixmap(pix)
        self.pushButton_1.clicked.connect(self.error_detection)
        self.pushButton_2.clicked.connect(self.error_correction)
        self.pushButton_3.clicked.connect(self.set_custom)
        self.pushButton_4.clicked.connect(self.add_speech)
        self.pushButton_5.clicked.connect(self.simplified2traditional)
        self.pushButton_6.clicked.connect(self.play)

    def error_detection(self):
        error_sentences = self.textEdit.toPlainText()
        corrected_sent, err = Corrector.correct(c, error_sentences)
Example #15
from flask import Flask, render_template, request
from corrector import Corrector
import json
import logging
app = Flask(__name__)

corrector = Corrector()
if app.debug:
    corrector.logger.setLevel(logging.DEBUG)

@app.route("/")
def home():
    return render_template("home.html",
        guias = corrector.nombres_guias())

@app.route("/guia/<titulo>")
def mostrar_guia(titulo):
    assert titulo in corrector.nombres_guias()
    # Define the data parameters for the exercises.
    ejercicios = corrector.ejercicios_de(titulo)
    params = []
    for ejercicio in ejercicios:
        nparams = len(ejercicio["archivos_entrada"].split(","))
        if nparams == 1:
            params.append("datos")
        else:
            # "datos1, datos2, ..."
            params.append(", ".join(["datos"+str(i+1) for i in range(nparams)]))

    return render_template("guia.html",
        guias = corrector.nombres_guias(),
Example #16
from corrector import Corrector

reader = 'Conll'

if reader == 'MovieDialog':
    train_path = 'dataset/movie_lines.txt'
    test_path = 'dataset/test.txt'
    model_path = 'dialog_correcter_model'
elif reader == 'Conll':
    train_path = 'dataset/CONLL/train.txt'
    test_path = 'dataset/CONLL/test.txt'
    model_path = 'conll_correcter_model'

example_path = 'dataset/example.txt'

corrector = Corrector(train_path, test_path, model_path, reader)
corrector.corrector_init()

def process(sent):
    result, alt, err = corrector.correct(sent)
    print('Output:', result)
    # print('-'*30)
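    # 'alt' appears to pair original and corrected tokens for each error in 'err';
    # an empty original side is reported as an addition, an empty corrected side as a removal.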
    for r, e in zip(alt, err):
        x = r[0][0] if r[0] else None
        y = r[1][0] if r[1] else None
        if not x:
            print('{:>13} ERROR      Add: {} '.format(e, y))
        elif not y:
            print('{:>13} ERROR   Remove: {}  '.format(e, x))
        else:
Example #17
    def __init__(self, application, request, **kwargs):
        super(CorrectHandler, self).__init__(application, request, **kwargs)
        self.corrector = Corrector()
Example #18
class CorrectTest(unittest.TestCase):
    """Test cases for Corrector"""
    def setUp(self):
        self.corrector = Corrector()

    def test_equals(self):
        self.assertEqual(self.corrector.correction('somthing'),
                         'something')  # insert
        self.assertEqual(self.corrector.correction('speling'),
                         'seeing')  # insert
        self.assertEqual(self.corrector.correction('korrectud'),
                         'corrected')  # replace 2
        self.assertEqual(self.corrector.correction('coryright'),
                         'copyright')  # replace
        self.assertEqual(self.corrector.correction('aventure'),
                         'adventure')  # insert 2
        self.assertEqual(self.corrector.correction('additonel'),
                         'additional')  # delete
        self.assertEqual(self.corrector.correction('peotry'),
                         'poetry')  # transpose
        self.assertEqual(self.corrector.correction('peotryy'),
                         'poetry')  # transpose + delete
        self.assertEqual(self.corrector.correction('word'), 'word')  # known
        self.assertEqual(self.corrector.correction('quintessential'),
                         'quintessential')  # unknown
        self.assertEqual(self.corrector.correction('moring'),
                         'morning')  # insert