Exemplo n.º 1
0
    def Advanced(self,string,html,link):
        """Run the text/link classifiers for one page and derive a verdict.

        The method is driven by the module-level flag ``i``: on the first
        call (``i == 0``) it additionally loads the stemmed word lists
        ``news.txt``, ``games.txt`` and ``medicine.txt`` into
        ``self.news_list``, ``self.games_list`` and ``self.med_list`` and
        then sets ``i`` to 1.  On every call it:

        1. obtains the stemmed text and stemmed link text from
           ``ContentAnalyzer_Utils(string, html, link).read_text()``;
        2. pickles both to ``pfile_text.p`` / ``pfile_link.p`` in the current
           working directory;
        3. runs ``Bayesian(i, c).learning()`` and feeds its result to
           ``Content_Classifier_2``;
        4. stores ``classify_text(u)`` / ``classify_link(u)`` for ``u`` in
           1..6 into the module-level ``data2`` at indices 0..11
           (text results at even indices, link results at odd indices).

        Args:
            string, html, link: forwarded unchanged to
                ``ContentAnalyzer_Utils``; their meaning is defined there.

        Returns:
            A ``(s, value)`` tuple where ``s`` is a list copy of ``data2``
            taken right after classification and ``value`` is the verdict
            string: ``'SAFE'`` when ``sum(data2) == 12``, otherwise the result
            of ``self.check_advanced`` followed by a space and
            ``self.reason(data2)``; either is replaced by
            ``"Not-Enough-Text-on-Site"`` or ``"ERROR"`` for short/empty text
            (see the final checks below).

        Raises:
            RuntimeError: if the module-level ``i`` is neither 0 nor 1.  The
                previous code reached the same state with an opaque
                ``UnboundLocalError`` on ``stemmed_text``.
        """
        global i
        global data2

        if i not in (0, 1):
            raise RuntimeError(
                "Advanced() expects the module-level flag i to be 0 or 1, "
                "got %r" % (i,))
        first_call = (i == 0)

        stemmed_text, link_stemmed_text, c = ContentAnalyzer_Utils(
            string, html, link).read_text()

        if first_call:
            # The word lists are only loaded once, on the very first call.
            path = directory.path()
            self.news_list = self._stem_word_file(path, 'news.txt')
            self.games_list = self._stem_word_file(path, 'games.txt')
            self.med_list = self._stem_word_file(path, 'medicine.txt')

        # Close (and therefore flush) both pickle files before
        # Bayesian(...).learning() runs; presumably it reads them back --
        # TODO confirm against Bayesian.
        with open('pfile_text.p', 'wb') as handle:
            pickle.dump(stemmed_text, handle)
        with open('pfile_link.p', 'wb') as handle:
            pickle.dump(link_stemmed_text, handle)

        (region_text, region_link,
         length_list_link_text, length_link_text) = Bayesian(i, c).learning()
        p = Content_Classifier_2(region_text, region_link,
                                 length_list_link_text, length_link_text)

        # Interleave the results: text at even slots, link at odd slots.
        for slot, u in enumerate(range(1, 7)):
            data2[2 * slot] = p.classify_text(u)
            data2[2 * slot + 1] = p.classify_link(u)

        if first_call:
            i = 1

        # Snapshot before check_advanced()/reason() get a chance to touch data2.
        s = list(data2)

        # check_advanced() is always called, even when its result is then
        # replaced by 'SAFE', in case it has side effects.
        value = self.check_advanced(data2, stemmed_text)
        if sum(data2) != 12:
            value = value + ' ' + self.reason(data2)
        else:
            value = 'SAFE'

        # Short/empty text overrides the verdict.  When the text is empty AND
        # the sum is zero, "Not-Enough-Text-on-Site" takes precedence over
        # "ERROR" because it is tested first.
        if sum(data2) == 0 and len(stemmed_text) <= 20:
            value = "Not-Enough-Text-on-Site"
        elif len(stemmed_text) == 0:
            value = "ERROR"

        return s, value

    def _stem_word_file(self, path, filename):
        """Return ``Stemming().stem`` applied to the contents of ``<path>/text/<filename>``."""
        with open(path + '/text/' + filename, 'r') as handle:
            doc = handle.read()
        return Stemming().stem(doc)
Exemplo n.º 2
0
 def openandstem(self,file1):
     """Read a word-list file and return its stemmed contents.

     Args:
         file1: file name inside the ``text`` sub-directory of
             ``directory.path()``.

     Returns:
         Whatever ``Stemming().stem`` returns for the file's full text.
     """
     path = directory.path()
     # Use a context manager so the handle is closed deterministically
     # instead of being left to the garbage collector.
     with open(path+'/text/'+file1, 'r') as handle:
         doc = handle.read()
     return Stemming().stem(doc)