def test_model(self, test_data, empty_solution, evaluate=False):
    model_weka = None
    if os.path.isfile(self.prediction_file):
        print 'Model ' + self.name + ' already tested.'
    elif not os.path.isfile(self.model_file):
        print 'Impossible to test this model. It should be trained first.'
        return
    else:
        print 'Starting to test model ' + self.name + '.'
        model_weka = Classifier(jobject=serialization.read(self.model_file))
        evaluation = Evaluation(data=test_data)
        evaluation.test_model(classifier=model_weka, data=test_data)
        predictions = evaluation.predictions()
        rows = read_sheet(file_name=empty_solution)
        solutions = []
        for row in rows:
            solution = [row['userid'], row['tweetid'], predictions.pop(0).predicted()]
            solutions.append(solution)
        write_the_solution_file(solutions, self.prediction_file)
        print 'Model ' + self.name + ' tested.'
    if evaluate:
        if os.path.isfile(self.evaluation_file):
            print 'Model ' + self.name + ' already evaluated.'
            return
        elif model_weka is None:
            model_weka = Classifier(jobject=serialization.read(self.model_file))
            evaluation = Evaluation(data=test_data)
            evaluation.test_model(classifier=model_weka, data=test_data)
        save_file(file_name=self.evaluation_file, content=evaluation.to_summary())
        print 'Model ' + self.name + ' evaluated.'
def read_file(file_name):
    tile_set_list = []
    characteristic = []
    jvm.start()
    nmrClass = Classifier(jobject=serialization.read("models/lmt_3sd.model"))
    with open(file_name) as f:  # opens file
        # reads in the characteristic protein sequence and converts it to
        # expected chemical shift values
        tile_characteristic = f.readline()
        characteristic = re.findall(r'\b[A-Za-z]{3,4}\b', tile_characteristic)
        characteristic = letters_to_numbers(characteristic)
        for line in f:  # reads in NMR data
            # reads each line and grabs the numbers and "na" entries
            # file format: "a b c d"
            a, b, c, d = re.findall(r'\b\d+\.\d*\b|\bna\b', line)
            # dealing with missing data
            if a == "na":
                a = -1
            if b == "na":
                b = -1
            if c == "na":
                c = -1
            if d == "na":
                d = -1
            # adds a new Tile to tile_set_list
            if not (a == -1 and b == -1 and c == -1 and d == -1):
                tile_set_list.append(Tile(a, b, c, d, nmrClass))
    return tile_set_list, characteristic, nmrClass
def assign_cluster(file_location, file_out="clustered.csv", model="kmeans.model", last_filename=False):
    data = read_csv_file(file_location)
    check_jvm()
    # load clusterer
    obj = serialization.read(model)
    clusterer = Clusterer(jobject=obj)
    # create file with cluster group
    with open(file_out, 'w') as output:
        for index, attrs in enumerate(data):
            tmp = []
            if last_filename:
                inst = Instance.create_instance(attrs[:-2])
            else:
                inst = Instance.create_instance(attrs[1:])
            pred = clusterer.cluster_instance(inst)
            dist = clusterer.distribution_for_instance(inst)
            if last_filename:
                tmp.append(attrs[-1])
                tmp.append(pred)
                tmp.extend(attrs[:-2])
            else:
                tmp.append(attrs[0])
                tmp.append(pred)
                tmp.extend(attrs[1:])
            print(str(index + 1) + ": label index=" + str(pred) + ", class distribution=" + str(dist))
            output.write('%s\n' % (','.join(map(str, tmp))))
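# Hypothetical invocation of assign_cluster above (a minimal sketch):
# "iris.csv" and "kmeans.model" are assumed to exist, with the first CSV
# column holding a row identifier, as the function expects.
assign_cluster("iris.csv", file_out="clustered.csv", model="kmeans.model")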
def main():
    """
    Just runs some example code.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)

    # save and read object
    helper.print_title("I/O: single object")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)

    # save classifier and dataset header (multiple objects)
    helper.print_title("I/O: multiple objects")
    serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i + 1) + ":")
        if javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)
def assign_classify(file_location, output="classified.out", model="naivebayes.model"):
    data = read_csv_file(file_location)
    jvm.start()
    # load the serialized classifier
    obj = serialization.read(model)
    classifier = Classifier(jobject=obj)
    # write one prediction per row
    with open(output, 'w') as cluster_file:
        for index, attrs in enumerate(data):
            inst = Instance.create_instance(attrs[1:])
            pred = classifier.classify_instance(inst)
            print(str(index + 1) + ": label index=" + str(pred))
    jvm.stop()
def query_instance(attributes, model="kmeans.model"):
    """
    Get the cluster for the given attributes.

    :param attributes: array or list of attribute values
    :returns: cluster id
    """
    check_jvm()
    # create instance
    inst = Instance.create_instance(attributes)
    # load model
    obj = serialization.read(model)
    # load clusterer and get the cluster_id
    cluster = Clusterer(jobject=obj)
    cluster_id = cluster.cluster_instance(inst)
    return cluster_id
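# Hypothetical usage sketch for query_instance above: the attribute values
# and the "kmeans.model" path are assumptions, and check_jvm() is assumed to
# start the JVM if it is not already running.
cluster_id = query_instance([5.1, 3.5, 1.4, 0.2], model="kmeans.model")
print("assigned cluster: %s" % cluster_id)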
def train_model(self, training_data):
    model_weka = None
    if os.path.isfile(self.model_file):
        print 'Model ' + self.name + ' already trained.'
    else:
        print 'Starting to train model ' + self.name + '.'
        model_weka = Classifier(classname=self.classname, options=self.options)
        model_weka.build_classifier(data=training_data)
        serialization.write(filename=self.model_file, jobject=model_weka)
        print 'Model ' + self.name + ' trained and saved.'
    if os.path.isfile(self.parameter_file):
        print 'Parameters of the model ' + self.name + ' already saved.'
    else:
        if model_weka is None:
            model_weka = Classifier(jobject=serialization.read(self.model_file))
        save_file(file_name=self.parameter_file, content=str(model_weka))
        print 'Parameters of the model ' + self.name + ' saved.'
def test_read_write(self):
    """
    Tests methods read and write.
    """
    fname = self.tempfile("readwrite.ser")
    self.delfile(fname)
    lin = ["A", "B", "C", "D"]
    vin = javabridge.make_instance("java/util/Vector", "()V")
    for element in lin:
        javabridge.call(vin, "add", "(Ljava/lang/Object;)Z", element)
    serialization.write(fname, vin)
    self.assertTrue(os.path.exists(fname), msg="Failed to write to " + fname + "?")

    vout = serialization.read(fname)
    self.assertIsNotNone(vout, msg="Failed to read from " + fname + "?")
    # enumerate the deserialized vector (not the original one),
    # so the round trip is actually verified
    enm = javabridge.call(vout, "elements", "()Ljava/util/Enumeration;")
    lout = typeconv.enumeration_to_list(enm)
    self.delfile(fname)
    self.assertEqual(lin, lout, msg="Input/output differ")
def read_model_weka(path):
    return Classifier(jobject=serialization.read(path))
def load_prediction_model(self, path_prediction_model):
    return read(path_prediction_model)
def load_model(fname, dir_name):
    outfile = realpath(join(dir_name, fname))
    model = Classifier(jobject=serialization.read(outfile))
    return model
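# Hypothetical usage of load_model above; "j48.model" and the directory are
# assumptions, and the JVM is assumed to be running already.
classifier = load_model("j48.model", "/tmp/models")
print(classifier)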
def classify():
    loader = Loader(classname="weka.core.converters.ArffLoader")
    dataset20x20 = loader.load_file("preprocessed/notes/20x20.arff")
    dataset20x20.class_is_last()
    dataset20x50 = loader.load_file("preprocessed/notes/20x50.arff")
    dataset20x50.class_is_last()
    dataset50x20 = loader.load_file("preprocessed/notes/50x20.arff")
    dataset50x20.class_is_last()

    nn1 = Classifier(jobject=serialization.read("nn20x20.model"))
    nn2 = Classifier(jobject=serialization.read("nn20x50.model"))
    nn3 = Classifier(jobject=serialization.read("nn50x20.model"))

    class1 = []
    class2 = []
    class3 = []
    for index, inst in enumerate(dataset20x20):
        pred1 = nn1.classify_instance(inst)
        class1.append(pred1)
    for index, inst in enumerate(dataset20x50):
        pred2 = nn2.classify_instance(inst)
        class2.append(pred2)
    for index, inst in enumerate(dataset50x20):
        pred3 = nn3.classify_instance(inst)
        class3.append(pred3)

    for i in range(len(class1)):
        if os.path.isfile('preprocessed/notes/note' + '%s' % (i) + '.png'):
            img = cv2.imread('preprocessed/notes/note' + '%s' % (i) + '.png', 0)
            # cv2.imshow('Note' + '%s' % (i), cv2.resize(img, (200, 200)))
            if not os.path.isfile("classified/vote_log.csv"):
                header()
            values = []
            print "NOTE", i, ":"
            print "  nn1:", noteName(class1[i])
            print "  nn2:", noteName(class2[i])
            print "  nn3:", noteName(class3[i])
            values.append("note" + str(i) + ".png")
            values.append(" ")
            values.append(noteName(class1[i]))
            values.append(noteName(class2[i]))
            values.append(noteName(class3[i]))
            cv2.imwrite("1_%s" % noteName(class1[i]) + "/note%s" % i + ".png", img)
            cv2.imwrite("2_%s" % noteName(class2[i]) + "/note%s" % i + ".png", img)
            # fixed copy-paste bug: the third network's vote uses class3
            cv2.imwrite("3_%s" % noteName(class3[i]) + "/note%s" % i + ".png", img)
            if class1[i] == class2[i]:
                print "MAJORITY VOTE:", noteName(class1[i])
                cv2.imwrite("classified/" + "%s" % noteName(class1[i]) + "/note%s" % i + ".png", img)
                values.append(noteName(class1[i]))
            elif class1[i] == class3[i]:
                print "MAJORITY VOTE:", noteName(class1[i])
                cv2.imwrite("classified/" + "%s" % noteName(class1[i]) + "/note%s" % i + ".png", img)
                values.append(noteName(class1[i]))
            elif class2[i] == class3[i]:
                print "MAJORITY VOTE:", noteName(class2[i])
                cv2.imwrite("classified/" + "%s" % noteName(class2[i]) + "/note%s" % i + ".png", img)
                values.append(noteName(class2[i]))
            else:
                print "No classification"
                cv2.imwrite("noClassification/note%s" % i + ".png", img)
                values.append("no classification")
            print "\n"
            writer = csv.writer(open("classified/vote_log.csv", 'ab'))
            writer.writerows([values])
    cv2.waitKey(0)
def loadModel(modelPath):
    model = Classifier(jobject=sr.read(modelPath))
    return model
def main():
    global stop_spinning, name, upper_clothing, lower_clothing, outer_clothing, shoes_clothing
    global upper_indices, lower_indices, outer_indices, shoes_indices
    '''
    Classifies clothing using stored classification models for each user
    '''
    FSM = ClothingFSM()
    # FSM.username_server()
    clothingdb = MySQLdb.connect(host="localhost",
                                 user="******",
                                 passwd="mypassword",  # change to your SQL DB password
                                 db="userprofiles")
    cursor = clothingdb.cursor()
    cursor.execute("SELECT * FROM clothing")
    name = "Study"

    # Populate clothing dictionaries with the user's wardrobe
    for row in cursor.fetchall():
        print str(row[2])
        print str(row[6])
        if str(row[0]) == name:
            if str(row[1]) == "Upper Body":
                try:
                    upper_clothing[row[2]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
            if str(row[1]) == "Lower Body":
                try:
                    lower_clothing[row[3]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
            if str(row[1]) == "Outerwear":
                try:
                    outer_clothing[row[4]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
            if str(row[1]) == "Shoes":
                try:
                    shoes_clothing[row[5]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
    print upper_clothing, lower_clothing, outer_clothing, shoes_clothing
    # FSM.received_user_info()
    # In the final program, we will receive this information from the database

    # Set to True or False if receiving features vs. testing the defaults below
    receive_features = True
    if receive_features == False:
        # Wait to receive input
        # Example inputs from user/weather API
        features['casual_formal'] = 3
        # 5 is very comfortable, 1 is not comfortable
        features['comfort'] = 3
        # 1 is not snowing, 2 is light snow, 3 is heavy snow
        features['snow'] = 1
        # 1 is not raining, 3 is raining (no medium)
        features['rain'] = 3
        # If the user is spending their time mostly outside, set warmth to
        # outside_warmth. If not, set warmth.
        features['warmth'] = 1
        features['outside_warmth'] = 4
        # 1 is no, 0 is yes
        features['athletic'] = 1
        snowstring = ''
        rainstring = ''
        athleticstring = ''
    else:
        FSM.features_server()

    upper_prediction_array = []
    lower_prediction_array = []
    outer_prediction_array = []
    shoes_prediction_array = []

    warmth_att = Attribute.create_numeric("Warmth")
    comfort_att = Attribute.create_numeric("Comfort")
    casual_att = Attribute.create_numeric("Casual")
    rain_att = Attribute.create_numeric("Rain")
    snow_att = Attribute.create_numeric("Snow")
    athletic_att = Attribute.create_numeric("Athletic")

    upper_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    lower_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    outer_attributes = [warmth_att, casual_att, comfort_att, snow_att, rain_att]
    shoes_attributes = [casual_att, comfort_att, athletic_att]

    Instances.create_instances("upper_instances", upper_attributes, 0)
    Instances.create_instances("lower_instances", lower_attributes, 0)
    Instances.create_instances("outer_instances", outer_attributes, 0)
    Instances.create_instances("shoes_instances", shoes_attributes, 0)

    # Simulate their wardrobe: each entry is 1 if the user owns at least one
    # item of that clothing type, 0 otherwise (same encoding as the original
    # unrolled per-type checks).
    upper_types = ['Tank Top', 'T-Shirt', 'Long-sleeved Shirt', 'Athletic Top',
                   'Button-down Shirt', 'Polo Shirt', 'Dress Shirt', 'Suit Jacket',
                   'Blazer', 'Hoodie', 'Sweater', 'Blouse', 'Day Dress', 'Evening Dress']
    lower_types = ['Shorts', 'Athletic Shorts', 'Athletic Pants', 'Jeans',
                   'Trousers', 'Skirt', 'Dress Pants']
    outer_types = ['Light Jacket', 'Winter Jacket', 'Rain Jacket']
    shoes_types = ['Casual Shoes', 'Athletic Shoes', 'Dress Shoes', 'Business Casual Shoes']
    upper_array = [0 if len(upper_clothing[t]) == 0 else 1 for t in upper_types]
    lower_array = [0 if len(lower_clothing[t]) == 0 else 1 for t in lower_types]
    outer_array = [0 if len(outer_clothing[t]) == 0 else 1 for t in outer_types]
    shoes_array = [0 if len(shoes_clothing[t]) == 0 else 1 for t in shoes_types]

    upper_list = [features['outside_warmth'], features['casual_formal'],
                  features['comfort'], features['athletic']]
    lower_list = [features['outside_warmth'], features['casual_formal'],
                  features['comfort'], math.fabs(1 - features['athletic'])]
    outer_list = [features['outside_warmth'], features['casual_formal'],
                  features['comfort'], features['rain'], features['snow']]
    shoes_list = [features['casual_formal'], features['comfort'],
                  math.fabs(1 - features['athletic'])]

    upper_instance = Instance.create_instance(upper_list, classname='weka.core.DenseInstance', weight=1.0)
    lower_instance = Instance.create_instance(lower_list, classname='weka.core.DenseInstance', weight=1.0)
    outer_instance = Instance.create_instance(outer_list, classname='weka.core.DenseInstance', weight=1.0)
    shoes_instance = Instance.create_instance(shoes_list, classname='weka.core.DenseInstance', weight=1.0)

    upper_path = '/home/leo/models/uppermodel2.model'
    lower_path = '/home/leo/models/lowermodel2.model'
    outer_path = '/home/leo/models/outermodel2.model'
    shoes_path = '/home/leo/models/shoesmodel7.model'

    upper_classifier = Classifier(jobject=serialization.read(upper_path))
    lower_classifier = Classifier(jobject=serialization.read(lower_path))
    outer_classifier = Classifier(jobject=serialization.read(outer_path))
    shoes_classifier = Classifier(jobject=serialization.read(shoes_path))

    upper_predictions = upper_classifier.distribution_for_instance(upper_instance)
    lower_predictions = lower_classifier.distribution_for_instance(lower_instance)
    outer_predictions = outer_classifier.distribution_for_instance(outer_instance)
    shoes_predictions = shoes_classifier.distribution_for_instance(shoes_instance)

    if features['rain'] == 1:
        rainstring = 'No'
    if features['rain'] == 3:
        rainstring = 'Yes'
    if features['snow'] == 1:
        snowstring = 'No'
    if features['snow'] == 3:
        snowstring = 'Yes'
    if features['athletic'] == 1:
        athleticstring = 'No'
    if features['athletic'] == 0:
        athleticstring = 'Yes'

    print "Features being Classified:"
    print "Outside Warmth:", features['outside_warmth'], \
        "Inside-Outside:", features['inside_outside'], \
        "Casual-Formal:", features['casual_formal'], \
        "Comfort:", features['comfort'], \
        "Athletic:", athleticstring, "Rain:", rainstring, "Snow:", snowstring

    # Remove clothing options the user doesn't own
    for i in range(len(upper_array)):
        if upper_array[i] == 0:
            upper_prediction_array.append(0)
        else:
            upper_prediction_array.append(upper_predictions[i])
    for i in range(len(lower_array)):
        if lower_array[i] == 0:
            lower_prediction_array.append(0)
        else:
            lower_prediction_array.append(lower_predictions[i])
    for i in range(len(outer_array)):
        if outer_array[i] == 0:
            outer_prediction_array.append(0)
        else:
            outer_prediction_array.append(outer_predictions[i])
    for i in range(len(shoes_array)):
        if shoes_array[i] == 0:
            shoes_prediction_array.append(0)
        else:
            shoes_prediction_array.append(shoes_predictions[i])

    def top_indices(prediction_array, k):
        # Repeatedly take the argmax and zero it out, collecting the k best
        # indices. This is the same selection the original unrolled loops
        # performed; it also fixes the copy-paste bug that overwrote
        # max_index_upper4 while scanning the lower-body predictions.
        indices = []
        for _ in range(k):
            max_index = 0
            for i in range(1, len(prediction_array)):
                if prediction_array[i] > prediction_array[max_index]:
                    max_index = i
            indices.append(max_index)
            prediction_array[max_index] = 0
        return indices

    # Find the top options for each classifier
    upper_indices = top_indices(upper_prediction_array, 5)
    lower_indices = top_indices(lower_prediction_array, 5)
    outer_indices = top_indices(outer_prediction_array, 3)
    shoes_indices = top_indices(shoes_prediction_array, 4)

    print "Outer Indices:", outer_indices
    FSM.received_inputs()
    print "Exiting Program"
def test(objs, paras, testfile1, pred, real):
    testfile = preprocess(testfile1, True)
    xref = {'x_nT': 1, 'x_nT_delta': 0, 'x_nK': 1, 'x_nK_delta': 0,
            'x_long': 1, 'x_str': 0, 'x_strsum': 0}
    add_features(xref, 'x')
    zeroref = []
    for k in ['long', 'nK', 'nK_delta', 'nT', 'nT_delta', 'str', 'strsum']:
        zeroref.append(xref['x_%s' % k])
    zeroref.append(0)  # should be obj
    for k in addf():
        zeroref.append(xref['x_%s' % k])
    with open(testfile) as fin:
        reader = csv.DictReader(fin)
        linecount = 0
        for line in reader:
            ops = []
            for h in line:
                if h.startswith('op'):
                    ops.append(h[:h.find('_')])
            for op in ops:
                add_features(line, op)
            stats = {}
            valid = True
            real_line = {}
            for h in line:
                if h.startswith('op'):
                    k = h[:h.find('_')]
                    v = h[h.find('_') + 1:]
                    if k not in stats:
                        stats[k] = {}
                    stats[k][v] = pfloat(line[h])
                    if stats[k][v] is None:
                        valid = False
                elif h in objs:
                    real_line[h] = pfloat(line[h])
                    if real_line[h] is None:
                        valid = False
            if not valid:
                continue
            linecount += 1
            if linecount > 250:
                continue
            # for k in stats:
            #     assert len(paras) == len(stats[k])
            #     for v in stats[k]:
            #         assert v in paras
            for obj in objs:
                c = Classifier(jobject=serialization.read(model_file('hash', obj)))
                zerovalue = c.classify_instance(Instance.create_instance(zeroref))
                # s = 0
                s = zerovalue
                for op in stats:
                    values = []
                    for k in ['long', 'nK', 'nK_delta', 'nT', 'nT_delta', 'str', 'strsum']:
                        values.append(stats[op][k])
                    values.append(0)  # should be obj
                    for k in addf():
                        values.append(stats[op][k])
                    ins = Instance.create_instance(values)
                    prediction = c.classify_instance(ins)
                    # print ' ', obj, op, values, prediction, prediction - zerovalue
                    # s += pred
                    s = s + max(prediction - zerovalue, 0)
                # print obj, 'real', real_line[obj], 'pred', s
                pred[obj].append(s)
                real[obj].append(real_line[obj])
    print 'test', testfile, 'linecount', linecount
    subprocess.call('rm %s' % testfile, shell=True)
def loadClusterModel(self, method, mname):
    finalname = "%s_%s.model" % (method, mname)
    cluster = Clusterer(jobject=serialization.read(os.path.join(self.modelDir, finalname)))
    logger.info('[%s] : [INFO] Loaded clusterer model %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), finalname)
    return cluster
def loadFilter(filterPath):
    flt = Filter(jobject=sr.read(filterPath))  # avoid shadowing the builtin filter()
    return flt
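# Hypothetical usage of loadFilter above: "remove.filter" is an assumed path
# to a serialized weka.filters.Filter, and `dataset` is assumed to have been
# loaded elsewhere with a format matching the filter's input format.
flt = loadFilter("remove.filter")
filtered_data = flt.filter(dataset)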
def train(request):
    jvm.start()
    feature_names = [
        "bodydearword", "bodyform", "bodyhtml", "bodymultipart", "bodynumchars",
        "bodynumfunctionwords", "bodynumuniqwords", "bodynumwords", "bodyrichness",
        "bodysuspensionword", "bodyverifyyouraccountphrase", "externalsabinary",
        "externalsascore", "scriptjavascript", "scriptonclick", "scriptpopup",
        "scriptstatuschange", "scriptunmodalload", "senddiffreplyto", "sendnumwords",
        "sendunmodaldomain", "subjectbankword", "subjectdebitword", "subjectfwdword",
        "subjectnumchars", "subjectnumwords", "subjectreplyword", "subjectrichness",
        "subjectverifyword", "urlatchar", "urlbaglink", "urlip", "urlnumdomains",
        "urlnumexternallink", "urlnumimagelink", "urlnuminternallink", "urlnumip",
        "urlnumlink", "urlnumperiods", "urlnumport", "urlport", "urltwodoains",
        "urlunmodalbaglink", "urlwordclicklink", "urlwordherelink",
        "urlwordloginlink", "urlwordupdatelink",
    ]
    # one numeric attribute per feature, plus the nominal class attribute
    # (a list keeps the 'phish'/'ham' label order deterministic)
    attributes = [Attribute.create_numeric(n + ".feature") for n in feature_names]
    attributes.append(Attribute.create_nominal("class", ['phish', 'ham']))

    data_dir = settings.BASE_DIR + "/phishing/public/datasets/"
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_dir + "dataset.arff")
    data.class_is_last()

    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.options = ["-C", "0.3"]
    cls.build_classifier(data)
    serialization.write(data_dir + "out.model", cls)
    classifier = Classifier(jobject=serialization.read(data_dir + "out.model"))

    dataset = Instances.create_instances("test", attributes, 0)
    values = [
        0, 0, 0, 0, 890, 1, 124, 198, 0.22247191011236, 0, 0, 0, 0.0,
        0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 21, 4, 1, 0.19047619047619,
        0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        Instance.missing_value()
    ]
    inst = Instance.create_instance(values)
    dataset.add_instance(inst)
    dataset.class_is_last()
    # print(str(dataset))

    var = ''
    for inst1 in dataset:
        pred = classifier.classify_instance(inst1)
        var = inst1.class_attribute.value(int(pred))
        if var == 'ham':
            print('Not phishing')
            # do something
        else:
            print('Phishing')
            # do something
        print(var)
    jvm.stop()
    return HttpResponse(str(var))
def __init__(self, model_path, senti_path, stop_words, ngrams_path):
    self.loader = Loader(classname="weka.core.converters.ArffLoader")
    self.features_calculator = FeaturesCalculator(ngrams_path)
    self.classifier = Classifier(jobject=serialization.read(model_path))
    self.normalizer = Preprocessor(senti_path)
    self.stop_words = stop_words
print(t)
jvm.start(max_heap_size="4g", packages=True)
Wtrain = converters.load_any_file("train.csv")
Wtest = converters.load_any_file("test.csv")
Wtrain.class_is_last()
Wtest.class_is_last()

# train an LMT classifier, or load it if a serialized model already exists
if Path('lmt.model').exists():
    lmt = Classifier(jobject=serialization.read("lmt.model"))
else:
    lmt = Classifier(classname="weka.classifiers.trees.LMT")
    lmt.build_classifier(Wtrain)
    serialization.write("lmt.model", lmt)

# 5-fold cross-validation on the training data
evlmt = Evaluation(Wtrain)
evlmt.crossvalidate_model(lmt, Wtrain, 5, Random(1))
print("Error is", evlmt.error_rate)

# plot the confusion matrix as a heatmap
cm2e = evlmt.confusion_matrix
cm2E = pd.DataFrame(cm2e, index=["neg", "pos"], columns=["neg", "pos"])
plt.figure(figsize=(7, 7))
axis = sns.heatmap(cm2E, annot=True, cbar=False, cmap="Reds")