def train_liblinear(args):
    """Train a liblinear model on the data in gold_dir and save it under model_name."""
    model_name, gold_dir, dirs = args[0], args[1], args[2:]
    vectors, predicates = get_data(gold_dir, dirs)
    prob = problem(map(num_to_class, predicates), vectors)
    param = parameter('-s 0')
    model = train(prob, param)
    save_model(model_name, model)

def save(self):
    sys.stderr.write('saving model...')
    save_model(self.modelName + '.model', self.model)
    sys.stderr.write('done\nsaving label and feature lists...')
    self.labelCounter.saveToFile(self.modelName + '.labelNumbers')
    self.featCounter.saveToFile(self.modelName + '.featureNumbers')
    sys.stderr.write('done\n')

def save(self):
    sys.stderr.write('saving model...')
    save_model(self.modelName+'.model', self.model)
    sys.stderr.write('done\nsaving label and feature lists...')
    self.labelCounter.saveToFile(self.modelName+'.labelNumbers')
    self.featCounter.saveToFile(self.modelName+'.featureNumbers')
    sys.stderr.write('done\n')

def save(self, model_path):
    """Saves the model as a directory at the given path."""
    if os.path.exists(model_path):
        subprocess.check_output(["rm", "-rf", model_path])
    os.makedirs(model_path)
    pickle.dump(self.__feature_extractor,
                open(os.path.join(model_path, "feature_extractor"), "wb"),
                protocol=pickle.HIGHEST_PROTOCOL)
    liblinearutil.save_model(os.path.join(model_path, "liblinear_model"),
                             self.__liblinear_model)

def train_and_save(name, method):
    '''Runs training. Must use the liblinear library.'''
    print 'Training dataset {} with method {}.'.format(name, method)
    model = get_training_method(method)(name)
    try:
        llb.save_model('models/{}.model'.format(name), model)
        print 'Saved model.'
    except:
        print 'Could not save model.'

def train(C, Y_train, X_train, x_lines):
    """
    This function takes in the training labels and features and creates a model
    and saves that model
    :param C : list containing parameter C
    :param X_train : training features
    :param Y_train : training labels
    :return None
    """
    # for c in C:
    param = '-s 2 -c ' + str(C)
    model = lu.train(Y_train, X_train, param)
    lu.save_model("model/lmods2_tamper" + str(round(C, 2)) + "_" + str(x_lines) + "l.model", model)

def train(c, Y_train, X_train):
    """
    This function takes in the training labels and features and creates a model
    and saves that model
    :param C : list containing parameter C
    :param X_train : training features
    :param Y_train : training labels
    :return None
    """
    # for c in C:
    param = '-s 2 -c ' + str(c)
    model = lu.train(Y_train, X_train, param)
    lu.save_model("model/lmods2_" + str(round(c, 2)) + ".model", model)

def SaveModel(self):
    if(self.classifierType == "SVM" and self.packageType == "liblinear"):
        from liblinearutil import save_model
        save_model(self.featuresSerializationFileName, self.classifierModel)
    elif(self.classifierType == "DecisionTree" and self.packageType == "nltk"):
        # Open the serialization file
        serializationFile = open(self.featuresSerializationFileName, 'wb')
        # Save the model
        pickle.dump(self.classifierModel, serializationFile)
        # Close the serialization file
        serializationFile.close()
    else:
        print("Unsupported classifier and package type for SaveModel")

def SaveModel(self):
    if (self.classifierType == "SVM" and self.packageType == "liblinear"):
        from liblinearutil import save_model
        save_model(self.featuresSerializationFileName, self.classifierModel)
    elif (self.classifierType == "DecisionTree" and self.packageType == "nltk"):
        # Open the serialization file
        serializationFile = open(self.featuresSerializationFileName, 'wb')
        # Save the model
        pickle.dump(self.classifierModel, serializationFile)
        # Close the serialization file
        serializationFile.close()
    else:
        print("Unsupported classifier and package type for SaveModel")

def _save_models(self, model_dir, label_samples, train_samples, train_labels, train_feature_samples):
    out_json = {}
    out_json['tuples'] = self.model_keys
    out_json['train_samples_file'] = 'train_samples.json'
    out_json['feature_lexicon_file'] = 'feature_lexicon.json'
    out_json['ontology_file'] = 'ontology.json'
    output = codecs.open(os.path.join(model_dir, 'config.json'), 'w', 'utf-8')
    json.dump(out_json, output, indent=4)
    output.close()
    # save ontology file
    shutil.copyfile(self.ontology_file, os.path.join(model_dir, out_json['ontology_file']))
    # save train samples
    output = codecs.open(os.path.join(model_dir, out_json['train_samples_file']), 'w', 'utf-8')
    train_json = {}
    train_json['train_samples'] = train_samples
    train_json['label_samples'] = label_samples
    train_json['train_feature_samples'] = train_feature_samples
    train_json['train_labels'] = train_labels
    json.dump(train_json, output, indent=4)
    output.close()
    # save train sample nums
    output = codecs.open(os.path.join(model_dir, 'train_samples_number.json'), 'w', 'utf-8')
    train_number_json = {}
    for key, labels in train_labels.items():
        pos_num = 0
        neg_num = 0
        for label in labels:
            if label == 0:
                neg_num += 1
            elif label == 1:
                pos_num += 1
        train_number_json[key] = {0: neg_num, 1: pos_num}
    json.dump(train_number_json, output, indent=4)
    output.close()
    # save feature
    self.feature.save_Lexicon(os.path.join(model_dir, out_json['feature_lexicon_file']))
    # save svm models
    for model_key in self.model_keys:
        save_model(os.path.join(model_dir, '%s.svm.m' % (model_key)), self.models[model_key])

def save(self, model_path):
    """
    Saves the model as a directory at the given path

    @type model_path: str
    @param model_path: path to save the trained model
    """
    ## if-else statement added on 06.02.2017
    if (self.__feature_extractor.feature_template == "relational") and (self.__feature_extractor.parser_type == "spacy"):
        print("Relational model with spaCy parser cannot be saved")
    else:
        pickle.dump(self.__feature_extractor,
                    open(os.path.join(self.model_path, "feature_extractor"), "wb"),
                    protocol=pickle.HIGHEST_PROTOCOL)
        # save trained model in the model_path directory
        liblinearutil.save_model(
            os.path.join(self.model_path, "liblinear_model"), self.__liblinear_model)

def save(self, modelname, featuresname):
    svm.save_model(modelname, self._svm_model)
    self._features.save(open(featuresname, 'wb'))

def train(instance_file, model_file, param):
    y, x = ll.svm_read_problem(instance_file)
    prob = ll.problem(y, x)
    m = ll.train(prob, param)
    ll.save_model(model_file, m)
    print 'done training', model_file

def TrainFromAttrDictFiles(self, attr_dict_file, feature_list, percent, tokenizer_mode, model_dir):
    # deal with model dir
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir, True)
    os.mkdir(model_dir)
    input = codecs.open(attr_dict_file, "r", "utf-8")
    attr_data = json.load(input)
    input.close()
    # stat train samples
    print "stat train samples"
    self.feature = attr_feature(feature_list, percent, tokenizer_mode)
    self.feature.stat_Lexicon(attr_data)
    self.attrs = attr_data["attr_data_index"].keys()
    self.train_samples = []
    for utter in attr_data["sub_utter_data"]:
        feature_vector = self.feature.ExtractFeatureFromSent(utter)
        self.train_samples.append(feature_vector)
    self.attrs_labels = {}
    for attr in self.attrs:
        self.attrs_labels[attr] = [0] * len(self.train_samples)
        for index in attr_data["attr_data_index"][attr]:
            self.attrs_labels[attr][index] = 1
    # train svm model
    print "train svm models"
    for attr in self.attrs:
        print "Train attr: %s" % (attr)
        prob = problem(self.attrs_labels[attr], self.train_samples)
        param = parameter("-s 0 -c 1")
        self.models[attr] = liblinear.train(prob, param)
    # save model
    print "save models"
    out_json = {}
    out_json["attrs"] = self.attrs
    out_json["train_samples_file"] = "train_samples.json"
    out_json["feature_lexicon_file"] = "feature_lexicon.json"
    out_json["ontology_file"] = "ontology.json"
    output = codecs.open(os.path.join(model_dir, "config.json"), "w", "utf-8")
    json.dump(out_json, output, indent=4)
    output.close()
    # save train samples
    output = codecs.open(os.path.join(model_dir, out_json["train_samples_file"]), "w", "utf-8")
    train_sample_json = {}
    train_sample_json["samples"] = self.train_samples
    train_sample_json["label_index"] = self.attrs_labels
    json.dump(train_sample_json, output, indent=4)
    output.close()
    # save feature
    self.feature.save_Lexicon(os.path.join(model_dir, out_json["feature_lexicon_file"]))
    # save svm models
    for attr in self.attrs:
        save_model(os.path.join(model_dir, "%s.svm.m" % (attr)), self.models[attr])
    print "Done!"

def save(self, path):
    liblinear.save_model(path + '-model', self._model)
    del (self._model)
    f = open(path, 'wb')
    pickle.dump((self._labels, self._features), f)
    f.close()

def train_model(): """训练模型 """ y, x = svm_read_problem(TRAIN_INPUT_FILE) m = train(y, x, "-c 4") save_model(SVM_MODEL_FILE, m)
def save(self, path): liblinear.save_model(path + "-model", self._model) del (self._model) ml.Classifier.save(self, path)
def save(self, path):
    liblinear.save_model(path + '-model', self._model)
    del(self._model)
    f = open(path, 'wb')
    pickle.dump((self._labels, self._features), f)
    f.close()

def train(self):
    """Load a cached svm.model when allowed; otherwise train a new model and save it."""
    if os.path.isfile("svm.model") and self.useModel:
        self.model = llu.load_model("svm.model")
    else:
        self.model = llu.train(self.ys, self.xs, self.train_param)
        llu.save_model("svm.model", self.model)

def train(word_dict):
    """Extract features for the train and test sets, train a liblinear model, and save it."""
    get_feature(word_dict, "data/train.dat", "data/train.format")
    get_feature(word_dict, "data/test.dat", "data/test.format")
    train_y, train_x = linear.svm_read_problem("data/train.format")
    model = linear.train(train_y, train_x)
    linear.save_model("model.dat", model)

def insert_predictions(data, features, active_features, baseline_file, date_0, date_A):
    folds = get_folds(data)
    test_data = folds[0]
    train_data = {}
    train_data.update(folds[1])
    train_data.update(folds[2])
    train_data.update(folds[3])
    train_data.update(folds[4])
    print "Normalize"
    normalized_train_features, mean, std = normalize_train(train_data, features)
    normalized_test_features = normalize_test(test_data, features, mean, std)
    print "Tune"
    best_c = get_best_c(train_data, normalized_train_features)
    print "Train"
    model = train(train_data, normalized_train_features, best_c)
    print "Predict"
    predictions = predict(test_data, normalized_test_features, model)
    print "Insert"
    conn = pymongo.MongoClient('localhost')
    db = conn['nicta']
    pred_coll = db['predictions_' + date_A]
    vid_coll = db['videos']
    w_coll = db['weights_' + date_A]
    feat_coll = db['features_' + date_0 + '_10']
    sample_tweets = {}
    print 'Getting features'
    feat_idx = 0
    for result in feat_coll.find():
        feat_idx += 1
        if feat_idx % 10000 == 0:
            print feat_idx, len(sample_tweets)
        vid_id = result['_id']
        if vid_id not in predictions:
            continue
        tweets = result['value']['sample_tweets']
        authors = result['value']['sample_authors']
        all_tweets = result['value']['all_tweets']
        text_concat = ""
        for twt in db['tweet'].find({'_id': {'$in': all_tweets}}, ['text']):
            text_concat += twt['text']
        compressed = zlib.compress(text_concat.encode('utf8'))
        sample_tweets[vid_id] = {
            'tweets': tweets,
            'authors': authors,
            'average': result['value']['tweet_count'] / float(10),
            'compressed': len(compressed)
        }
    print 'Videos with sample tweets:', len(sample_tweets)
    for line in open(baseline_file):
        tokens = line.strip().split(',')
        vid_id = tokens[0]
        if vid_id not in test_data:
            continue
        A_score = int(tokens[1])
        test_data[vid_id]['train_views'] = A_score
    print 'Inserting'
    pred_coll.drop()
    for vid_id in predictions:
        if vid_id not in sample_tweets:
            continue
        pred_coll.insert({
            'id': vid_id,
            'score': predictions[vid_id]['score'],
            'actual': test_data[vid_id]['class'],
            'upload_date': test_data[vid_id]['upload_date'],
            'B_views': test_data[vid_id]['views'],
            'A_views': test_data[vid_id]['train_views'],
            'features': features[vid_id],
            'diversity': sample_tweets[vid_id]['compressed'],
            'active_features': active_features[vid_id],
            'sample_tweets': sample_tweets[vid_id]['tweets'],
            'sample_authors': sample_tweets[vid_id]['authors'],
            'normalized_features': normalized_test_features[vid_id],
            'average_tweets': sample_tweets[vid_id]['average']
        })
    pred_coll.ensure_index("A_views")
    liblinearutil.save_model('MODEL.DAT', model)
    weight_line = False
    weights = []
    for line in open('MODEL.DAT'):
        if not weight_line and line.strip() != 'w':
            continue
        elif line.strip() == 'w':
            weight_line = True
            continue
        else:
            weights.append(float(line.strip()))
    w_coll.remove()
    w_coll.insert({'weights': weights})
    conn.close()
    print "Done"

def predict(input_file, input_type, labels_files, output_dir, fill_diagonal=None,
            rows_file=None, test=False, save_model=False):
    if input_type == "edgelist":
        data, IDs = load_edgelist(input_file, fill_diagonal)
    elif input_type == "adjacency":
        data, IDs = load_adjacency(input_file, rows_file, fill_diagonal)
    elif input_type == "numpy":
        data, IDs = load_numpy(input_file, rows_file, fill_diagonal)
    else:
        data, IDs = load_feature_row_file(input_file, rows_file)
    labeled_IDs = []
    labels = []
    weights = []
    for labels_file in labels_files:
        file_labeled_IDs, file_labels, file_weights = \
            load_weighted_labels(labels_file)
        labeled_IDs += file_labeled_IDs
        labels += list(file_labels)
        weights += list(file_weights)
    labels = np.array(labels)
    weights = np.array(weights)
    train_data, _, train_labels, train_weights = get_class_data(
        data, IDs, labeled_IDs, labels, weights, classes=[1, -1])
    print("Training...")
    model = ll.train(train_weights, train_labels, train_data, "-s 7")
    print("Predicting...")
    if test:
        test_data, test_IDs, test_labels, _ = get_class_data(
            data, IDs, labeled_IDs, labels, weights, classes=[0])
    else:
        test_data = data
        test_IDs = IDs
    pred_labels, _, probs = \
        ll.predict([], test_data, model, "-b 1")
    # keep only the probability that the gene is positive
    probs = np.array([sublist[0] for sublist in probs])
    print("\nSaving predictions...")
    output_file = os.path.join(
        output_dir,
        "_".join(os.path.basename(labels_files[0]).split("_")[1:]))
    with open(output_file, "w") as f:
        for ID, label, prob in zip(test_IDs, pred_labels, probs):
            f.write("{0}\t{1}\n".format(ID, prob))
    if save_model:
        print("\nSaving model...")
        model_file = os.path.basename(labels_file).split(".")[0] + ".model"
        ll.save_model(os.path.join(output_dir, model_file), model)

L = 12
codes = np.random.randint(2, size=(train_features.shape[0], L))
for i in range(mu.shape[0]):
    print('----------')
    print('[ITER] {:3d} mu = {:.4f}'.format(i, mu[i]))
    t_start = timeit.default_timer()
    models = h_step(train_features, codes, verbose=True)
    (A, old_Z) = f_step(train_features, models, verbose=True)
    (codes, loss) = z_step(train_features, models, A, old_Z, mu[i])
    t_end = timeit.default_timer()
    recon_error = np.linalg.norm(train_features - codes.dot(A), axis=1)**2
    print('[ITER] {:3d} train set recon error: {:.4f}'.format(
        i, sum(recon_error) / train_features.shape[0]))
    print('[ITER] {:3d} test set recon error: {:.4f}'.format(
        i, test_recon(test_features, models, A)))
    print('[ITER] {:3d} {:.4f} seconds elapsed'.format(i, t_end - t_start))
    # print('[ITER] {:3d} loss: {:.4f}'.format(i, loss))
for (m, i) in zip(models, range(len(models))):
    liblinearutil.save_model(
        'models/tr{0:05d}-L{1:02d}-b{2:02d}.model'.format(
            train_features.shape[0], L, i), m)
hash(features, num_train, L)
print(calc_mean_ap(labels, num_test, num_train, L))
print(calc_precision_at_k(labels, num_test, num_train, L, 50))

def train_model(): """训练模型 """ y, x = svm_read_problem(TRAIN_INPUT_FILE) m = train(y, x, '-c 4') save_model(SVM_MODEL_FILE, m)
def TrainFromSubsegFiles(self, ontology_file, sub_segments_file, feature_list, percent, tokenizer_mode, model_dir):
    # deal with model dir
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir, True)
    os.mkdir(model_dir)
    self.ontology_file = ontology_file
    self.tagsets = ontology_reader.OntologyReader(ontology_file).get_tagsets()
    input = codecs.open(sub_segments_file, 'r', 'utf-8')
    sub_segments = json.load(input)
    input.close()
    # stat train samples
    print 'stat train samples'
    self.feature = feature(self.tagsets, feature_list, percent, tokenizer_mode)
    self.feature.stat_Lexicon(sub_segments)
    self.slots = self._stat_slot(sub_segments)
    for slot in self.slots:
        self.train_samples[slot] = [[], []]
    for session in sub_segments['sessions']:
        print '%d' % (session['session_id'])
        for sub_seg in session['sub_segments']:
            feature_vector = self.feature.ExtractFeatureFromSubseg(sub_seg)
            mentioned_slots = []
            for slot, values in sub_seg['frame_label'].items():
                if slot == 'INFO':
                    for value in values:
                        slot_name = '%s:%s' % (slot, value)
                        if slot_name not in mentioned_slots:
                            mentioned_slots.append(slot_name)
                else:
                    if slot not in mentioned_slots:
                        mentioned_slots.append(slot)
            for slot, train_samples in self.train_samples.items():
                if slot in mentioned_slots:
                    train_samples[0].append(1)
                else:
                    train_samples[0].append(0)
                train_samples[1].append(feature_vector)
    # train svm model
    print 'train svm models'
    for slot, train_samples in self.train_samples.items():
        print 'Train slot: %s' % (slot)
        prob = problem(train_samples[0], train_samples[1])
        param = parameter('-s 0 -c 1')
        self.models[slot] = liblinear.train(prob, param)
    # save model
    print 'save models'
    out_json = {}
    out_json['slots'] = self.slots
    out_json['train_samples_file'] = 'train_samples.json'
    out_json['feature_lexicon_file'] = 'feature_lexicon.json'
    out_json['ontology_file'] = 'ontology.json'
    output = codecs.open(os.path.join(model_dir, 'config.json'), 'w', 'utf-8')
    json.dump(out_json, output, indent=4)
    output.close()
    # save ontology file
    shutil.copyfile(self.ontology_file, os.path.join(model_dir, out_json['ontology_file']))
    # save train samples
    output = codecs.open(os.path.join(model_dir, out_json['train_samples_file']), 'w', 'utf-8')
    json.dump(self.train_samples, output, indent=4)
    output.close()
    # save feature
    self.feature.save_Lexicon(os.path.join(model_dir, out_json['feature_lexicon_file']))
    # save svm models
    for slot in self.slots:
        save_model(os.path.join(model_dir, '%s.svm.m' % (slot)), self.models[slot])
    print 'Done!'

def save(self, path):
    liblinear.save_model(path + '-model', self._model)
    del (self._model)
    ml.Classifier.save(self, path)

featp = parsePath(featp)
scores = []
for line in open(testfile, "r").readlines():
    filename, score, label = line.strip().split()
    scores.append((filename, float(score)))
scores.sort(key=operator.itemgetter(1), reverse=True)
# binary weighting, CPRF
poslst = ["%s%s" % (featp, one[0]) for one in scores[:topn]]
neglst = ["%s%s" % (featp, one[0]) for one in scores[-botm:]]
if (oripos != ""):
    poslst.extend([line.strip() for line in open(oripos, "r").readlines()])
if (orineg != ""):
    neglst.extend([line.strip() for line in open(orineg, "r").readlines()])
rerankingmodel = trainmodel(poslst, neglst)
if (rerankingmodelf != ""):
    save_model(rerankingmodelf, rerankingmodel)
# print rerankingmodel.get_decfun()[0][:5]
# len() 12288
model = rerankingmodel.get_decfun()[0]
# thres = -0.7
featfiles = getFiles(featp, "bof")
featfiles.sort()
for i in xrange(len(featfiles)):
    filename = os.path.basename(featfiles[i])
    feat = parseLS(open(featfiles[i], "r").read())
    score = predict_this(model, feat)
    if (smoothing):
        scores = [score]
        for j in xrange(i - 2, i + 3):
            if ((j < len(featfiles)) and (j >= 0)):
                feat = parseLS(open(featfiles[j], "r").read())
                scores.append(predict_this(model, feat))