def data_processing(dataset_dir):
    """Load all dataset splits from *dataset_dir* and report their sizes.

    Returns the tuple (train, validation, OO, NO, ON, NN,
    pre_treatment, drug_fingerprint_info) as produced by
    utils.data_processing.
    """
    (train, validation, OO, NO, ON, NN,
     pre_treatment, drug_fingerprint_info) = utils.data_processing(
        dataset_dir,
        parameters.train_data_path,
        parameters.validation_data_path,
        parameters.OO_data_path,
        parameters.NO_data_path,
        parameters.ON_data_path,
        parameters.NN_data_path,
        parameters.pre_treatment_data_path,
        parameters.drug_fingerprint_data_path,
    )
    splits = (train, validation, OO, NO, ON, NN,
              pre_treatment, drug_fingerprint_info)
    # Report how many records each split contains.
    utils.print_statistics(*(len(split) for split in splits))
    return splits
def feature_selection(x_train, y_train, x_test, y_test):
    """Select the 5 best features via RFE over a LinearSVC and print
    test-set classification statistics for the fitted selector.
    """
    print("Feature selection with LinearSVC")
    model = LinearSVC(C=0.1, penalty='l2')
    # Fix: pass n_features_to_select by keyword -- the positional form was
    # deprecated in scikit-learn 0.24 and removed in 1.0.
    rfe = RFE(model, n_features_to_select=5)
    best_features_model = rfe.fit(x_train, y_train)
    y_hat = best_features_model.predict(x_test)
    utils.print_statistics(y_test, y_hat)
def grid_classifier(x_train, y_train, x_test, y_test, model, parameters,
                    make_feature_analysis=False, feature_names=None,
                    top_features=0, plot_name="coeff"):
    """Grid-search *model* over *parameters*, then evaluate the best
    estimator on the test set.

    When make_feature_analysis is True, also plots the top coefficients
    of the best estimator via utils.plot_coefficients.
    """
    grid = GridSearchCV(estimator=model, param_grid=parameters, verbose=0)
    grid.fit(x_train, y_train)
    # Fix: removed dead statement `sorted(grid.cv_results_.keys())` whose
    # result was discarded -- it had no effect.
    classifier = grid.best_estimator_
    if make_feature_analysis:
        utils.plot_coefficients(classifier, feature_names, top_features, plot_name)
    y_hat = classifier.predict(x_test)
    utils.print_statistics(y_test, y_hat)
def predict(model, x_test, y_test):
    """Arg-max decode *model*'s per-class probabilities on x_test and
    print classification statistics against y_test.
    """
    prediction_probability = model.predict(x_test)
    print("Predicted probability length: ", len(prediction_probability))
    y = []
    y_pred = []
    # Pair each probability row with its true label directly instead of
    # re-indexing by position; argmax picks the predicted class.
    for probs, true_label in zip(prediction_probability, y_test):
        y.append(int(true_label))
        y_pred.append(np.argmax(probs))
    utils.print_statistics(y, y_pred)
def logistic_regression(x_train, y_train, x_test, y_test, class_ratio='balanced'):
    """Fit an L2-regularised logistic regression (C=0.01) and print
    test-set classification statistics.
    """
    utils.print_model_title("Logistic Regression")
    classifier = LogisticRegression(C=0.01, class_weight=class_ratio, penalty='l2')
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    utils.print_statistics(y_test, predictions)
def rule_based_comparison(x_train, y_train, x_test, y_test, vocab_filename, verbose=False):
    """Rule-based sarcasm baseline.

    Builds a vocabulary from the training tweets, weights each word by the
    balance of sarcastic vs non-sarcastic tweets it appears in, then labels
    a test tweet 1 (sarcastic) when its summed word weight is non-negative.
    """
    # Build a vocabulary and count the sarcastic or non-sarcastic context in
    # which a word appears: counts[word] == [non_sarcastic, sarcastic].
    vocab = data_proc.build_vocabulary(vocab_filename, x_train, minimum_occurrence=10)
    # vocab = set(' '.join([x.lower() for x in x_train]).split())  # this includes all words in the train set
    counts = {k: [0, 0] for k in vocab}
    for tw, y in zip(x_train, y_train):
        for word in tw.split():
            word = word.lower()
            if word in vocab:
                if y == 0:
                    counts[word][0] += 1
                else:
                    counts[word][1] += 1
    # Relative weight of each word in [-1, 1], based on the
    # sarcastic/non-sarcastic tweets in which it appears.
    weight = dict.fromkeys(counts, 0)  # idiom: no need to list the keys first
    for word, (non_sarcastic, sarcastic) in counts.items():
        if sarcastic + non_sarcastic != 0:
            weight[word] = (sarcastic - non_sarcastic) / (sarcastic + non_sarcastic)
    if verbose:
        total_sarcastic = sum(1 for y in y_train if y == 1)
        stopwords = data_proc.get_stopwords_list()
        probs = {
            word: (counts[word][1] / total_sarcastic)
            for word in counts
            if word not in stopwords and word.isalnum()
        }
        print("Top 10 most sarcastic items: ",
              ' '.join(sorted(probs, key=probs.get, reverse=True)[:10]))
    # Rule-based predictions based on the previously calculated weights
    y_pred = []
    for tw, y in zip(x_test, y_test):
        score = 0.0
        for word in tw.split():
            word = word.lower()
            if word in vocab:
                score += weight[word]
        y_pred.append(1 if score >= 0.0 else 0)
    utils.print_statistics(y_test, y_pred)
def nn_bow_model(x_train, y_train, x_test, y_test, results, mode, epochs=15,
                 batch_size=32, hidden_units=50, save=False, plot_graph=False):
    """Train and evaluate a one-hidden-layer bag-of-words feed-forward NN.

    Stores [loss, accuracy, f1] into results[mode]; optionally plots the
    training curves and saves the model architecture/weights to disk.
    """
    # Build the model
    print("\nBuilding Bow NN model...")
    model = Sequential()
    model.add(Dense(hidden_units, input_shape=(x_train.shape[1],),
                    activation='sigmoid'))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()
    # Train using binary cross entropy loss, Adam implementation of Gradient Descent
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy', utils.f1_score])
    history = model.fit(x_train, y_train, batch_size=batch_size,
                        epochs=epochs, verbose=1)
    if plot_graph:
        utils.plot_training_statistics(history, "/plots/bow_models/bow_%s_mode" % mode)
    # Evaluate the model
    loss, acc, f1 = model.evaluate(x_test, y_test, batch_size=batch_size)
    results[mode] = [loss, acc, f1]
    classes = model.predict_classes(x_test, batch_size=batch_size)
    y_pred = [item for c in classes for item in c]
    utils.print_statistics(y_test, y_pred)
    print("%d examples predicted correctly." % np.sum(np.array(y_test) == np.array(y_pred)))
    print("%d examples predicted 1." % np.sum(1 == np.array(y_pred)))
    print("%d examples predicted 0." % np.sum(0 == np.array(y_pred)))
    if save:
        # NOTE(review): `path` is assumed to be a module-level global defined
        # elsewhere in the file -- confirm.
        json_name = path + "/models/bow_models/json_bow_" + mode + "_mode.json"
        # Bug fix: the weights file previously got a ".json" extension even
        # though it holds HDF5 data; use ".h5" so tooling recognises it.
        h5_weights_name = path + "/models/bow_models/h5_bow_" + mode + "_mode.h5"
        utils.save_model(model, json_name=json_name, h5_weights_name=h5_weights_name)
async def consume_the_messages_stream_consumer():
    """Benchmark consumption: read config.MESSAGE_NUMBER messages from the
    configured topic and report how long that took.
    """
    consumer = Consumer(
        brokers=config.KAFKA_URL,
        topics=[config.TOPIC],
        rdk_consumer_config=config.RDK_CONSUMER_CONFIG,
        rdk_topic_config=config.RDK_TOPIC_CONFIG,
    )
    consumer.start()
    received = 0
    print("Starting to consume the messages.")
    with utils.Timer() as timer:
        async for _message in consumer:
            received += 1
            # Stopping the consumer ends the async iteration.
            if received == config.MESSAGE_NUMBER:
                consumer.stop()
    print(f"The time used to consume the messages is {timer.interval} "
          f"seconds.")
    utils.print_statistics(timer.interval)
async def fill_topic_with_messages():
    """Benchmark setup: fill the configured topic with
    config.MESSAGE_NUMBER messages and report the elapsed time.
    """
    producer = Producer(
        brokers=config.KAFKA_URL,
        rdk_producer_config=config.RDK_PRODUCER_CONFIG,
        rdk_topic_config=config.RDK_TOPIC_CONFIG,
    )
    producer.start()
    sent = 0
    print(f"Preparing benchmark. Filling topic {config.TOPIC} with "
          f"{config.MESSAGE_NUMBER} messages of {config.MESSAGE_BYTES} bytes "
          f"each one.")
    await asyncio.sleep(0.1)
    with utils.Timer() as timer:
        for _ in range(config.MESSAGE_NUMBER):
            sent += 1
            await producer.produce(config.TOPIC, config.MESSAGE)
        producer.stop()
    print(f"The producer time to send the messages is {timer.interval} "
          f"seconds.")
    utils.print_statistics(timer.interval)
### prepare training, test data and evaluator train_data_file = 'hmsvm_%d_distort_data_fold' % distort train_num_examples_fold = 20 train_num_folds = 5 train_labels, train_features = utils.unfold_data(train_data_file) test_data_file = 'hmsvm_%d_distort_data_test' % distort test_num_examples = 100 test_labels, test_features = utils.read_mat_file(test_data_file, test_num_examples) ### train ML-HMM and evaluate in training data model = HMSVMModel(train_features, train_labels, SMT_TWO_STATE) model.set_use_plifs(True) mlhmm = MLHMM(model) mlhmm.train() prediction = mlhmm.apply() accuracy = evaluator.evaluate(prediction, train_labels) print '\ttraining accuracy:\t' + str(accuracy*100) + '%' utils.print_statistics(train_labels, prediction) ### evaluate in test data prediction = mlhmm.apply(test_features) accuracy = evaluator.evaluate(prediction, test_labels) print '\ttest accuracy:\t\t' + str(accuracy*100) + '%' utils.print_statistics(test_labels, prediction)
# NOTE(review): Python 2 fragment; `labels_k_fold`, `features_k_fold`,
# `labels_no_kfold`, `features_no_kfold`, `idx`, `models` and `K` come from
# code outside this chunk -- confirm their setup before relying on this.
assert(labels_k_fold.get_num_labels() == features_k_fold.get_num_vectors())
# Copy every label/feature vector of this fold into the flat containers.
for i in xrange(labels_k_fold.get_num_labels()):
    labels_no_kfold.add_label(labels_k_fold.get_label(i))
    features_no_kfold.set_feature_vector(features_k_fold.get_feature_vector(i), idx)
    idx += 1
labels_no_fold.append(labels_no_kfold)
features_no_fold.append(features_no_kfold)

### training and validation
evaluator = StructuredAccuracy()
evaluator.io.set_loglevel(MSG_DEBUG)
accuracies = []
print 'training HMM'
for k in xrange(K):
    model = HMSVMModel(features_no_fold[k], labels_no_fold[k], SMT_TWO_STATE)
    model.set_use_plifs(True)
    hmm = MLHMM(model)
    # trailing comma: Python 2 print without newline, accuracy printed after
    print '\ton fold %d' % k,
    hmm.train()
    # validate on the held-out fold stored in models[k]
    prediction = hmm.apply(models[k].get_features())
    accuracy = evaluator.evaluate(prediction, models[k].get_labels())
    print str(accuracy*100) + '%'
    utils.print_statistics(models[k].get_labels(), prediction)
    accuracies.append(accuracy)
print 'overall success rate of ' + str(numpy.mean(accuracies)*100) + '%'
# NOTE(review): fragment assumes `thread`, `event`, `threads`, `lock`,
# `ping_socket` and `print_statistics` are defined earlier in the file.
# The low 16 bits of id(thread) double as the ICMP identifier field.
threads.append((id(thread) & 0xFFFF, event, thread))
thread.daemon = True  # setDaemon() is deprecated; the attribute is equivalent
thread.start()
# Collect ICMP echo replies until every pinger thread has finished.
while any(t[2].is_alive() for t in threads):  # isAlive() was removed in Py 3.9
    ready = select.select([ping_socket], [], [], 1)
    if not ready[0]:
        continue  # 1-second timeout, poll liveness again
    try:
        with lock:
            recPacket, addr = ping_socket.recvfrom(2048)
    except socket.error:
        print("Can't receive")
        # Bug fix: without this `continue`, the code below would use an
        # undefined (or stale) recPacket after a failed recvfrom.
        continue
    # ICMP header sits after the 20-byte IP header:
    # type, code, checksum, identifier, sequence
    icmp_header = recPacket[20:28]
    icmp_type, code, checksum, port, sequence = struct.unpack('!BBHHH', icmp_header)
    # Route the reply to the thread whose identifier matches.
    index = [i for i, t in enumerate(threads) if t[0] == port]
    if icmp_type == 0 and index:  # 0 == ICMP echo reply
        threads[index[0]][2].add_received_packet(recPacket)
        threads[index[0]][1].set()
print_statistics(threads)
for _ident, _event, t in threads:
    t.join()
ping_socket.close()
def linear_svm(x_train, y_train, x_test, y_test, class_ratio='balanced'):
    """Fit an L2-regularised linear SVM (C=0.01) and print test-set
    classification statistics.
    """
    utils.print_model_title("Linear SVM")
    classifier = LinearSVC(C=0.01, class_weight=class_ratio, penalty='l2')
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    utils.print_statistics(y_test, predictions)
# NOTE(review): fragment assumes `final_tokens`, `doc_no`, `doc_num`,
# `index_list`, `doc_info` and `word_dist` come from enclosing code
# (likely a per-document loop) outside this chunk -- confirm.
doc_size = len(final_tokens)
doc_id = "D" + str(doc_no)
# Add each token (with its 1-based position) to the inverted index.
# PERF NOTE(review): linear scan of index_list per token makes this O(n^2);
# a dict keyed by word would be O(n) -- left unchanged here.
for i in range(len(final_tokens)):
    pos = i + 1
    word = final_tokens[i]
    #doc_id = "D" + str(doc_no)
    index_found = 0
    for index in range(len(index_list)):
        if index_list[index][0] == word:
            item = index_list[index][1]
            item.append([str(doc_id), pos])
            index_list[index][1] = item
            index_found = 1
            break
    if index_found == 0:
        index_list.append([word, [[str(doc_id), pos]]])
# NOTE(review): stores `doc_num` while the id uses `doc_no` -- looks like a
# possible mix-up; verify which counter is intended.
doc_info[doc_id] = (doc_num, doc_size)
index_list.sort(key=itemgetter(0, 1))
# Persist the index and per-document info as JSON.
with open('indexA.json', 'w') as outfile:
    json.dump(index_list, outfile)
with open('docinfoA.json', 'w') as outfile:
    json.dump(doc_info, outfile)
# word_dist[word] = number of postings for that word
for item in index_list:
    word_dist[str(item[0])] = len(item[1])
index_size = len(index_list)
utils.print_statistics(doc_no, word_dist, index_size)
# NOTE(review): this block is nearly identical to an earlier inverted-index
# block in this file -- consider extracting a shared helper. It assumes
# `final_tokens`, `doc_no`, `doc_num`, `index_list`, `doc_info` and
# `word_dist` come from enclosing code outside this chunk -- confirm.
doc_size = len(final_tokens)
doc_id = "D" + str(doc_no)
# Add each token (1-based position) to the inverted index; the inner
# linear scan over index_list makes this quadratic in vocabulary size.
for i in range(len(final_tokens)):
    pos = i+1
    word = final_tokens[i]
    #doc_id = "D" + str(doc_no)
    index_found = 0
    for index in range(len(index_list)):
        if index_list[index][0]==word:
            item = index_list[index][1]
            item.append([str(doc_id), pos])
            index_list[index][1] = item
            index_found = 1
            break
    if index_found == 0:
        index_list.append([word,[[str(doc_id),pos]]])
# NOTE(review): stores `doc_num` while the id uses `doc_no` -- verify.
doc_info[doc_id] = (doc_num, doc_size)
index_list.sort(key=itemgetter(0,1))
# Persist the index and per-document info as JSON.
with open('indexA.json', 'w') as outfile:
    json.dump(index_list, outfile)
with open('docinfoA.json', 'w') as outfile:
    json.dump(doc_info, outfile)
# word_dist[word] = number of postings for that word
for item in index_list:
    word_dist[str(item[0])] = len(item[1])
index_size = len(index_list)
utils.print_statistics(doc_no, word_dist, index_size)
evaluator = StructuredAccuracy() ### train ML-HMM and evaluate in training data print 'training ML-HMM' model = HMSVMModel(train_features, train_labels, SMT_TWO_STATE) model.set_use_plifs(True) mlhmm = MLHMM(model) mlhmm.train() ''' print '\n\tmodel parameters:' print '\t- transition scores: ' + str(numpy.exp(mlhmm.transition_scores)) print '\t- feature scores:' for s,f in product(xrange(mlhmm.num_free_states), xrange(mlhmm.num_features)): print '\t\tstate %d feature %d:\n%s' % (s, f, str(numpy.exp(mlhmm.feature_scores[f,s,:]))) ''' prediction = mlhmm.apply() accuracy = evaluator.evaluate(prediction, train_labels) print '\n\ttraining accuracy: ' + str(accuracy * 100) + '%' utils.print_statistics(train_labels, prediction) ### evaluate in test data print 'testing ML-HMM' prediction = mlhmm.apply(test_features) accuracy = evaluator.evaluate(prediction, test_labels) print '\ttest accuracy: ' + str(accuracy * 100) + '%' utils.print_statistics(test_labels, prediction)
def trainer(cfg: DictConfig) -> None:
    """Train a trajectory-prediction GAN on l5kit data.

    Builds train/validation AgentDatasets (with custom agent masks),
    constructs the generator/discriminator pair configured by cfg, and runs
    the alternating discriminator/generator training loop with periodic
    logging, validation, checkpointing and Excel result dumps.
    """
    os.environ["L5KIT_DATA_FOLDER"] = cfg.l5kit_data_folder
    dm = LocalDataManager(None)
    logger = logging.getLogger(__name__)
    logger.info("Working directory : {}".format(os.getcwd()))

    logger.info("Load dataset...")
    train_cfg = cfg["train_data_loader"]
    valid_cfg = cfg["valid_data_loader"]

    # rasterizer
    rasterizer = build_rasterizer(cfg, dm)

    train_path = train_cfg["key"]
    train_zarr = ChunkedDataset(dm.require(train_path)).open(cached=False)
    logger.info(f"train_zarr {type(train_zarr)}")

    # loading custom mask (we mask static agents)
    logger.info(f"Loading mask in path {train_cfg['mask_path']}")
    custom_mask = np.load(train_cfg['mask_path'])
    logger.info(f"Length of training mask is: {custom_mask.sum()}")

    train_agent_dataset = AgentDataset(cfg, train_zarr, rasterizer,
                                       agents_mask=custom_mask)
    # transform dataset to the proper frame of reference
    train_dataset = TransformDataset(train_agent_dataset, cfg)
    if not train_cfg['subset'] == -1:
        train_dataset = Subset(train_dataset, np.arange(train_cfg['subset']))
    train_loader = DataLoader(train_dataset,
                              shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"],
                              num_workers=train_cfg["num_workers"])
    logger.info(train_agent_dataset)

    # loading custom mask for validation dataset
    logger.info(f"Loading val mask in path {valid_cfg['mask_path']}")
    val_custom_mask = np.load(valid_cfg['mask_path'])
    logger.info(f"Length of validation mask is: {val_custom_mask.sum()}")

    valid_path = valid_cfg["key"]
    valid_zarr = ChunkedDataset(dm.require(valid_path)).open(cached=False)
    # Bug fix: previously logged type(train_zarr) here (copy-paste error).
    logger.info(f"valid_zarr {type(valid_zarr)}")
    valid_agent_dataset = AgentDataset(cfg, valid_zarr, rasterizer,
                                       agents_mask=val_custom_mask)
    # transform validation dataset to the proper frame of reference
    valid_dataset = TransformDataset(valid_agent_dataset, cfg)
    if not valid_cfg['subset'] == -1:
        # Bug fix: Subset expects a sequence of indices; previously the raw
        # int valid_cfg['subset'] was passed (the train path already used
        # np.arange).
        valid_dataset = Subset(valid_dataset, np.arange(valid_cfg['subset']))
    valid_loader = DataLoader(valid_dataset,
                              shuffle=valid_cfg["shuffle"],
                              batch_size=valid_cfg["batch_size"],
                              num_workers=valid_cfg["num_workers"])
    logger.info(valid_agent_dataset)
    logger.info(f"# Full AgentDataset train: {len(train_agent_dataset)} #valid: {len(valid_agent_dataset)}")
    logger.info(f"# Actual AgentDataset train: {len(train_dataset)} #valid: {len(valid_dataset)}")

    n_epochs = cfg['train_params']['num_epochs']
    d_steps = cfg['train_params']['num_d_steps']
    g_steps = cfg['train_params']['num_g_steps']
    noise_dim = cfg['gan_params']['noise_dim']
    g_learning_rate = cfg['train_params']['g_learning_rate']
    d_learning_rate = cfg['train_params']['d_learning_rate']

    if cfg['gan_params']['gan_type'] == 'vanilla':
        cross_entropy = nn.BCELoss()

    generator = Generator(input_dim=cfg['gan_params']['input_dim'],
                          embedding_dim=cfg['gan_params']['embedding_dim'],
                          decoder_dim=cfg['gan_params']['decoder_dim'],
                          trajectory_dim=cfg['model_params']['future_num_frames'],
                          noise_dim=noise_dim,
                          backbone_type=cfg['gan_params']['backbone_type'],
                          embedding_type=cfg['gan_params']['embedding_type']
                          )
    generator.to(cfg['device'])
    generator.train()  # train mode

    W = cfg['raster_params']['raster_size'][0]
    discriminator = Discriminator(width=W,
                                  h_0=cfg['raster_params']['ego_center'][0]*W,
                                  w_0=cfg['raster_params']['ego_center'][1]*W,
                                  r=cfg['raster_params']['pixel_size'][0],
                                  sigma=cfg['gan_params']['sigma'],
                                  channels_num=cfg['model_params']['future_num_frames']+3,
                                  num_disc_feats=cfg['gan_params']['num_disc_feats'],
                                  input_dim=cfg['gan_params']['input_dim'],
                                  device=cfg['device'],
                                  gan_type=cfg['gan_params']['gan_type'],
                                  embedding_type=cfg['gan_params']['embedding_type'],
                                  lstm_embedding_dim=cfg['gan_params']['embedding_dim']
                                  )
    discriminator.to(cfg['device'])
    discriminator.apply(weights_init)
    discriminator.train()  # train mode

    if cfg['gan_params']['gan_type'] == 'wasserstein':
        optimizer_g = optim.RMSprop(generator.parameters(), lr=g_learning_rate)
        optimizer_d = optim.RMSprop(discriminator.parameters(), lr=d_learning_rate)
    elif cfg['gan_params']['gan_type'] == 'wasserstein_gp':
        betas = (0.0, 0.9)
        optimizer_g = optim.Adam(generator.parameters(), lr=g_learning_rate, betas=betas)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=d_learning_rate, betas=betas)
    else:
        optimizer_g = optim.Adam(generator.parameters(), lr=g_learning_rate)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=d_learning_rate)

    d_steps_left = d_steps
    g_steps_left = g_steps

    # variables for statistics
    d_full_loss = []
    g_full_loss = []
    gp_values = []
    l2_variety_values = []
    metric_vals = []

    # checkpoint dictionary
    checkpoint = {
        'G_losses': defaultdict(list),
        'D_losses': defaultdict(list),
        'counters': {
            't': None,
            'epoch': None,
        },
        'g_state': None,
        'g_optim_state': None,
        'd_state': None,
        'd_optim_state': None
    }

    id_batch = 0  # total number of batches
    len_of_epoch = len(train_loader)

    for epoch in range(n_epochs):
        for batch in train_loader:
            batch = [tensor.to(cfg['device']) for tensor in batch]
            # Creates single raster image from sequence of images from l5kit's AgentDataset
            batch[0] = f_get_raster_image(cfg=cfg,
                                          images=batch[0],
                                          history_weight=cfg['model_params']['history_fading_weight'])
            (image, target_positions, target_availabilities, history_positions,
             history_yaws, centroid, world_to_image) = batch
            actor_state = (history_positions, history_yaws)
            batch_size = image.shape[0]

            # noise for generator
            noise = torch.normal(size=(batch_size, noise_dim), mean=0.0, std=1.0,
                                 dtype=torch.float32, device=cfg['device'])

            #######################################
            # TRAIN DISCRIMINATOR
            #######################################
            # train discriminator (d_steps_left) times (using different batches)
            # train generator (g_steps_left) times (using different batches)
            if d_steps_left > 0:
                d_steps_left -= 1

                for pd in discriminator.parameters():  # reset requires_grad
                    pd.requires_grad = True  # they are set to False below in generator update
                # freeze generator while training discriminator
                for pg in generator.parameters():
                    pg.requires_grad = False

                discriminator.zero_grad()
                # generate fake trajectories (batch_size, target_size, 2) for current batch
                fake_trajectory = generator(image, actor_state, noise)
                # discriminator predictions (batch_size, 1) on real and fake trajectories
                d_real_pred = discriminator(target_positions, image, actor_state)
                d_g_pred = discriminator(fake_trajectory, image, actor_state)

                # loss
                if cfg['gan_params']['gan_type'] == 'vanilla':
                    # tensor with true/fake labels of size (batch_size, 1)
                    real_labels = torch.full((batch_size,), 1, dtype=torch.float, device=cfg['device'])
                    fake_labels = torch.full((batch_size,), 0, dtype=torch.float, device=cfg['device'])
                    real_loss = cross_entropy(d_real_pred, real_labels)
                    fake_loss = cross_entropy(d_g_pred, fake_labels)
                    total_loss = real_loss + fake_loss
                elif cfg['gan_params']['gan_type'] == 'wasserstein':
                    # D(fake) - D(real)
                    total_loss = torch.mean(d_g_pred) - torch.mean(d_real_pred)
                elif cfg['gan_params']['gan_type'] == 'wasserstein_gp':
                    gp_loss = gradient_penalty(discrim=discriminator,
                                               real_trajectory=target_positions,
                                               fake_trajectory=fake_trajectory,
                                               in_image=image,
                                               in_actor_state=actor_state,
                                               lambda_gp=cfg['losses']['lambda_gp'],
                                               device=cfg['device'])
                    total_loss = torch.mean(d_g_pred) - torch.mean(d_real_pred) + gp_loss
                else:
                    raise NotImplementedError

                # calculate gradients for this batch
                total_loss.backward()
                optimizer_d.step()

                # weight clipping for discriminator in pure Wasserstein GAN
                if cfg['gan_params']['gan_type'] == 'wasserstein':
                    c = cfg['losses']['weight_clip']
                    for p in discriminator.parameters():
                        p.data.clamp_(-c, c)

                d_full_loss.append(total_loss.item())
                if cfg['gan_params']['gan_type'] == 'wasserstein_gp':
                    gp_values.append(gp_loss.item())

            #######################################
            # TRAIN GENERATOR
            #######################################
            elif g_steps_left > 0:  # we either train generator or discriminator on current batch
                g_steps_left -= 1

                for pd in discriminator.parameters():
                    pd.requires_grad = False  # avoid discriminator training
                # unfreeze generator
                for pg in generator.parameters():
                    pg.requires_grad = True

                generator.zero_grad()
                if cfg['losses']['use_variety_l2']:
                    l2_variety_loss, fake_trajectory = l2_loss_kmin(traj_real=target_positions,
                                                                    generator_=generator,
                                                                    image=image,
                                                                    actor_state=actor_state,
                                                                    cfg=cfg,
                                                                    kmin=cfg['losses']['k_min'],
                                                                    return_best_traj=True)
                else:
                    fake_trajectory = generator(image, actor_state, noise)
                d_g_pred = discriminator(fake_trajectory, image, actor_state)

                if cfg['gan_params']['gan_type'] == 'vanilla':
                    # while training generator we associate generated fake examples
                    # with real labels in order to measure generator quality
                    real_labels = torch.full((batch_size,), 1, dtype=torch.float, device=cfg['device'])
                    fake_loss = cross_entropy(d_g_pred, real_labels)
                elif cfg['gan_params']['gan_type'] in ['wasserstein', 'wasserstein_gp']:
                    # -D(fake)
                    fake_loss = -torch.mean(d_g_pred)
                else:
                    raise NotImplementedError

                if cfg['losses']['use_variety_l2']:
                    fake_loss += cfg['losses']['weight_variety_l2'] * l2_variety_loss
                    l2_variety_values.append(l2_variety_loss.item())

                fake_loss.backward()
                optimizer_g.step()
                g_full_loss.append(fake_loss.item())

            # renew d_steps_left, g_steps_left at the end of full discriminator-generator training cycle
            if d_steps_left == 0 and g_steps_left == 0:
                d_steps_left = d_steps
                g_steps_left = g_steps

            # print current model state on train dataset
            if (id_batch > 0) and (id_batch % cfg['train_params']['print_every_n_steps'] == 0):
                print_statistics(logger=logger, cfg=cfg, epoch=epoch,
                                 len_of_epoch=len_of_epoch, id_batch=id_batch,
                                 d_full_loss=d_full_loss, g_full_loss=g_full_loss,
                                 gp_values=gp_values,
                                 l2_variety_values=l2_variety_values,
                                 print_over_n_last=1000)
                # save rasterized image of 0th element of current batch
                plot_traj_on_map(cfg, 0, batch, generator,
                                 save_name=str(id_batch),
                                 save_directory=cfg['train_params']['image_sample_dir'])

            # Save checkpoint and evaluate the model
            if (id_batch > 0) and (id_batch % cfg['train_params']['checkpoint_every_n_steps'] == 0):
                checkpoint['counters']['t'] = id_batch
                checkpoint['counters']['epoch'] = epoch

                # Check stats on the validation set
                logger.info('Checking stats on val ...')
                metrics_val = evaluate(cfg, generator, valid_loader)
                metric_vals.append(metrics_val)
                with open('metric_vals_list.pkl', 'wb') as handle:
                    pickle.dump(metric_vals, handle, protocol=pickle.HIGHEST_PROTOCOL)
                for k, v in sorted(metrics_val.items()):
                    logger.info(' [val] {}: {:.3f}'.format(k, v))

                checkpoint['g_state'] = generator.state_dict()
                checkpoint['g_optim_state'] = optimizer_g.state_dict()
                checkpoint['d_state'] = discriminator.state_dict()
                checkpoint['d_optim_state'] = optimizer_d.state_dict()
                checkpoint_path = os.path.join(os.getcwd(), f"{cfg['model_name']}_{id_batch}.pt")
                logger.info('Saving checkpoint to {}'.format(checkpoint_path))
                torch.save(checkpoint, checkpoint_path)
                logger.info('Done.')

                results_df, metric_df = get_results_plot(d_full_loss, g_full_loss,
                                                         metric_vals,
                                                         train_window_size=100,
                                                         val_window_size=10,
                                                         is_save=True)
                results_df.to_excel('results.xlsx', index=False)
                metric_df.to_excel('val_metrics.xlsx', index=False)

            id_batch = id_batch + 1
# NOTE(review): Python 2 fragment; `seq_limits`, `i`, `state_seq`,
# `feat_mat`, `labels` and `features` come from code outside this chunk
# (the indexing by `i` suggests an enclosing loop) -- confirm.
lo, hi = seq_limits[i]
labels.add_vector_label(state_seq[lo:hi + 1])
features.set_feature_vector(feat_mat[:, lo:hi + 1], i)
print 'num_labels=%d' % labels.get_num_labels()
print 'num_states=%d' % labels.get_num_states()
print 'num_features=%d' % features.get_num_features()
print 'num_vectors=%d' % features.get_num_vectors()
model = HMSVMModel(features, labels, SMT_TWO_STATE)
model.set_use_plifs(True)  # piecewise-linear feature transforms
# Alternative solvers, kept for reference:
# sosvm = DualLibQPBMSOSVM(model, labels, 5000.0)
# sosvm = StochasticSOSVM(model, labels)
# sosvm.set_lambda(1)
# sosvm.set_verbose(True)
# NOTE(review): hinge_loss is constructed but never used in this chunk.
hinge_loss = HingeLoss()
sosvm = PrimalMosekSOSVM(model, labels)
sosvm.set_regularization(50)
sosvm.io.set_loglevel(MSG_DEBUG)
print 'Training SO-SVM...'
sosvm.train()
print '\tdone!'
print sosvm.get_w()
# Evaluate on the training data itself.
predicted = sosvm.apply(model.get_features())
evaluator = StructuredAccuracy()
acc = evaluator.evaluate(predicted, model.get_labels())
print 'Training accuracy = %.4f' % acc
utils.print_statistics(labels, predicted)