class FeaturesPerceptronRanker(BasePerceptronRanker):
    """Base class for global rankers scoring whole trees, based on features."""

    def __init__(self, cfg):
        super(FeaturesPerceptronRanker, self).__init__(cfg)
        if not cfg:
            cfg = {}
        self.feats = ['bias: bias']
        self.vectorizer = None
        self.normalizer = None
        self.binarize = cfg.get('binarize', False)
        # initialize feature functions
        if 'features' in cfg:
            self.feats.extend(cfg['features'])
        self.feats = Features(self.feats, cfg.get('intermediate_features', []))

    def _extract_feats(self, tree, da):
        feats = self.vectorizer.transform(
            [self.feats.get_features(tree, {'da': da})])
        if self.normalizer:
            feats = self.normalizer.transform(feats)
        return feats[0]

    def _init_training(self, das_file, ttree_file, data_portion):
        super(FeaturesPerceptronRanker, self)._init_training(
            das_file, ttree_file, data_portion)
        # precompute training data features
        X = []
        for da, tree in zip(self.train_das, self.train_trees):
            X.append(self.feats.get_features(tree, {'da': da}))
        if self.prune_feats > 1:
            self._prune_features(X)
        # vectorize and binarize or normalize (+ train vectorizer/normalizer)
        if self.binarize:
            self.vectorizer = DictVectorizer(sparse=False, binarize_numeric=True)
            self.train_feats = self.vectorizer.fit_transform(X)
        else:
            self.vectorizer = DictVectorizer(sparse=False)
            self.normalizer = StandardScaler(copy=False)
            self.train_feats = self.normalizer.fit_transform(
                self.vectorizer.fit_transform(X))
        log_info('Features matrix shape: %s' % str(self.train_feats.shape))

    def _prune_features(self, X):
        """Prune features – remove all entries from X that involve features
        not having a specified minimum occurrence count.
        """
        counts = defaultdict(int)
        for inst in X:
            for key in inst:
                counts[key] += 1
        for inst in X:
            # iterate over a copy of the keys so entries can be deleted safely
            for key in list(inst.keys()):
                if counts[key] < self.prune_feats:
                    del inst[key]
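# Usage sketch (hedged): a minimal config dict this ranker might accept. The
# feature specification below is hypothetical -- its exact syntax is defined by
# the Features class -- and 'prune_feats' is assumed to be consumed by
# BasePerceptronRanker.
example_cfg = {
    'binarize': False,
    'features': ['depth: tree_depth'],  # hypothetical feature function spec
    'intermediate_features': [],
}
ranker = FeaturesPerceptronRanker(example_cfg)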
def train_ctc_model(train_file, test_file):
    """Train the Code Recognizer (CTC) classifier."""
    # fall back to the default training and test datasets when none are supplied
    train_file = train_file or parameters_ctc['train_file']
    test_file = test_file or parameters_ctc['test_file']

    # extract features from two language models trained on Gigaword and StackOverflow
    features = Features(RESOURCES)
    train_tokens, train_features, train_labels = features.get_features(train_file, True)
    test_tokens, test_features, test_labels = features.get_features(test_file, False)

    # fastText embedding
    vocab_size, word_to_id, id_to_word, word_to_vec = get_word_dict_pre_embeds(train_file, test_file)
    train_ids, test_ids = get_train_test_word_id(train_file, test_file, word_to_id)

    # transform each n-gram probability into a k-dimensional vector using Gaussian binning
    word_embeds = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06),
                                    (vocab_size, parameters_ctc['word_dim']))
    for word in word_to_vec:
        word_embeds[word_to_id[word]] = word_to_vec[word]

    # concatenate the outputs with the fastText embedding
    ctc_classifier = NeuralClassifier(len(train_features[0]), max(train_labels) + 1,
                                      vocab_size, word_embeds)
    ctc_classifier.to(device)

    # binary classifier
    optimizer = torch.optim.Adam(ctc_classifier.parameters(), lr=parameters_ctc["LR"])
    step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

    # prepare dataset
    train_x = Variable(torch.FloatTensor(train_features).to(device))
    train_x_words = Variable(torch.LongTensor(train_ids).to(device))
    train_y = Variable(torch.LongTensor(train_labels).to(device))
    test_x = Variable(torch.FloatTensor(test_features).to(device))
    test_x_words = Variable(torch.LongTensor(test_ids).to(device))
    test_y = Variable(torch.LongTensor(test_labels).to(device))

    # training
    for epoch in range(parameters_ctc['epochs']):
        loss = ctc_classifier.CrossEntropy(train_x, train_x_words, train_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step_lr_scheduler.step()  # decay the learning rate on schedule

    train_scores, train_preds = ctc_classifier(train_x, train_x_words)
    test_scores, test_preds = ctc_classifier(test_x, test_x_words)
    eval(test_preds, test_labels, "test")  # custom evaluation helper (shadows the builtin eval)

    return ctc_classifier, vocab_size, word_to_id, id_to_word, word_to_vec, features
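# A minimal sketch of the Gaussian binning mentioned above, under the assumption
# that each scalar n-gram probability p is expanded into k dimensions, where
# dimension j holds p's Gaussian membership in the j-th bin. The bin layout and
# sigma below are illustrative; the actual binning lives in the Features class.
def gaussian_binning(p, k=10, sigma=0.1):
    centers = np.linspace(0.0, 1.0, k)  # evenly spaced bin centers over [0, 1]
    return np.exp(-((p - centers) ** 2) / (2 * sigma ** 2))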
def get_data():
    files = os.listdir('./MealNoMealData')
    meal_data_files = []
    no_meal_data_files = []
    for file in files:
        if 'Nomeal' in file:
            no_meal_data_files.append(os.path.join('./MealNoMealData', file))
        else:
            meal_data_files.append(os.path.join('./MealNoMealData', file))

    data = []
    labels = []
    for meal_data_file, no_meal_data_file in zip(meal_data_files, no_meal_data_files):
        preprocess_obj = Preprocess(meal_data_file)
        meal_df = preprocess_obj.get_dataframe()
        meal_features = Features(meal_df)
        meal_features.compute_features()
        # temp_meal_features = meal_features.pca_decomposition().tolist()
        temp_meal_features = meal_features.get_features()
        labels += [1] * len(temp_meal_features)

        preprocess_obj_ = Preprocess(no_meal_data_file)
        no_meal_df = preprocess_obj_.get_dataframe()
        no_meal_features = Features(no_meal_df)
        no_meal_features.compute_features()
        no_meal_features_ = no_meal_features.get_features()
        # no_meal_final_features = meal_features.pca.transform(no_meal_features_).tolist()
        no_meal_final_features = no_meal_features_
        labels += [0] * len(no_meal_features_)

        data.extend(temp_meal_features)
        data.extend(no_meal_final_features)

    return data, labels
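# Example use of get_data() with a held-out split; sklearn's train_test_split
# is not part of the original pipeline and is shown here only for illustration.
from sklearn.model_selection import train_test_split

data, labels = get_data()
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42)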
def build_dataset(self, selected_datasets):
    features = Features.get_features(self.data_dir, self.features_type)
    self.datasets = {t: [] for t in self.all_tasks}
    with open(self.filename) as f:
        f.readline()  # skip the header line
        for line in f:
            values = line.strip().split()
            utt_id = values[0]
            dataset = values[1].strip().lower()
            raw_item = {k: v.lower() for k, v in zip(self.data_tasks, values[3:])}
            if utt_id in features:
                if dataset in selected_datasets:
                    self._add_record(raw_item, features[utt_id], dataset)
            else:
                print("Utterance does not have features! utt_id: {}".format(utt_id))
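# Hypothetical layout of one metadata line consumed by build_dataset, inferred
# from the parsing above: column 0 is the utterance id, column 1 the dataset
# split, column 2 is skipped, and columns 3+ hold one value per task in
# self.data_tasks. The column values shown here are illustrative only:
#
#   utt_0001  train  <skipped>  happy  female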
def lambda_handler(event, context):
    json_data = json.loads(event['body'])
    preprocess = Preprocess(json_data=json_data)
    preprocess.scale_points(calculate_scale=False)
    pose_objects = preprocess.new_pose_objects

    features_obj = Features(pose_objects=pose_objects)
    features_obj.compute_features()
    features = features_obj.get_features()
    # pca_model = pickle.load(open('pca.pkl', 'rb'))
    # reduced_feature_matrix = pca_model.transform(features)

    s3 = boto3.resource('s3')
    svm_classifier = pickle.loads(
        s3.Bucket("gesture-recognition").Object("SVM_model.pkl").get()['Body'].read())
    logreg_classifier = pickle.loads(
        s3.Bucket("gesture-recognition").Object("LogReg_model.pkl").get()['Body'].read())
    lda_classifier = pickle.loads(
        s3.Bucket("gesture-recognition").Object("LDA_model.pkl").get()['Body'].read())
    random_forest_classifier = pickle.loads(
        s3.Bucket("gesture-recognition").Object("RForest_model.pkl").get()['Body'].read())

    prediction_rf = random_forest_classifier.predict(features)
    prediction_svm = svm_classifier.predict(features)
    prediction_lda = lda_classifier.predict(features)
    prediction_logreg = logreg_classifier.predict(features)

    data = {
        "1": prediction_svm[0],
        "2": prediction_logreg[0],
        "3": prediction_lda[0],
        "4": prediction_rf[0],
    }
    return {'statusCode': 200, 'body': json.dumps(data)}
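# Design note (sketch): loading the four pickled models inside the handler
# re-downloads them from S3 on every invocation. Caching them at module scope,
# as below, would let warm Lambda containers reuse them. The bucket and key
# names match those used above, but this refactor is an assumption, not part
# of the original code.
_MODEL_CACHE = {}

def load_model_cached(s3, key):
    if key not in _MODEL_CACHE:
        _MODEL_CACHE[key] = pickle.loads(
            s3.Bucket('gesture-recognition').Object(key).get()['Body'].read())
    return _MODEL_CACHE[key]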
def get_prediction(dict_files: Dict[str, str], model: str) -> Dict[str, int]:
    """Get a prediction for each file.

    :param dict_files: mapping of file identifiers to file paths
    :param model: name of the model to use for prediction
    :return: dictionary mapping each file identifier to its prediction
    """
    pred = {}
    for key, value in dict_files.items():
        print(value)
        data = Features.get_features(value)
        df = pd.DataFrame([data])
        print(f"Predicting for {key}...")
        prediction = predict(df, model)
        pred[key] = prediction
    return pred
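# Hypothetical call; the file paths and model name below are illustrative only.
files = {'sample_1': 'data/sample_1.csv'}
predictions = get_prediction(files, model='svm')
print(predictions)  # e.g. {'sample_1': 0}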
preprocess = Preprocess()
preprocess.scale_points()
pose_objects = preprocess.new_pose_objects

features_obj = Features(pose_objects=pose_objects)
features_obj.compute_features()
# reduced_feature_matrix = features_obj.compute_pca()
# print(reduced_feature_matrix)
# print(len(reduced_feature_matrix), len(reduced_feature_matrix[0]))
# X = reduced_feature_matrix
X = features_obj.get_features()
Y = [obj.label for obj in pose_objects]
print(len(X), len(Y))

clf_rforest = Classification('RForest', X, Y)
clf_rforest.get_classifier_object()
clf_rforest.get_metrics()
pickle.dump(clf_rforest.get_classifier(), open('RForest_model.pkl', 'wb'))
print()

clf_svm = Classification('svm', X, Y)
clf_svm.get_classifier_object()
clf_svm.get_metrics()
pickle.dump(clf_svm.get_classifier(), open('SVM_model.pkl', 'wb'))
print()
import pickle

import numpy as np
import pandas as pd

from preprocessing import Preprocess
from features import Features

test_file_name = input("Please enter the test file name: ")
preprocess_obj = Preprocess(test_file_name)
test_file_dataframe = preprocess_obj.get_dataframe()
test_file_features_obj = Features(test_file_dataframe)
test_file_features_obj.compute_features()
test_file_features = test_file_features_obj.get_features()
# print(len(test_file_features))

# Random Forest
random_forest_clf = pickle.load(open('RForest_model.pkl', 'rb'))
y_pred = random_forest_clf.predict(test_file_features)
print('Saving the output of RandomForest classifier prediction')
rforest_dataframe = pd.DataFrame(y_pred, columns=['Meal/NoMeal'])
rforest_dataframe.to_csv('RForest_output.csv')

# AdaBoost
adaboost_clf = pickle.load(open('Adaboost_model.pkl', 'rb'))
y_pred = adaboost_clf.predict(test_file_features)
print('Saving the output of AdaBoost classifier prediction')
adaboost_dataframe = pd.DataFrame(y_pred, columns=['Meal/NoMeal'])
adaboost_dataframe.to_csv('Adaboost_output.csv')

# XGBoost
XGBoost_clf = pickle.load(open('XGBoost_model.pkl', 'rb'))
y_pred = XGBoost_clf.predict(test_file_features)
print('Saving the output of XGBoost classifier prediction')
xgboost_dataframe = pd.DataFrame(y_pred, columns=['Meal/NoMeal'])
xgboost_dataframe.to_csv('XGBoost_output.csv')
class Agent:
    # name should contain only letters, digits, and underscores (not enforced by the environment)
    __name = 'Based_Agent'

    def __init__(self, stateDim, actionDim, agentParams):
        self.__stateDim = stateDim
        self.__actionDim = actionDim
        self.__action = np.random.random(actionDim)
        self.__step = 0

        self.__alpha = 0.001
        self.__gamma = 0.9
        self.__decision_every = 6
        self.__explore_probability = 0.2
        self.__max_replay_samples = 20

        self.__features = Features()
        self.__previous_action = None
        self.__current_out = None
        self.__previous_out = None
        self.__previous_meta_state = None
        self.__previous_state = None

        self.__test = agentParams[0] if agentParams else None
        self.__exploit = False

        self.__segments = 2
        self.__actions = 3 ** self.__segments

        try:
            self.__net = load_model('net')
        except Exception:
            print('Creating new model')
            self.__net = Sequential([
                Dense(50, activation='elu', input_dim=self.__features.dim),
                Dense(30, activation='elu'),
                Dense(self.__actions),
                Reshape((self.__actions, 1))
            ])
            self.__net.compile(optimizer=SGD(lr=self.__alpha),
                               loss='mean_squared_error',
                               sample_weight_mode='temporal')

        try:
            self.__replay = Replay.load('replay')
        except Exception:
            self.__replay = Replay(self.__actions)

        self.__replay_X = []
        self.__replay_Y = []

    def start(self, state):
        self.__previous_state = state
        self.__choose_action(state)
        self.__previous_out = self.__current_out
        return self.__action

    def step(self, reward, state):
        self.__previous_state = state
        self.__step += 1
        if self.__step % self.__decision_every != 0:
            return self.__action
        self.__choose_action(state)
        if not self.__exploit:
            max_q = self.__current_out[np.argmax(self.__current_out)]
            self.__update_q(reward - self.__features.min_dist(state) / 100, max_q)
        self.__previous_out = self.__current_out
        return self.__action

    def end(self, reward):
        if not self.__exploit:
            self.__update_q(reward, reward)
            self.__replay.submit(self.__test, (self.__replay_X, self.__replay_Y), self.__step)
            self.__net.save('net')
            self.__replay.save('replay')

    def cleanup(self):
        pass

    def getName(self):
        return self.__name

    def __choose_action(self, state):
        meta_state = np.asarray(self.__features.get_features(state),
                                dtype='float').reshape((1, self.__features.dim))
        out = self.__net.predict_proba([meta_state], batch_size=1)[0].flatten()
        self.__current_out = out
        if self.__exploit or self.__explore_probability < np.random.random():
            # take the best action
            action = np.argmax(out)
        else:
            # take a random action
            action = np.random.randint(0, self.__actions)
        self.__previous_action = action
        self.__previous_meta_state = meta_state
        self.__meta_to_action(action)

    def __update_q(self, reward, max_q):
        teach_out = self.__previous_out
        teach_out[self.__previous_action] = reward + self.__gamma * max_q

        # reservoir sampling from an infinite stream
        if len(self.__replay_X) < self.__max_replay_samples:
            self.__replay_X.append(self.__previous_meta_state)
            self.__replay_Y.append((teach_out[self.__previous_action], self.__previous_action))
        elif np.random.random() < self.__max_replay_samples / self.__step:
            to_replace = np.random.randint(0, self.__max_replay_samples)
            self.__replay_X[to_replace] = self.__previous_meta_state
            self.__replay_Y[to_replace] = (teach_out[self.__previous_action],
                                           self.__previous_action)

        self.__net.fit([self.__previous_meta_state],
                       [teach_out.reshape(1, self.__actions, 1)],
                       verbose=0)

        replay_x, replay_y, replay_w = self.__replay.get_training()
        if replay_x:
            data = list(zip(replay_x, replay_y, replay_w))
            np.random.shuffle(data)
            for x, y, w in data:
                self.__net.fit([x], [y], sample_weight=[w], verbose=0)

    def __meta_to_action(self, meta):
        self.__action[:] = 0
        for segment in range(self.__segments):
            segment_action = meta % 3
            muscle_start = 30 * segment // self.__segments
            muscle_stop = 30 * (segment + 1) // self.__segments
            if segment_action == 0:
                self.__action[muscle_start:muscle_stop:3] = 1
            if segment_action == 1:
                self.__action[muscle_start + 1:muscle_stop:3] = 1
            if segment_action == 2:
                self.__action[muscle_start + 2:muscle_stop:3] = 1
            meta //= 3
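# Standalone sketch of the base-3 decoding performed by __meta_to_action above:
# each of the `segments` base-3 digits of `meta` selects one of three muscle
# groups for its segment.
def decode_meta(meta, segments=2):
    actions = []
    for _ in range(segments):
        actions.append(meta % 3)
        meta //= 3
    return actions

assert decode_meta(5) == [2, 1]  # 5 in base 3 is 12: segment 0 -> 2, segment 1 -> 1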