def preprocess():
    """Run the data_utils preprocessing pipeline over the configured splits.

    All input/output locations come from FLAGS: the train and valid data
    files are preprocessed together, the vocabulary is written to
    FLAGS.vocabulary_file, and embeddings to FLAGS.save_embedding_file.
    Delegates all actual work to data_utils.preprocess_data.
    """
    data_utils.preprocess_data(
        data_paths=[FLAGS.train_data_file, FLAGS.valid_data_file],
        vocab_path=FLAGS.vocabulary_file,
        embedding_path=FLAGS.save_embedding_file,
        train_data_path=FLAGS.train_data_file,
        valid_data_path=FLAGS.valid_data_file,
    )
from collections import defaultdict import math import numpy as np from data_utils import read_data, preprocess_data, clean_text, read_weights if __name__ == '__main__': data, _ = read_data(use_loaded=True) X, y, emb, tokenizer, label_encoder = preprocess_data(data=data, use_loaded=True) with open("data/adjectives_people.txt", "r", encoding="utf-8") as fin: identity_columns = [line.strip() for line in fin.readlines()] cleaned_text = data["text"].apply(clean_text).values debias_weights = np.ones(len(y)) num_pos, num_all = defaultdict(int), defaultdict(int) sum_pos = sum([debias_weights[i] for i in range(len(y)) if y[i] == 0]) sum_all = sum(debias_weights) for idty in identity_columns: for i in range(len(cleaned_text)): ok = False sen = cleaned_text[i] if idty in ["american", "african"]: sen_split = sen.split() for j in range(len(sen_split)): if sen_split[j] == idty: if j == 0 or " ".join([sen_split[j - 1], sen_split[j] ]) != "american african": if j == len(sen_split) - 1 or " ".join([ sen_split[j], sen_split[j + 1]
logging.basicConfig(stream=sys.stdout, level=logging.INFO) model_name = "c3d" # alexnet, scattering, c3d layer = "conv2" # chooses layer num_frames_per_clip = 3 # for c3d only, must generate more than 1 clip! device = args.device # '/cpu:0', '/gpu:0' data_dir = "/scratch/users/vision/reza/v4" name = layer # this can be anything # indirect params out_dir = oj("/scratch/users/vision/chandan/out", model_name + \ "_" + name + "_" + time.strftime("%b%d_%H:%M:%S")) np.random.seed(13) # choose model ims, _ = data_utils.load_data(data_dir, im_range=im_ranges_list[0]) ims = data_utils.preprocess_data(ims=ims) if model_name == "alexnet": # alexnet alone from models.alexnet.alexnet_model import build_model placeholder, model = build_model(ims.shape[1:]) model = model[layer] elif model_name == "scattering": # scattering alone from models.scattering.scattering_model import build_model ims = np.transpose(ims, (0, 3, 1, 2)) # convert NHWC -> NCHW placeholder, model = build_model(ims.shape[1:]) # extract features for i in range(len(im_ranges_list)): im_range = im_ranges_list[i] ims, _ = data_utils.load_data(data_dir, im_range=im_range)
'/Users/ScottEnsel/Desktop/Deep Learning/Project/NEW files/Z_run-010_thumb_index_middle.mat', struct_as_record=False, squeeze_me=True) EMG_data = all_data['z'] # # all_data = sio.loadmat(os.path.join(data_utils.DATA_DIR,data_utils.DATA_SET1), struct_as_record=False, squeeze_me=True) # EMG_data = all_data['z'] THUMB_INDEX = 0 INDEX_INDEX = 1 MIDDLE_INDEX = 2 RING_INDEX = 3 PINKY_INDEX = 4 # new_z = data_utils.preprocess_data(EMG_data, THUMB_INDEX) new_z = data_utils.preprocess_data(EMG_data, INDEX_INDEX) # new_z = data_utils.preprocess_data(EMG_data, MIDDLE_INDEX) y = new_z[:, 0] #seperate labels x = new_z[:, 1:] #seperate features #1:34 #split must be less than 0.5 x_train, x_test, y_train, y_test, y_kf_train_mean = data_split(x, y, split=0.04) training_mean = np.sum(y_train) x_hat = Kalman_filter(x_train, x_test, y_train, y_test)
logger = get_basic_logger()


def round_pred(pred):
    """Binarize a prediction score: return 1 when pred >= 0.5, else 0."""
    return 1 if pred >= 0.5 else 0


if __name__ == '__main__':
    # Load run configuration, then the train/test splits it points at.
    config_path = "config/main_config.json"
    config = load_conf(config_path)
    train_df, test_df = load_data(config.data)

    # Feature preparation; train=True/False selects fit vs. transform-only paths.
    train_df = preprocess_data(train_df, train=True)
    test_df = preprocess_data(test_df, train=False)

    # Cross-validated sanity check before the final fit.
    accuracy = eval_booster(train_df)
    logger.info(f"Mean Accuracy over 5 folds: {accuracy}")

    # PassengerId is an identifier, not a feature — drop it for fit/predict.
    booster = train_booster(train_df.drop(columns=["PassengerId"]))
    predictions = booster.predict(test_df.drop(columns=["PassengerId"]).values)
    predictions = [round_pred(p) for p in predictions]

    # Kaggle-style submission file: integer ids and 0/1 labels.
    submission = pd.DataFrame({
        "PassengerId": test_df.PassengerId,
        "Survived": predictions,
    })
    submission = submission.astype(int)
    submission.to_csv("submission.csv", index=False)