def makeSplits(base, trainX, trainY, trainRatio, testX, testY):
    """Load TREC data, carve the train file into train/dev, and load test.

    The first ``trainRatio`` fraction of the train file becomes the training
    split; the remainder becomes the dev split. The test split is read from
    its own files untouched.

    Returns (train_X, train_Y, dev_X, dev_Y, test_X, test_Y).
    """
    combined_X = read_data(base + trainX)
    combined_Y = load_labels_trec(base + trainY)
    print("shape of train and dev data ", combined_X.shape)
    print("shape of train and dev labels ", combined_Y.shape)
    print()

    # Single cut point shared by data and labels so rows stay aligned.
    cut = int(trainRatio * combined_X.shape[0])
    train_X, dev_X = combined_X[:cut], combined_X[cut:]
    train_Y, dev_Y = combined_Y[:cut], combined_Y[cut:]

    test_X = read_data(base + testX)
    test_Y = load_labels_trec(base + testY)

    print("shape of train data ", train_X.shape)
    print("shape of train labels ", train_Y.shape)
    print()
    print("shape of test data ", test_X.shape)
    print("shape of test labels ", test_Y.shape)
    print()
    return train_X, train_Y, dev_X, dev_Y, test_X, test_Y
def setup_data(train_path, val_path, img_folder_path, batch_size):
    """Read the train and validation splits and wrap each in a dataloader.

    Training data is shuffled and flagged as training; validation data is
    neither. Returns (train_dataset, val_dataset).
    """
    loaders = []
    # is_train doubles as the shuffle flag: shuffle only the training split.
    for path, training in ((train_path, True), (val_path, False)):
        records = read_data(path, img_folder_path)
        loaders.append(
            create_dataloader(
                records,
                batch_size=batch_size,
                is_train=training,
                shuffle=training,
            )
        )
    train_dataset, val_dataset = loaders
    return train_dataset, val_dataset
def readSubj(data0, data1, trainRatio, devRatio):
    """Read subjective/objective data, label it one-hot, shuffle, and split.

    Rows from ``data0`` (subjective) are labelled ``[1, 0]``; rows from
    ``data1`` (objective) are labelled ``[0, 1]``. The merged set is shuffled
    with a fixed seed and split by *cumulative* fractions:
    train = [0, trainRatio), dev = [trainRatio, devRatio), test = [devRatio, 1).

    Returns (train_X, train_Y, dev_X, dev_Y, test_X, test_Y).
    """
    subj_data = read_data(data0)
    subj_labels = np.repeat([[1, 0]], subj_data.shape[0], axis=0)
    # Objective data
    obj_data = read_data(data1)
    obj_labels = np.repeat([[0, 1]], obj_data.shape[0], axis=0)
    print("DATA READ")
    sys.stdout.flush()

    # Shapes (log labels say positive/negative; they mean subjective/objective)
    print("shape of positive data ", subj_data.shape)
    print("shape of positive labels ", subj_labels.shape)
    print()
    print("shape of negative data ", obj_data.shape)
    print("shape of negative labels ", obj_labels.shape)
    print()

    # unite data
    data = merge(subj_data, obj_data)
    labels = merge(subj_labels, obj_labels)

    # randomly shuffle data and labels with a fixed seed: always the same split
    np.random.seed(7)
    shuffle_indices = np.random.permutation(np.arange(len(data)))
    data_shuffled = data[shuffle_indices]
    labels_shuffled = labels[shuffle_indices]

    data_len = data.shape[0]
    train_index = int(trainRatio * data_len)
    dev_index = int(devRatio * data_len)
    test_index = data_len

    # BUG FIX: the original sliced the *unshuffled* arrays (silently discarding
    # the shuffle above) and took the test split from train_index onward, so
    # the test set fully contained the dev set. Slice the shuffled arrays and
    # start the test split at dev_index so the three splits are disjoint.
    train_X = data_shuffled[:train_index]
    dev_X = data_shuffled[train_index:dev_index]
    test_X = data_shuffled[dev_index:test_index]
    train_Y = labels_shuffled[:train_index]
    dev_Y = labels_shuffled[train_index:dev_index]
    test_Y = labels_shuffled[dev_index:test_index]

    print("shape of train data ", train_X.shape)
    print("shape of train labels ", train_Y.shape)
    print()
    print("shape of test data ", test_X.shape)
    print("shape of test labels ", test_Y.shape)
    return train_X, train_Y, dev_X, dev_Y, test_X, test_Y
def loadTrainDevTest(base, trainX, trainY, devX, devY, testX, testY):
    """Load already-split train/dev/test data and PE labels from ``base``.

    Returns (train_X, train_Y, dev_X, dev_Y, test_X, test_Y).
    """
    names = ("train", "dev", "test")
    file_pairs = ((trainX, trainY), (devX, devY), (testX, testY))

    # Load every split first, then report shapes, preserving the original
    # side-effect order (all reads before any printing).
    loaded = {}
    for name, (x_file, y_file) in zip(names, file_pairs):
        loaded[name] = (read_data(base + x_file), load_labels_pe(base + y_file))

    for name in names:
        X, Y = loaded[name]
        print(name + " data shape ", X.shape)
        print(name + " labels shape ", Y.shape)
        if name != "test":  # no blank line after the final (test) block
            print()

    train_X, train_Y = loaded["train"]
    dev_X, dev_Y = loaded["dev"]
    test_X, test_Y = loaded["test"]
    return train_X, train_Y, dev_X, dev_Y, test_X, test_Y
# Let CLI arguments override the module-level defaults; falsy values
# (None / 0 / empty string) leave the existing default in place.
if args.batch_size:
    batch_size = args.batch_size
if args.nb_epochs:
    nb_epochs = args.nb_epochs
if args.lr:
    lr = args.lr
if args.save_path:
    save_path = args.save_path

# Make save_path (idempotent: exist_ok avoids failing on re-runs)
if save_path is not None:
    os.makedirs(os.path.join(save_path, 'sequence_models'), exist_ok=True)

# Read data: SMILES strings and the HIV_active target column.
smiles, y = read_data(data_path, col_smiles='smiles', col_target='HIV_active')
# len_percentile=100 keeps even the longest sequence (no length truncation)
# — presumably generate_tokens can trim by length percentile; confirm.
tokens, num_words, max_phrase_len = generate_tokens(smiles, len_percentile=100)

# Get train and test set; stratify on y to keep class balance in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    tokens, y, test_size=config.TEST_RATIO, shuffle=True, stratify=y,
    random_state=config.SEED)

# Build and evaluate graph models; scores collected here.
model_scores = []
from dash.dependencies import Input, Output
from utils.data import read_data
from utils.summary_table import summary_table
from utils.views import index, business, credit, household, employment, playground
from utils.playground import make_charts_for_questions

# Dash app; callback exceptions suppressed because views register callbacks
# for components that are only mounted after routing (multi-page layout).
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    suppress_callback_exceptions=True,
)

# Survey responses plus a question-id -> human label mapping.
responses, question_labels = read_data()
# Inverted mapping: human label -> question id, for lookups by display name.
label_questions = {v: k for k, v in question_labels.items()}

# Raw survey CSV used to populate filter dropdown options.
raw_data = pd.read_csv("./static/data/raw.csv")
unique_type_of_industry = raw_data["TypeofIndustry"].unique()
unique_genders = raw_data[label_questions["Gender"]].unique()
unique_states = raw_data["State"].unique()

# Shell layout: dcc.Location tracks the URL; the "body" div is filled in
# by the routing callback (defined elsewhere in this file).
app.layout = html.Div(
    className="container-fluid",
    style={"padding-right": "0px", "padding-left": "0px"},
    children=[dcc.Location(id="url", refresh=False), html.Div(id="body")],
)