def test(len1, len2, data, op="RMS"): criterion = nn.MSELoss() # criterion=nn.L1Loss() # criterion=nn.CrossEntropyLoss() ae = AE(len(data.columns), len1, len2, AE_TYPE) if op == "RMS": optimizer = optim.RMSprop(ae.parameters(), lr=0.01, weight_decay=0.5) elif op == "Adam": optimizer = optim.Adam(ae.parameters(), lr=0.1, weight_decay=0.5) ae = train(ae, criterion, optimizer, len1, len2, op) # 查看参数 # for name, param in ae.named_parameters(): # print(name,param) # input() d = data.copy() for index, epoch in enumerate(data.copy().values): # 标记缺失位置 mark_null = [] for i, v in enumerate(epoch): if (np.isnan(v)): mark_null.append(i) epoch[i] = 0 epoch = Variable(torch.from_numpy(epoch.astype(np.double)).double()) ae.double() #epoch=torch.from_numpy(epoch.astype(np.double)).double() pre = ae(epoch.double()) d.values[index] = pre.detach().numpy() return d
def load_wav_to_torch(full_path):
    """Loads wav data into a torch tensor."""
    sampling_rate, data = read(full_path)
    # Copy to avoid torch's warning about non-writable numpy arrays:
    # https://github.com/pytorch/pytorch/issues/47160#issue-733792677
    return torch.from_numpy(data.copy()).float(), sampling_rate
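# Usage sketch for load_wav_to_torch: assumes the `read` used above is
# scipy.io.wavfile.read (imported here in case the snippet's own import is
# missing). The synthetic file is written only so the example is self-contained.
import numpy as np
from scipy.io.wavfile import read, write

tone = (np.sin(np.linspace(0, 2 * np.pi * 440, 22050)) * 32767).astype(np.int16)
write("example.wav", 22050, tone)
audio, sr = load_wav_to_torch("example.wav")
print(audio.shape, audio.dtype, sr)  # torch.Size([22050]) torch.float32 22050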
def KD(data):
    data_df = data.copy()
    data_df['min'] = data_df['Low'].rolling(9).min()
    data_df['max'] = data_df['High'].rolling(9).max()
    data_df['RSV'] = (data_df['Close'] - data_df['min']) / \
        (data_df['max'] - data_df['min'])
    data_df = data_df.dropna()
    # Compute K (initial value fixed at 50)
    K_list = [50]
    for num, rsv in enumerate(list(data_df['RSV'])):
        K_yesterday = K_list[num]
        K_today = 2 / 3 * K_yesterday + 1 / 3 * rsv
        K_list.append(K_today)
    data_df['K'] = K_list[1:]
    # Compute D (initial value fixed at 50)
    D_list = [50]
    for num, K in enumerate(list(data_df['K'])):
        D_yesterday = D_list[num]
        D_today = 2 / 3 * D_yesterday + 1 / 3 * K
        D_list.append(D_today)
    data_df['D'] = D_list[1:]
    use_df = pd.merge(data, data_df[['K', 'D']],
                      left_index=True, right_index=True, how='left')
    return use_df
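# Usage sketch: compute the stochastic-oscillator K/D columns on a toy
# OHLC frame (synthetic prices, for illustration only).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
close = 100 + rng.normal(0, 1, 30).cumsum()
toy = pd.DataFrame({'Close': close, 'High': close + 1, 'Low': close - 1})
print(KD(toy)[['K', 'D']].tail())  # first 8 rows stay NaN (9-day rolling window)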
def __getitem__(self, index):
    data = np.asarray(Image.open(self.data_filenames[index]))
    label = np.asarray(Image.open(self.label_filenames[index]))
    if self.phase == 'train':
        # Random augmentation: one of 12 flip/rotation combinations
        # (flip in {0, 1, 2}, degree in {0, 1, 2, 3})
        num = random.randint(0, 11)
        flip = num // 4
        degree = num % 4
        if flip == 1:
            data = np.flip(data, 0)
            label = np.flip(label, 0)
        if flip == 2:
            data = np.flip(data, 1)
            label = np.flip(label, 1)
        if degree != 0:
            data = rotate(data, 90 * degree)
            label = rotate(label, 90 * degree)
    elif self.phase == 'test':
        pass
    if self.transform:
        data = self.transform(data.copy())
        label = self.transform(label.copy())
    # return data.half(), label.half()
    return data, label
def __init__(self, data, label, env, shuffle=False):
    """
    Args:
        data : The EEG signal with shape [N, 1, C, T]
        env  : The index of the environment where the data was recorded
        mode : Indicates procedure status (training or testing)
    """
    self.data = data.copy()
    self.label = label.copy()
    self.env = env.copy()
    # self.labelnum, self.envnum = max(self.label) + 1, max(self.env) + 1
    self.labelnum, self.envnum = len(np.unique(self.label)), len(
        np.unique(self.env))
    print("There are {0} samples".format(len(self.label)))
    # iteIndex:     how many pairs of this part of the triplet pairs were already taken
    # triPairLen:   how many triplet pairs there are in this part
    # triPairIndex: the index of the part
    # self.make_triplet_pairs()
    self.dataNums = np.zeros(
        (max(self.env) + 1, max(self.label) + 1)).astype(int)
    self.datalen = 0
    self.dataNumsAccum = []
    self.sub_data_A_ind = []
    self.sub_data_N_ind = []
    self.sub_data_AP_ind = []
    if shuffle:
        self.shuffle()
    self.set_length()
    self.selected = np.zeros(len(self.label))
def __from_dummies__(self, prefix_sep='=', **kwargs):
    """Convert one-hot encoded columns back into the original ones."""
    if 'ext_data' in kwargs:
        data = kwargs['ext_data']
    else:
        data = self.df
    categories = self.cat_clm
    cat_was_num = self.categorical_was_numeric
    out = data.copy()
    for l in categories:
        cols = data.filter(regex="^{}{}".format(l, prefix_sep), axis=1).columns
        labs = [cols[i].split(prefix_sep)[-1] for i in range(cols.shape[0])]
        out[l] = pd.Categorical(
            np.array(labs)[np.argmax(data[cols].values, axis=1)])
        out.drop(cols, axis=1, inplace=True)
        if l in cat_was_num.keys():
            out[l] = out[l].astype(cat_was_num[l])
    if 'ext_data' in kwargs:
        return out
    else:
        self.df = out
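# Usage sketch: invert pd.get_dummies. A bare namespace stands in for the
# real class instance; `cat_clm` and `categorical_was_numeric` are the
# fields the snippet expects, and the toy frame is illustrative only.
import types
import numpy as np
import pandas as pd

df = pd.DataFrame({'color': ['red', 'blue', 'red'], 'x': [1.0, 2.0, 3.0]})
enc = pd.get_dummies(df, prefix_sep='=')
obj = types.SimpleNamespace(df=enc, cat_clm=['color'], categorical_was_numeric={})
__from_dummies__(obj)
print(obj.df)  # 'color' restored as a categorical column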
def transform(self, data):
    data = data.copy().astype('float32')
    # Min-max scale to [-1, 1], then zero-pad each row up to height**2
    data = (data - self.minn) / (self.maxx - self.minn) * 2 - 1
    if self.height * self.height > len(data[0]):
        padding = np.zeros(
            (len(data), self.height * self.height - len(data[0])))
        data = np.concatenate([data, padding], axis=1)
    return data.reshape(-1, 1, self.height, self.height)
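# Usage sketch: the method only reads `minn`, `maxx`, and `height`, so a
# bare namespace object can stand in for the real instance (values here
# are hypothetical).
import types
import numpy as np

scaler = types.SimpleNamespace(minn=0.0, maxx=1.0, height=3)
batch = np.random.rand(4, 7)  # 7 features -> padded to 3*3 = 9
out = transform(scaler, batch)
print(out.shape)  # (4, 1, 3, 3)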
def get_topk_accuracy_quick(net, k, data):
    code_0 = get_code_quick(net, torch.tensor(data))
    code_1 = get_code_quick(net, torch.tensor(transform_images(data.copy(), 90)))
    dist = cdist(code_0, code_1)
    I = dist.argsort(axis=1)
    # np.int was removed in NumPy 1.24; plain int is equivalent here
    b = I[:, 0:k] == np.arange(code_1.shape[0]).reshape(
        code_1.shape[0], 1).astype(int)
    return np.sum(np.any(b, axis=1)) / code_1.shape[0]
def load_dataset(path, s_label):
    data = pd.read_csv(path)
    # Preprocessing taken from
    # https://www.kaggle.com/islomjon/income-prediction-with-ensembles-of-decision-trees
    # replace missing values with the majority class
    data['workclass'] = data['workclass'].replace('?', 'Private')
    data['occupation'] = data['occupation'].replace('?', 'Prof-specialty')
    data['native-country'] = data['native-country'].replace('?', 'United-States')
    # consolidate education categories
    data.education = data.education.replace(
        ['Preschool', '1st-4th', '5th-6th', '7th-8th', '9th', '10th', '11th',
         '12th'], 'left')
    data.education = data.education.replace('HS-grad', 'school')
    data.education = data.education.replace(
        ['Assoc-voc', 'Assoc-acdm', 'Prof-school', 'Some-college'], 'higher')
    data.education = data.education.replace('Bachelors', 'undergrad')
    data.education = data.education.replace('Masters', 'grad')
    data.education = data.education.replace('Doctorate', 'doc')
    # marital status
    data['marital-status'] = data['marital-status'].replace(
        ['Married-civ-spouse', 'Married-AF-spouse'], 'married')
    data['marital-status'] = data['marital-status'].replace(
        ['Never-married', 'Divorced', 'Separated', 'Widowed',
         'Married-spouse-absent'], 'not-married')
    # income
    data.income = data.income.replace('<=50K', 0)
    data.income = data.income.replace('>50K', 1)
    # sex
    data.gender = data.gender.replace('Male', 0)
    data.gender = data.gender.replace('Female', 1)
    # encode categorical values
    data1 = data.copy()
    data1 = pd.get_dummies(data1)
    data1 = data1.drop(['income', s_label], axis=1)
    X = StandardScaler().fit(data1).transform(data1)
    y = data['income'].values
    s = data[s_label].values
    return X, y, s
def rotate_point_cloud(self, data):
    """ Randomly rotate the point clouds to augment the dataset.
        Rotation is per shape, about the up (y) axis.
        Input:  Nx3 array, original point cloud
        Return: Nx3 array, rotated point cloud
    """
    rotation_angle = np.random.uniform() * 2 * np.pi
    cosval = np.cos(rotation_angle)
    sinval = np.sin(rotation_angle)
    rotation_matrix = np.array([[cosval, 0, sinval],
                                [0, 1, 0],
                                [-sinval, 0, cosval]])
    shape_pc = data.copy()
    rotated_data = np.dot(shape_pc, rotation_matrix)
    return rotated_data
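# Usage sketch: rotate a random point cloud. The instance is unused beyond
# the method binding, so None stands in for `self` here.
import numpy as np

cloud = np.random.rand(128, 3)
rotated = rotate_point_cloud(None, cloud)
print(rotated.shape)  # (128, 3); distances from the y-axis are preserved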
def synchGenerator():
    # Average the generator's weight *updates* across MPI workers: rank 0
    # gathers each worker's new weights, averages the deltas against the
    # previous weights, applies them, then broadcasts the result back.
    layer_num = 0
    all_data = []
    for param in netG.parameters():
        all_data.append([])
        data = param.data.numpy().copy()
        sendbfr_prev = data.copy()
        recvbfr = None
        data = comm.gather(data, root=0)
        if rank == 0:
            new_weights = np.zeros(sendbfr_prev.shape, sendbfr_prev.dtype)
            for i in range(1, size):
                new_weights += data[i] - sendbfr_prev
            new_weights /= (size - 1)
            new_weights += sendbfr_prev
            param.data = torch.from_numpy(new_weights)
            # print("Layer " + str(layer_num) + " has finished aggregating weights")
        else:
            pass
            # print("Node rank " + str(rank) + " has sent generator differences for layer " + str(layer_num))
        layer_num += 1
    layer_num = 0
    for param in netG.parameters():
        if rank == 0:
            data = param.data.numpy().copy()
        else:
            data = None
        data = comm.bcast(data, root=0)
        if rank != 0:
            param.data = torch.from_numpy(data)
            # print("Node rank " + str(rank) + " has synched generator layer " + str(layer_num))
        layer_num += 1
def __call__(self, data):
    ''' Calls the transformation.

    Args:
        data (dictionary): data dictionary
    '''
    points = data[None]
    occ = data['occ']
    data_out = data.copy()
    if isinstance(self.N, int):
        idx = np.random.randint(points.shape[0], size=self.N)
        data_out.update({
            None: points[idx, :],
            'occ': occ[idx],
        })
    else:
        # Subsample a fixed number of points outside (Nt_out) and inside
        # (Nt_in) the shape, and record the occupied volume fraction.
        Nt_out, Nt_in = self.N
        occ_binary = (occ >= 0.5)
        points0 = points[~occ_binary]
        points1 = points[occ_binary]
        idx0 = np.random.randint(points0.shape[0], size=Nt_out)
        idx1 = np.random.randint(points1.shape[0], size=Nt_in)
        points0 = points0[idx0, :]
        points1 = points1[idx1, :]
        points = np.concatenate([points0, points1], axis=0)
        occ0 = np.zeros(Nt_out, dtype=np.float32)
        occ1 = np.ones(Nt_in, dtype=np.float32)
        occ = np.concatenate([occ0, occ1], axis=0)
        volume = occ_binary.sum() / len(occ_binary)
        volume = volume.astype(np.float32)
        data_out.update({
            None: points,
            'occ': occ,
            'volume': volume,
        })
    return data_out
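# Usage sketch for the subsampling transform: `self.N` is either an int or
# an (N_out, N_in) pair; a namespace stands in for the transform instance,
# and the dunder is invoked directly as a plain function for illustration.
import types
import numpy as np

points = np.random.rand(1000, 3).astype(np.float32)
occ = (np.random.rand(1000) > 0.5).astype(np.float32)
tf = types.SimpleNamespace(N=(64, 64))
out = __call__(tf, {None: points, 'occ': occ})
print(out[None].shape, out['occ'].shape, out['volume'])  # (128, 3) (128,) ~0.5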
def get_topk_accuracy(net, k, data):
    code_0 = get_code(
        net,
        torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(torch.tensor(data)),
            batch_size=run.batch_size * 4,
            drop_last=False))
    code_1 = get_code(
        net,
        torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(
                torch.tensor(transform_images(data.copy(), 90))),
            batch_size=run.batch_size * 4,
            drop_last=False))
    dist = cdist(code_0, code_1)
    I = dist.argsort(axis=1)
    # np.int was removed in NumPy 1.24; plain int is equivalent here
    b = I[:, 0:k] == np.arange(code_1.shape[0]).reshape(
        code_1.shape[0], 1).astype(int)
    return np.sum(np.any(b, axis=1)) / code_1.shape[0]
def data_from_prediction(data, G, obj_dic):
    agent_dic = dict()
    for i in data.true_time.unique():
        agent_dic[i] = list(data.predict_loc[data.true_time == i])
    data_2 = data.copy()
    min_time = min(data.time)
    for idx in data_2.index:
        if data_2.loc[idx, 'time'] > min_time:
            # shift the sample states down one position
            data_2.at[idx, 'sample1'] = data.loc[idx - 1, 'query_state'].copy()
            data_2.at[idx, 'sample2'] = data.loc[idx - 1, 'sample1'].copy()
            data_2.at[idx, 'sample3'] = data.loc[idx - 1, 'sample2'].copy()
            data_2.at[idx, 'sample4'] = data.loc[idx - 1, 'sample3'].copy()
            data_2.at[idx, 'sample5'] = data.loc[idx - 1, 'sample4'].copy()
            # replace the query state with the predicted state
            data_2.at[idx, 'query_state'] = new_state(
                G, data.loc[idx - 1, 'predict_loc'], obj_dic, agent_dic,
                data.loc[idx, 'true_time'])
            direction_vec = [0, 0, 0, 0, 0]
            direction_vec[int(data.loc[idx - 1, 'prediction'])] = 1
            data_2.at[idx, 'sample1'][-1] = direction_vec
            data_2.at[idx, 'agent_loc'] = data.at[idx - 1, 'predict_loc']
    data_2 = data_2.drop(list(data_2[data_2.time == min_time].index))
    return data_2
def redandclean():
    data = pd.read_csv("cleanedata/cleandata.csv")
    train = data.copy()
    print('Training Features shape: ', train.shape)
    (train['DAYS_BIRTH'] / -365).describe()
    # records whose employment length is ~1000 years (the 365243 sentinel)
    thousand_anomalies = train[(train['DAYS_EMPLOYED'] / 365 >= 900)
                               & (train['DAYS_EMPLOYED'] / 365 <= 1100)]
    # Most anomalies were able to repay on time. But how do they contrast
    # with the non-anomalies?
    # get the index of anomalies and non-anomalies
    anomalies_index = pd.Index(thousand_anomalies.index)
    non_anomalies_index = train.index.difference(anomalies_index)
    # get the non-anomaly records
    non_anomalies = train.iloc[non_anomalies_index]
    # get the anomaly targets
    anomalies_target = thousand_anomalies['TARGET'].value_counts()
    non_anomalies_target = non_anomalies['TARGET'].value_counts()
    # find the default rate for anomalies and non-anomalies
    print("Anomalies have a default rate of {}%".format(
        100 * anomalies_target[1] / (anomalies_target[1] + anomalies_target[0])))
    # Create an anomalous flag column
    train['DAYS_EMPLOYED_ANOM'] = train["DAYS_EMPLOYED"] == 365243
    # Replace the anomalous values with nan
    train['DAYS_EMPLOYED'] = train['DAYS_EMPLOYED'].replace({365243: np.nan})
    # Looking at the years employed for anomalies
    # sklearn.preprocessing.Imputer was removed in scikit-learn 0.22;
    # SimpleImputer is the drop-in replacement.
    from sklearn.impute import SimpleImputer
    # poly_fitting_vars = ['EXT_SOURCE_3', 'EXT_SOURCE_2', 'EXT_SOURCE_1', 'DAYS_BIRTH']
    imputer = SimpleImputer(missing_values=np.nan, strategy='median')
    # train[poly_fitting_vars] = imputer.fit_transform(train[poly_fitting_vars])
    # train[poly_fitting_vars].shape
    # from sklearn.preprocessing import PolynomialFeatures
    # poly_feat = PolynomialFeatures(degree=4)
    # poly_interaction_train = poly_feat.fit_transform(train[poly_fitting_vars])
    # train['DIR'] = train['AMT_CREDIT'] / train['AMT_INCOME_TOTAL']
    # train['AIR'] = train['AMT_ANNUITY'] / train['AMT_INCOME_TOTAL']
    # train['ACR'] = train['AMT_ANNUITY'] / train['AMT_CREDIT']
    # train['DAR'] = train['DAYS_EMPLOYED'] / train['DAYS_BIRTH']
    sensetiveFeatures = ['CODE_GENDER', 'NAME_INCOME_TYPE', 'NAME_FAMILY_STATUS',
                         'OCCUPATION_TYPE', 'ORGANIZATION_TYPE']
    X_num = train.copy()
    X_num = X_num.drop(columns=sensetiveFeatures)
    target = X_num['TARGET']
    X_num = X_num.drop(columns=['TARGET', 'Unnamed: 0'])
    X_num = pd.get_dummies(X_num)
    X_num = imputer.fit_transform(X_num)
    SenstiveData = train[sensetiveFeatures].copy()
    a = train.groupby(sensetiveFeatures).count()
    # print(a['Unnamed: 0'], np.max(a['Unnamed: 0']), np.min(a['Unnamed: 0']),
    #       np.mean(a['Unnamed: 0']), np.std(a['Unnamed: 0']))
    # -> 5262 1 24.483358459932738 136.4728162223554
    for i in range(1):
        SensVector = Mytransformer(sensetiveFeatures[i], SenstiveData)
        model = leaningClassifirofSensetive(X_num, SensVector, nEpoches=20)
def __init__(self, root, classes, memory_classes, memory, task_num, train,
             transform=None, target_transform=None, download=True):
    super(iMNIST, self).__init__(root, task_num, transform=transform,
                                 target_transform=target_transform,
                                 download=download)
    self.train = train  # training set or test set
    self.root = root
    self.target_transform = target_transform
    self.transform = transform
    if download:
        self.download()
    if not self._check_exists():
        raise RuntimeError('Dataset not found.' +
                           ' You can use download=True to download it')
    if self.train:
        data_file = self.training_file
    else:
        data_file = self.test_file
    self.data, self.targets = torch.load(
        os.path.join(self.processed_folder, data_file))
    self.data = np.array(self.data).astype(np.float32)
    self.targets = list(np.array(self.targets))
    self.train = train  # training set or test set
    if not isinstance(classes, list):
        classes = [classes]
    self.class_mapping = {c: i for i, c in enumerate(classes)}
    self.class_indices = {}
    for cls in classes:
        self.class_indices[self.class_mapping[cls]] = []
    data = []
    targets = []
    tt = []  # task module labels
    td = []  # discriminator labels
    for i in range(len(self.data)):
        if self.targets[i] in classes:
            data.append(self.data[i])
            targets.append(self.class_mapping[self.targets[i]])
            tt.append(task_num)
            td.append(task_num + 1)
            self.class_indices[self.class_mapping[self.targets[i]]].append(i)
    if self.train:
        if memory_classes:
            # replay samples from earlier tasks stored in `memory`
            for task_id in range(task_num):
                for i in range(len(memory[task_id]['x'])):
                    if memory[task_id]['y'][i] in range(
                            len(memory_classes[task_id])):
                        data.append(memory[task_id]['x'][i])
                        targets.append(memory[task_id]['y'][i])
                        tt.append(memory[task_id]['tt'][i])
                        td.append(memory[task_id]['td'][i])
    self.data = data.copy()
    self.targets = targets.copy()
    self.tt = tt.copy()
    self.td = td.copy()
def main():
    global i
    global epoch
    global loss_sum
    global running

    parser = ArgumentParser()
    # Either define those arguments individually or choose one of the
    # profiles given further down in the code
    parser.add_argument("-model", type=str, default="dnc", help="Network Model")

    # Training details
    parser.add_argument("-task", type=str, default="babi", help="Task to learn")
    parser.add_argument("-n_subbatch", type=str, default="auto",
                        help="Average this many forward passes into one backward pass")
    parser.add_argument("-max_input_count_per_batch", type=int, default=6000,
                        help="Max batch_size*len that can fit into memory")
    parser.add_argument("-test_interval", type=int, default=500,
                        help="Run test in this interval")
    parser.add_argument("-lr", type=float, default=0.0001, help="Learning rate")
    parser.add_argument("-lr_scheduler", type=str, default="none",
                        help="Define learning rate scheduler")
    parser.add_argument("-lr_step", type=int, default=10,
                        help="Epochs before lr scheduler does a step")
    parser.add_argument("-cyc_base", type=float, default=0.0001,
                        help="Base LR for cyclic LR")
    parser.add_argument("-cyc_max", type=float, default=0.005,
                        help="Max LR for cyclic LR")
    parser.add_argument("-wd", type=float, default=1e-5, help="Weight decay")
    parser.add_argument("-optimizer", type=str, default="rmsprop",
                        help="Optimizer algorithm")
    parser.add_argument("-momentum", type=float, default=0.9,
                        help="Momentum for optimizer")
    parser.add_argument("-preview_interval", type=int, default=10,
                        help="Show preview every nth iteration")
    parser.add_argument("-info_interval", type=int, default=10,
                        help="Show info every nth iteration")
    parser.add_argument("-gpu", default="auto", type=str, help="Run on this GPU.")
    parser.add_argument("-test_on_start", default="0", save=False)
    parser.add_argument("-test_batch_size", default=16)
    parser.add_argument("-grad_clip", type=float, default=10.0,
                        help="Max gradient norm")
    parser.add_argument("-clip_controller", type=float, default=20.0,
                        help="Max gradient norm")

    # Architectural/structural details
    parser.add_argument("-mem_count", type=int, default=16,
                        help="Number of memory cells")
    parser.add_argument("-data_word_size", type=int, default=128,
                        help="Memory word size")
    parser.add_argument("-n_read_heads", type=int, default=1,
                        help="Number of read heads")
    parser.add_argument("-controller_type", type=str, default="lstm",
                        help="Controller type: lstm or linear")
    parser.add_argument("-layer_sizes", type=str, default="256",
                        help="Controller layer sizes. Separate with ,. "
                             "For example 512,256,256",
                        parser=lambda x: [int(y) for y in x.split(",") if y])
    parser.add_argument("-lstm_use_all_outputs", type=bool, default=1,
                        help="Use all LSTM outputs as controller output vs "
                             "use only the last layer")

    # Csordas / Schmidhuber improvements
    parser.add_argument("-dealloc_content", type=bool, default=1,
                        help="Deallocate memory content, unlike DNC, which "
                             "leaves it unchanged, just decreases the usage "
                             "counter, causing problems with lookup")
    parser.add_argument("-sharpness_control", type=bool, default=1,
                        help="Distribution sharpness control for forward and "
                             "backward links")

    # Logs, savefiles, debug
    parser.add_argument("-debug", type=bool, default=1, help="Enable debugging")
    parser.add_argument("-debug_log", type=bool, default=0,
                        help="Enable debug log")
    parser.add_argument("-name", type=str,
                        help="Save training to this directory")
    parser.add_argument("-save_interval", type=int, default=500,
                        help="Save network every nth iteration")
    parser.add_argument("-masked_lookup", type=bool, default=1,
                        help="Enable masking in content lookups")
    parser.add_argument("-mask_min", default=0.0)
    parser.add_argument("-visport", type=int, default=-1,
                        help="Port to run Visdom server on. -1 to disable")

    # Visualisation
    parser.add_argument("-dump_profile", type=str, save=False)
    parser.add_argument("-dump_heatmaps", default=False, save=False)
    parser.add_argument("-noargsave", type=bool, default=False,
                        help="Do not save modified arguments", save=False)
    parser.add_argument("-demo", type=bool, default=False,
                        help="Do a single step with fixed seed", save=False)
    parser.add_argument("-exit_after", type=int,
                        help="Exit after this amount of steps. Useful for debugging.",
                        save=False)

    # NLP tasks, bAbI
    parser.add_argument("-run_on_fraction", type=int, default=0,
                        help="If >1, only 1/this part of the datasets will be used")
    parser.add_argument("-embedding_size", type=int, default=256,
                        help="Size of word embedding for NLP tasks")
    parser.add_argument("-dataset_path", type=str,
                        default="/storage/remote/atcremers45/s0238/",
                        parser=ArgumentParser.str_or_none(),
                        help="Specify babi path manually")
    parser.add_argument("-babi_train_tasks", type=str, default="none",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi task list to use for training")
    parser.add_argument("-babi_test_tasks", type=str, default="none",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi task list to use for testing")
    parser.add_argument("-babi_train_sets", type=str, default="train",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi train sets to use")
    parser.add_argument("-babi_test_sets", type=str, default="test",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi test sets to use")
    parser.add_argument("-think_steps", type=int, default=0,
                        help="Idle steps before requiring the answer (for bAbI)")
    parser.add_argument("-load", type=str, save=False)  # TODO: What does this do?
    parser.add_argument("-print_test", default=False, save=False)

    # Copy task
    parser.add_argument("-bit_w", type=int, default=8,
                        help="Bit vector length for copy task")
    parser.add_argument("-block_w", type=int, default=3,
                        help="Block width for associative recall task")
    parser.add_argument("-len", type=str, default="4",
                        help="Sequence length for copy task",
                        parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-repeat", type=str, default="1",
                        help="Number of repeats for copy task",
                        parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-batch_size", type=int, default=16, help="Batch size")

    parser.add_profile([
        ArgumentParser.Profile("babi", {
            "preview_interval": 10,
            "save_interval": 500,
            "task": "babi",
            "mem_count": 256,
            "data_word_size": 64,
            "n_read_heads": 4,
            "layer_sizes": "256",
            "controller_type": "lstm",
            "lstm_use_all_outputs": True,
            "momentum": 0.9,
            "embedding_size": 128,
            "test_interval": 5000,
            "think_steps": 3,
            "batch_size": 2
        }, include=["dnc-msd"]),

        ArgumentParser.Profile("repeat_copy", {
            "bit_w": 8,
            "repeat": "1-8",
            "len": "2-14",
            "task": "copy",
            "think_steps": 1,
            "preview_interval": 10,
            "info_interval": 10,
            "save_interval": 100,
            "data_word_size": 16,
            "layer_sizes": "32",
            "n_subbatch": 1,
            "controller_type": "lstm",
        }),

        ArgumentParser.Profile("repeat_copy_simple", {
            "repeat": "1-3",
        }, include="repeat_copy"),

        ArgumentParser.Profile("dnc", {
            "masked_lookup": False,
            "sharpness_control": False,
            "dealloc_content": False
        }),
        ArgumentParser.Profile("dnc-m", {
            "masked_lookup": True,
            "sharpness_control": False,
            "dealloc_content": False
        }),
        ArgumentParser.Profile("dnc-s", {
            "masked_lookup": False,
            "sharpness_control": True,
            "dealloc_content": False
        }),
        ArgumentParser.Profile("dnc-d", {
            "masked_lookup": False,
            "sharpness_control": False,
            "dealloc_content": True
        }),
        ArgumentParser.Profile("dnc-md", {
            "masked_lookup": True,
            "sharpness_control": False,
            "dealloc_content": True
        }),
        ArgumentParser.Profile("dnc-ms", {
            "masked_lookup": True,
            "sharpness_control": True,
            "dealloc_content": False
        }),
        ArgumentParser.Profile("dnc-sd", {
            "masked_lookup": False,
            "sharpness_control": True,
            "dealloc_content": True
        }),
        ArgumentParser.Profile("dnc-msd", {
            "masked_lookup": True,
            "sharpness_control": True,
            "dealloc_content": True
        }),

        ArgumentParser.Profile("keyvalue", {
            "repeat": "1",
            "len": "2-16",
            "mem_count": 16,
            "task": "keyvalue",
            "think_steps": 1,
            "preview_interval": 10,
            "info_interval": 10,
            "data_word_size": 32,
            "bit_w": 12,
            "save_interval": 1000,
            "layer_sizes": "32"
        }),

        ArgumentParser.Profile("keyvalue2way", {
            "task": "keyvalue2way",
        }, include="keyvalue"),

        ArgumentParser.Profile("associative_recall", {
            "task": "recall",
            "bit_w": 8,
            "len": "2-16",
            "mem_count": 64,
            "data_word_size": 32,
            "n_read_heads": 1,
            "layer_sizes": "128",
            "controller_type": "lstm",
            "lstm_use_all_outputs": 1,
            "think_steps": 1,
            "mask_min": 0.1,
            "info_interval": 10,
            "save_interval": 1000,
            "preview_interval": 10,
            "n_subbatch": 1,
        })
    ])

    opt = parser.parse()
    assert opt.name is not None, "Training dir (-name parameter) not given"
    opt = parser.sync(os.path.join(opt.name, "args.json"),
                      save=not opt.noargsave)

    if opt.demo:
        Seed.fix()

    os.makedirs(os.path.join(opt.name, "save"), exist_ok=True)
    os.makedirs(os.path.join(opt.name, "preview"), exist_ok=True)

    gpu_allocator.use_gpu(opt.gpu)

    debug.enableDebug = opt.debug_log

    if opt.visport > 0:
        Visdom.start(opt.visport)

    class LengthHackSampler:
        """I don't know exactly why this is needed, but an object of this
        class returns a generator that, when iterated over, always yields a
        list of n elements of the same value m, where n=batch_size and
        m=length. Only used in the BitmapTaskRepeater task."""
        def __init__(self, batch_size, length):
            self.length = length
            self.batch_size = batch_size

        def __iter__(self):
            while True:
                len = self.length() if callable(self.length) else self.length
                yield [len] * self.batch_size

        def __len__(self):
            return 0x7FFFFFFF

    embedding = None
    test_set = None
    curriculum = None
    loader_reset = False

    # Check the task and initialize dataset and metaparameters
    if opt.task == "copy":
        dataset = CopyData(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = in_size
    elif opt.task == "recall":
        dataset = AssociativeRecall(bit_w=opt.bit_w, block_w=opt.block_w)
        in_size = opt.bit_w + 2
        out_size = in_size
    elif opt.task == "keyvalue":
        assert opt.bit_w % 2 == 0, "Key-value dataset works only with even bit_w"
        dataset = KeyValue(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = opt.bit_w // 2
    elif opt.task == "keyvalue2way":
        assert opt.bit_w % 2 == 0, "Key-value dataset works only with even bit_w"
        dataset = KeyValue2Way(bit_w=opt.bit_w)
        in_size = opt.bit_w + 2
        out_size = opt.bit_w // 2
    elif opt.task == "babi":
        dataset = bAbiDataset(think_steps=opt.think_steps,
                              dir_name=opt.dataset_path, name="Train")
        test_set = bAbiDataset(think_steps=opt.think_steps,
                               dir_name=opt.dataset_path, name="Validation")
        dataset.use(opt.babi_train_tasks, opt.babi_train_sets)
        in_size = opt.embedding_size
        print("bAbi: loaded total of %d sequences." % len(dataset))
        test_set.use(opt.babi_test_tasks, opt.babi_test_sets)
        out_size = len(dataset.vocabulary)
        print("bAbi: using %d sequences for training, %d for testing" %
              (len(dataset), len(test_set)))
    elif opt.task in ["ptb", "PTB"]:
        dataset = PTB('test', seq_len=15)
        test_set = PTB('validation', seq_len=15)
        in_size = opt.embedding_size
        print("Loaded dataset with {d} and test set with {t} elements".format(
            d=len(dataset), t=len(test_set)))
        out_size = len(dataset.vocabulary)
        print("PTB: using a total vocabulary of {} words".format(out_size))
    else:
        assert False, "Invalid task: %s" % opt.task

    if opt.task in ["babi"]:
        print("Babi Batchsize: ", opt.batch_size,
              "Test Batchsize: ", opt.test_batch_size)
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=opt.batch_size,
                                                  num_workers=4,
                                                  pin_memory=True,
                                                  shuffle=True,
                                                  collate_fn=MetaCollate())
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=opt.test_batch_size,
            num_workers=opt.test_batch_size,
            pin_memory=True,
            shuffle=False,
            collate_fn=MetaCollate() if test_set is not None else None)
    elif opt.task in ["ptb", 'PTB']:
        if opt.run_on_fraction > 1:
            sampler = torch.utils.data.SequentialSampler(
                list(range(0, len(dataset), opt.run_on_fraction)))
            data_loader = torch.utils.data.DataLoader(
                dataset, batch_size=opt.batch_size, sampler=sampler,
                collate_fn=MetaCollate())
            test_loader = torch.utils.data.DataLoader(
                test_set, batch_size=opt.test_batch_size, sampler=sampler,
                collate_fn=MetaCollate())
        else:
            data_loader = torch.utils.data.DataLoader(
                dataset, batch_size=opt.batch_size, shuffle=True,
                collate_fn=MetaCollate())
            test_loader = torch.utils.data.DataLoader(
                test_set, batch_size=opt.test_batch_size, shuffle=False,
                collate_fn=MetaCollate())
    else:
        dataset = BitmapTaskRepeater(dataset)
        lhs = LengthHackSampler(
            opt.batch_size, BitmapTaskRepeater.key_sampler(opt.len, opt.repeat))
        data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=lhs,
                                                  num_workers=1, pin_memory=True)

    # Setting up the controller for the DNC
    if opt.controller_type == "lstm":
        controller_constructor = functools.partial(
            LSTMController, out_from_all_layers=opt.lstm_use_all_outputs)
    elif opt.controller_type == "linear":
        controller_constructor = FeedforwardController
    else:
        assert False, "Invalid controller: %s" % opt.controller_type

    device = torch.device('cuda') if opt.gpu != "none" else torch.device("cpu")
    print("DEVICE: ", device)
    print("Current model: ", opt.model)

    if opt.model.lower() == 'dnc':
        model = DNC(in_size, out_size, opt.data_word_size, opt.mem_count,
                    opt.n_read_heads, controller_constructor(opt.layer_sizes),
                    batch_first=True, mask=opt.masked_lookup,
                    dealloc_content=opt.dealloc_content,
                    link_sharpness_control=opt.sharpness_control,
                    mask_min=opt.mask_min, clip_controller=opt.clip_controller)
    elif opt.model.lower() == 'lstm':
        model = NLP_LSTM(out_size, in_size, sentence_length=10, device=device)
    elif opt.model.lower() == 'entnet':
        print(opt.task)
        model = RecurrentEntityNetwork(vocabulary_size=out_size,
                                       embedding_dim=in_size,
                                       sentence_lenght=10,
                                       device=device,
                                       task=opt.task)
    else:
        raise ValueError("Invalid model: {}".format(opt.model))

    params = [{
        'params': [p for n, p in model.named_parameters()
                   if not n.endswith(".bias")]
    }, {
        'params': [p for n, p in model.named_parameters()
                   if n.endswith(".bias")],
        'weight_decay': 0
    }]

    if isinstance(dataset, NLPTask):
        embedding = torch.nn.Embedding(len(dataset.vocabulary),
                                       in_size).to(device)
        params.append({'params': embedding.parameters(), 'weight_decay': 0})

    if opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, lr=opt.lr, weight_decay=opt.wd,
                                    momentum=opt.momentum)
    elif opt.optimizer == "adam":
        optimizer = torch.optim.Adam(params, lr=opt.lr, weight_decay=opt.wd)
    elif opt.optimizer == "rmsprop":
        optimizer = torch.optim.RMSprop(params, lr=opt.lr, weight_decay=opt.wd,
                                        momentum=opt.momentum, eps=1e-10)
    else:
        assert False, "Invalid optimizer: %s" % opt.optimizer

    lr_scheduler = None
    if opt.lr_scheduler == 'step':
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, opt.lr_step,
                                                       gamma=0.5)
    elif opt.lr_scheduler == 'cyclic':
        lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
            optimizer, opt.cyc_base, opt.cyc_max, opt.lr_step)

    n_params = sum([sum([t.numel() for t in d['params']]) for d in params])

    model = model.to(device)
    if embedding is not None and hasattr(embedding, "to"):
        embedding = embedding.to(device)

    i = 0
    epoch = 0
    loss_sum = 0

    Visdom.Text("Information").set(
        '<b>Name:</b> {n}<br><b>Batchsize:</b> {b}<br><b>Train Task:</b> {tt}, {tdp} data points <br>'
        '<b>Validation Task:</b> {vt}, {vdp} data points<br><b>Running on:</b> {device}<br>'
        '<b>Parameters:</b> {np}<br><b>Model:</b> {m}<br><b>Optimizer:</b> {opt}<br>'
        '<b>Initial LR:</b> {ilr}<br><b>Weight Decay:</b> {wd}<br>'
        '<b>Learning rate scheduler:</b> {lrs}<br><b>Start Time:</b> {dt}'.format(
            n=opt.name,
            b=opt.batch_size,
            tt=opt.babi_train_tasks or opt.task,
            vt=opt.babi_test_tasks or opt.task,
            tdp=len(dataset),
            vdp=len(test_set),
            device=device,
            np=n_params,
            m=opt.model,
            opt=opt.optimizer,
            ilr=opt.lr,
            wd=opt.wd,
            lrs=opt.lr_scheduler,
            dt=datetime.now().strftime("%a %d/%m/%Y, %H:%M")))

    loss_plot = Visdom.Plot2D("Train Loss", store_interval=opt.info_interval,
                              xlabel="iterations", ylabel="loss")
    test_loss_plot = Visdom.Plot2D("Validation Loss", store_interval=1,
                                   xlabel="Epoch", ylabel="Loss")
    ppl_plot = Visdom.Plot2D("Perplexity on Validation Data", store_interval=1,
                             xlabel="Epoch", ylabel="Perplexity")
    lr_plot = Visdom.Plot2D("Learning Rate", store_interval=1,
                            xlabel="epochs", ylabel="Learning Rate")

    if curriculum is not None:
        curriculum_plot = Visdom.Plot2D(
            "curriculum lesson" +
            (" (last %d)" % (curriculum.n_lessons - 1)
             if curriculum.n_lessons is not None else ""),
            xlabel="iterations", ylabel="lesson")
        curriculum_accuracy = Visdom.Plot2D("curriculum accuracy",
                                            xlabel="iterations",
                                            ylabel="accuracy")

    saver = Saver(os.path.join(opt.name, "save"),
                  short_interval=opt.save_interval)
    saver.register("model", StateSaver(model))
    saver.register("optimizer", StateSaver(optimizer))
    saver.register("i", GlobalVarSaver("i"))
    saver.register("epoch", GlobalVarSaver("epoch"))
    saver.register("loss_sum", GlobalVarSaver("loss_sum"))
    saver.register("loss_plot", StateSaver(loss_plot))
    saver.register("lr_plot", StateSaver(lr_plot))
    saver.register("train_loss_plot", StateSaver(test_loss_plot))
    saver.register("ppl_plot", StateSaver(ppl_plot))
    saver.register("dataset", StateSaver(dataset))
    if lr_scheduler:
        saver.register("lr_scheduler", StateSaver(lr_scheduler))
    if test_set:
        pass
        # saver.register("test_set", StateSaver(test_set))
    if curriculum is not None:
        saver.register("curriculum", StateSaver(curriculum))
        saver.register("curriculum_plot", StateSaver(curriculum_plot))
        saver.register("curriculum_accuracy", StateSaver(curriculum_accuracy))
    if isinstance(dataset, NLPTask):
        saver.register("word_embeddings", StateSaver(embedding))
    elif embedding is not None:
        saver.register("embeddings", StateSaver(embedding))

    if not saver.load(opt.load):
        model.reset_parameters()
        if embedding is not None:
            embedding.reset_parameters()

    visualizers = {}

    debug_schemas = {
        "read_head": {"list_dim": 2},
        "temporal_links/forward_dists": {"list_dim": 2},
        "temporal_links/backward_dists": {"list_dim": 2}
    }

    def plot_debug(debug, prefix="", schema={}):
        if debug is None:
            return
        for k, v in debug.items():
            curr_name = prefix + k
            if curr_name in debug_schemas:
                curr_schema = schema.copy()
                curr_schema.update(debug_schemas[curr_name])
            else:
                curr_schema = schema
            if isinstance(v, dict):
                plot_debug(v, curr_name + "/", curr_schema)
                continue
            data = v[0]
            if curr_schema.get("list_dim", -1) > 0:
                if data.ndim != 3:
                    print("WARNING: unknown data shape for array display: "
                          "%s, tensor %s" % (data.shape, curr_name))
                    continue
                n_steps = data.shape[curr_schema["list_dim"] - 1]
                if curr_name not in visualizers:
                    visualizers[curr_name] = [
                        Visdom.Heatmap(
                            curr_name + "_%d" % i,
                            dumpdir=os.path.join(opt.name, "preview")
                            if opt.dump_heatmaps else None)
                        for i in range(n_steps)
                    ]
                for i in range(n_steps):
                    visualizers[curr_name][i].draw(
                        index_by_dim(data, curr_schema["list_dim"] - 1, i))
            else:
                if data.ndim != 2:
                    print("WARNING: unknown data shape for simple display: "
                          "%s, tensor %s" % (data.shape, curr_name))
                    continue
                if curr_name not in visualizers:
                    visualizers[curr_name] = Visdom.Heatmap(
                        curr_name,
                        dumpdir=os.path.join(opt.name, "preview")
                        if opt.dump_heatmaps else None)
                visualizers[curr_name].draw(data)

    def run_model(input, debug=None):
        if isinstance(dataset, NLPTask):
            input = input["input"]
        else:
            input = input["input"] * 2.0 - 1.0
        full = False if opt.task in ['PTB', 'ptb'] else True
        return model(input, embed=embedding, full=full)  # debug=debug

    def multiply_grads(params, mul):
        if mul == 1:
            return
        for pa in params:
            for p in pa["params"]:
                p.grad.data *= mul

    def test():
        if test_set is None:
            return
        start_time = time.time()
        t = test_set.start_test()
        test_loss = []
        with torch.no_grad():
            for data in tqdm(test_loader):
                data = {k: v.to(device) if torch.is_tensor(v) else v
                        for k, v in data.items()}
                if hasattr(dataset, "prepare"):
                    data = dataset.prepare(data)
                net_out = run_model(data)
                test_set.verify_result(t, data, net_out)
                test_loss.append(
                    dataset.loss(net_out, data["output"]).data.item())
        avg_loss = sum(test_loss) / len(test_loss)
        perplexity = math.exp(avg_loss)
        test_loss_plot.add_point(epoch, avg_loss)
        if epoch > 5:  # perplexity is immensely high in the beginning
            ppl_plot.add_point(epoch, perplexity)
        test_set.show_test_results(epoch, t)
        print("Test done in %gs" % (time.time() - start_time))

    # def test_on_train(train_data):
    #     with torch.no_grad():
    #         net_out = run_model(train_data)

    print("Test interval: ", opt.test_interval)

    if opt.test_on_start.lower() in ["on", "1", "true", "quit"]:
        test()
        if opt.test_on_start.lower() == "quit":
            saver.write(i)
            sys.exit(-1)

    if opt.print_test:
        model.eval()
        total = 0
        correct = 0
        with torch.no_grad():
            for data in tqdm(test_loader):
                if not running:
                    return
                data = {k: v.to(device) if torch.is_tensor(v) else v
                        for k, v in data.items()}
                if hasattr(test_set, "prepare"):
                    data = test_set.prepare(data)
                net_out = run_model(data)
                c, t = test_set.curriculum_measure(net_out, data["output"])
                total += t
                correct += c
        print("Test result: %2.f%% (%d out of %d correct)" %
              (100.0 * correct / total, correct, total))
        model.train()
        return

    iter_start_time = time.time() if i % opt.info_interval == 0 else None
    data_load_total_time = 0

    start_i = i

    if opt.dump_profile:
        profiler = torch.autograd.profiler.profile(use_cuda=True)

    if opt.dump_heatmaps:
        dataset.set_dump_dir(os.path.join(opt.name, "preview"))

    @preview()
    def do_visualize(raw_data, output, pos_map, debug):
        if pos_map is not None:
            output = embedding.backmap_output(output, pos_map,
                                              raw_data["output"].shape[1])
        dataset.visualize_preview(raw_data, output)
        if debug is not None:
            plot_debug(debug)

    preview_timer = OnceEvery(opt.preview_interval)

    pos_map = None
    start_iter = i

    if curriculum is not None:
        curriculum.init()

    # ---- Actual running mode starts here ----
    while running:
        data_load_timer = time.time()
        epoch += 1
        avg_acc = {}
        print("Epoch {e}".format(e=epoch))
        for data in data_loader:
            if not running:
                break
            if loader_reset:
                print("Loader reset requested. Resetting...")
                loader_reset = False
                if curriculum is not None:
                    curriculum.lesson_started()
                break
            if opt.dump_profile:
                if i == start_i + 1:
                    print("Starting profiler")
                    profiler.__enter__()
                elif i == start_i + 5 + 1:
                    print("Stopping profiler")
                    profiler.__exit__(None, None, None)
                    print("Average stats")
                    print(profiler.key_averages().table("cpu_time_total"))
                    print("Writing trace to file")
                    profiler.export_chrome_trace(opt.dump_profile)
                    print("Done.")
                    sys.exit(0)
                else:
                    print("Step %d out of 5" % (i - start_i))
            debug.dbg_print("-------------------------------------")
            raw_data = data
            # Move tensors to the right device
            data = {k: v.to(device) if torch.is_tensor(v) else v
                    for k, v in data.items()}
            if hasattr(dataset, "prepare"):
                data = dataset.prepare(data)
            data_load_total_time += time.time() - data_load_timer
            need_preview = preview_timer()
            debug_data = {} if opt.debug and need_preview else None
            optimizer.zero_grad()
            if opt.n_subbatch == "auto":
                n_subbatch = math.ceil(data["input"].numel() /
                                       opt.max_input_count_per_batch)
            else:
                n_subbatch = int(opt.n_subbatch)
            real_batch = max(math.floor(opt.batch_size / n_subbatch), 1)
            n_subbatch = math.ceil(opt.batch_size / real_batch)
            remaining_batch = opt.batch_size % real_batch
            for subbatch in range(n_subbatch):
                if not running:
                    break
                input = data["input"]
                target = data["output"]
                # print(input.shape, target.shape)
                if n_subbatch != 1 and (subbatch * real_batch < input.shape[0]):
                    # print("from to: ", subbatch*real_batch, (subbatch+1)*real_batch)
                    input = input[subbatch * real_batch:
                                  (subbatch + 1) * real_batch]
                    target = target[subbatch * real_batch:
                                    (subbatch + 1) * real_batch]
                f2 = data.copy()
                f2["input"] = input
                output = run_model(f2)  # debug=debug_data if subbatch == n_subbatch - 1 else None
                # input shape:  batch_size x longest_sequence_length
                # output shape: batch_size x longest_sequence_length x vocabulary length
                l = dataset.loss(output, target)
                try:
                    debug.nan_check(l, force=True)
                except SystemExit:
                    print("in and out: ", input.shape, input,
                          output.shape, output)
                    print("subbatch, n_subbatch, real_batch:",
                          subbatch, n_subbatch, real_batch)
                    print("f2", f2)
                    print("data", data)
                    print("expected out and in 2: ",
                          f2['output'].shape, f2['input'].shape)
                    print("expected out and in 1: ",
                          data['output'].shape, data['input'].shape)
                    print("remaining batch", remaining_batch)
                    print("NaN check not passed")
                    sys.exit(1)
                l.backward()
                if curriculum is not None:
                    curriculum.update(
                        *dataset.curriculum_measure(output, target))
                if remaining_batch != 0 and subbatch == n_subbatch - 2:
                    multiply_grads(params, real_batch / remaining_batch)
            if n_subbatch != 1:
                if remaining_batch == 0:
                    multiply_grads(params, 1 / n_subbatch)
                else:
                    multiply_grads(params, remaining_batch / opt.batch_size)
            for p in params:
                try:
                    torch.nn.utils.clip_grad_norm_(p["params"], opt.grad_clip)
                except RuntimeError:
                    pass  # lstm cannot handle this right now
            optimizer.step()
            i += 1
            curr_loss = l.data.item()
            loss_plot.add_point(i, curr_loss)
            loss_sum += curr_loss
            if i % opt.info_interval == 0:
                tim = time.time()
                loss_avg = loss_sum / opt.info_interval
                if curriculum is not None:
                    curriculum_accuracy.add_point(i, curriculum.get_accuracy())
                    curriculum_plot.add_point(i, curriculum.step)
                message = "Iteration %d, loss: %.4f" % (i, loss_avg)
                if iter_start_time is not None:
                    message += " (%.2f ms/iter, load time %.2g ms/iter, visport: %s)" % (
                        (tim - iter_start_time) / opt.info_interval * 1000.0,
                        data_load_total_time / opt.info_interval * 1000.0,
                        Visdom.port)
                print(message)
                iter_start_time = tim
                loss_sum = 0
                data_load_total_time = 0
            debug.dbg_print("Iteration %d, loss %g" % (i, curr_loss))
            if need_preview:
                do_visualize(raw_data, output, pos_map, debug_data)
            dataset.verify_result(avg_acc, data, output)
            debug_tick = saver.tick(i)
            if opt.demo and opt.exit_after is None:
                running = False
                input("Press enter to quit.")
            if opt.exit_after is not None and (i - start_iter) >= opt.exit_after:
                running = False
            data_load_timer = time.time()
        if running:
            # Once every epoch
            test()
            for param_g in optimizer.param_groups:
                lr_plot.add_point(epoch, param_g['lr'])
            break
        dataset.show_test_results(epoch, avg_acc, x_label='Epoch')
        if lr_scheduler:
            lr_scheduler.step()
def upsnet_test():
    pprint.pprint(config)
    logger.info('test config:{}\n'.format(pprint.pformat(config)))

    # create models
    gpus = [int(_) for _ in config.gpus.split(',')]
    test_model = eval(config.symbol)().cuda(device=gpus[0])

    # create data loader
    test_dataset = eval(config.dataset.dataset)(
        image_sets=config.dataset.test_image_set.split('+'),
        flip=False,
        result_path=final_output_path,
        phase='test')
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.test.batch_size,
        shuffle=False,
        num_workers=0,
        drop_last=False,
        pin_memory=False,
        collate_fn=test_dataset.collate)

    if args.eval_only:
        results = pickle.load(
            open(os.path.join(final_output_path, 'results',
                              'results_list.pkl'), 'rb'))
        if config.test.vis_mask:
            test_dataset.vis_all_mask(
                results['all_boxes'], results['all_masks'],
                os.path.join(final_output_path, 'results', 'vis'))
        if config.network.has_rcnn:
            test_dataset.evaluate_boxes(
                results['all_boxes'],
                os.path.join(final_output_path, 'results'))
        if config.network.has_mask_head:
            test_dataset.evaluate_masks(
                results['all_boxes'], results['all_masks'],
                os.path.join(final_output_path, 'results'))
        if config.network.has_fcn_head:
            test_dataset.evaluate_ssegs(
                results['all_ssegs'],
                os.path.join(final_output_path, 'results', 'ssegs'))
            # logging.info('combined pano result:')
            # test_dataset.evaluate_panoptic(test_dataset.get_combined_pan_result(results['all_ssegs'], results['all_boxes'], results['all_masks'], stuff_area_limit=config.test.panoptic_stuff_area_limit), os.path.join(final_output_path, 'results', 'pans_combined'))
        if config.network.has_panoptic_head:
            logging.info('unified pano result:')
            test_dataset.evaluate_panoptic(
                test_dataset.get_unified_pan_result(
                    results['all_ssegs'], results['all_panos'],
                    results['all_pano_cls_inds'],
                    stuff_area_limit=config.test.panoptic_stuff_area_limit),
                os.path.join(final_output_path, 'results', 'pans_unified'))
        sys.exit()

    # preparing
    curr_iter = config.test.test_iteration
    if args.weight_path == '':
        test_model.load_state_dict(
            torch.load(os.path.join(
                config.output_path,
                os.path.basename(args.cfg).split('.')[0],
                '_'.join(config.dataset.image_set.split('+')),
                config.model_prefix + str(curr_iter) + '.pth')),
            resume=True)
    else:
        test_model.load_state_dict(torch.load(args.weight_path), resume=True)

    for p in test_model.parameters():
        p.requires_grad = False
    test_model = DataParallel(test_model, device_ids=gpus,
                              gather_output=False).to(gpus[0])

    # start testing
    test_model.eval()
    i_iter = 0
    idx = 0
    test_iter = test_loader.__iter__()
    all_boxes = [[] for _ in range(test_dataset.num_classes)]
    if config.network.has_mask_head:
        all_masks = [[] for _ in range(test_dataset.num_classes)]
    if config.network.has_fcn_head:
        all_ssegs = []
    if config.network.has_panoptic_head:
        all_panos = []
        all_pano_cls_inds = []
        panos = []

    data_timer = Timer()
    net_timer = Timer()
    post_timer = Timer()

    # while i_iter < len(test_loader):
    while i_iter < 5:
        data_timer.tic()
        batch = []
        labels = []
        for gpu_id in gpus:
            try:
                # .next() is Python 2 only; next() works in both
                data, label, _ = next(test_iter)
                if label is not None:
                    data['roidb'] = label['roidb']
                for k, v in data.items():
                    data[k] = v.pin_memory().to(gpu_id, non_blocking=True) \
                        if torch.is_tensor(v) else v
            except StopIteration:
                # loader exhausted: reuse the previous batch to fill the
                # remaining GPUs
                data = data.copy()
                for k, v in data.items():
                    data[k] = v.pin_memory().to(gpu_id, non_blocking=True) \
                        if torch.is_tensor(v) else v
            batch.append((data, None))
            labels.append(label)
            i_iter += 1
        im_infos = [_[0]['im_info'][0] for _ in batch]
        data_time = data_timer.toc()
        if i_iter > 10:
            net_timer.tic()
        with torch.no_grad():
            output = test_model(*batch)
            torch.cuda.synchronize()
            if i_iter > 10:
                net_time = net_timer.toc()
            else:
                net_time = 0
            output = im_detect(output, batch, im_infos)
        post_timer.tic()
        for score, box, mask, cls_idx, im_info in zip(output['scores'],
                                                      output['boxes'],
                                                      output['masks'],
                                                      output['cls_inds'],
                                                      im_infos):
            im_post(all_boxes, all_masks, score, box, mask, cls_idx,
                    test_dataset.num_classes,
                    np.round(im_info[:2] / im_info[2]).astype(np.int32))
            idx += 1
        if config.network.has_fcn_head:
            for i, sseg in enumerate(output['ssegs']):
                sseg = sseg.squeeze(0).astype('uint8')[:int(im_infos[i][0]),
                                                       :int(im_infos[i][1])]
                all_ssegs.append(
                    cv2.resize(sseg, None, None,
                               fx=1 / im_infos[i][2], fy=1 / im_infos[i][2],
                               interpolation=cv2.INTER_NEAREST))
        if config.network.has_panoptic_head:
            pano_cls_inds = []
            for i, (pano, cls_ind) in enumerate(
                    zip(output['panos'], output['pano_cls_inds'])):
                pano = pano.squeeze(0).astype('uint8')[:int(im_infos[i][0]),
                                                       :int(im_infos[i][1])]
                panos.append(
                    cv2.resize(pano, None, None,
                               fx=1 / im_infos[i][2], fy=1 / im_infos[i][2],
                               interpolation=cv2.INTER_NEAREST))
                pano_cls_inds.append(cls_ind)
            all_panos.extend(panos)
            panos = []
            all_pano_cls_inds.extend(pano_cls_inds)
        post_time = post_timer.toc()
        s = 'Batch %d/%d, data_time:%.3f, net_time:%.3f, post_time:%.3f' % (
            idx, len(test_dataset), data_time, net_time, post_time)
        logging.info(s)

    results = []
    # trim redundant predictions
    for i in range(1, test_dataset.num_classes):
        all_boxes[i] = all_boxes[i][:len(test_loader)]
        if config.network.has_mask_head:
            all_masks[i] = all_masks[i][:len(test_loader)]
    if config.network.has_fcn_head:
        all_ssegs = all_ssegs[:len(test_loader)]
    if config.network.has_panoptic_head:
        all_panos = all_panos[:len(test_loader)]

    os.makedirs(os.path.join(final_output_path, 'results'), exist_ok=True)
    results = {
        'all_boxes': all_boxes,
        'all_masks': all_masks if config.network.has_mask_head else None,
        'all_ssegs': all_ssegs if config.network.has_fcn_head else None,
        'all_panos': all_panos if config.network.has_panoptic_head else None,
        'all_pano_cls_inds': all_pano_cls_inds
        if config.network.has_panoptic_head else None,
    }
    with open(os.path.join(final_output_path, 'results',
                           'results_list.pkl'), 'wb') as f:
        pickle.dump(results, f, protocol=2)

    if config.test.vis_mask:
        test_dataset.vis_all_mask(
            all_boxes, all_masks,
            os.path.join(final_output_path, 'results', 'vis'))
    else:
        test_dataset.evaluate_boxes(
            all_boxes, os.path.join(final_output_path, 'results'))
        if config.network.has_mask_head:
            test_dataset.evaluate_masks(
                all_boxes, all_masks,
                os.path.join(final_output_path, 'results'))
        if config.network.has_panoptic_head:
            logging.info('unified pano result:')
            test_dataset.evaluate_panoptic(
                test_dataset.get_unified_pan_result(
                    all_ssegs, all_panos, all_pano_cls_inds,
                    stuff_area_limit=config.test.panoptic_stuff_area_limit),
                os.path.join(final_output_path, 'results', 'pans_unified'))
        if config.network.has_fcn_head:
            test_dataset.evaluate_ssegs(
                all_ssegs,
                os.path.join(final_output_path, 'results', 'ssegs'))
def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
    data = inputs.horizontal_concat(outputs)
    data = data.copy()

    # mark datetime column
    times = data.metadata.list_columns_with_semantic_types((
        "https://metadata.datadrivendiscovery.org/types/Time",
        "http://schema.org/DateTime",
    ))
    if len(times) != 1:
        raise ValueError(
            f"There are {len(times)} indices marked as datetime values. "
            f"Please only specify one")
    self._time_column = list(data)[times[0]]

    # if the datetime column is integer, parse it as a number of days
    if ("http://schema.org/Integer"
            in inputs.metadata.query_column(times[0])["semantic_types"]):
        self._integer_time = True
        data[self._time_column] = pd.to_datetime(
            data[self._time_column] - 1, unit="D")
    else:
        data[self._time_column] = pd.to_datetime(
            data[self._time_column], unit="s")

    # sort by time column
    data = data.sort_values(by=[self._time_column])

    # mark key and grp variables
    self.key = data.metadata.get_columns_with_semantic_type(
        "https://metadata.datadrivendiscovery.org/types/PrimaryKey")

    # mark target variables
    self._targets = data.metadata.list_columns_with_semantic_types((
        "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
        "https://metadata.datadrivendiscovery.org/types/TrueTarget",
        "https://metadata.datadrivendiscovery.org/types/Target",
    ))
    self._target_types = [
        "i" if "http://schema.org/Integer"
        in data.metadata.query_column(t)["semantic_types"]
        else "c" if "https://metadata.datadrivendiscovery.org/types/CategoricalData"
        in data.metadata.query_column(t)["semantic_types"]
        else "f"
        for t in self._targets
    ]
    self._targets = [list(data)[t] for t in self._targets]
    self.target_column = self._targets[0]

    # see if 'GroupingKey' has been marked,
    # otherwise fall through to use 'SuggestedGroupingKey'
    grouping_keys = data.metadata.get_columns_with_semantic_type(
        "https://metadata.datadrivendiscovery.org/types/GroupingKey")
    suggested_grouping_keys = data.metadata.get_columns_with_semantic_type(
        "https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey")
    if len(grouping_keys) == 0:
        grouping_keys = suggested_grouping_keys
        drop_list = []
    else:
        drop_list = suggested_grouping_keys

    grouping_keys_counts = [
        data.iloc[:, key_idx].nunique() for key_idx in grouping_keys
    ]
    grouping_keys = [
        group_key for count, group_key in
        sorted(zip(grouping_keys_counts, grouping_keys))
    ]
    self.filter_idxs = [list(data)[key] for key in grouping_keys]

    # drop index
    data.drop(columns=[list(data)[i] for i in drop_list + self.key],
              inplace=True)

    # check whether no grouping keys are labeled
    if len(grouping_keys) == 0:
        concat = pd.concat(
            [data[self._time_column], data[self.target_column]], axis=1)
        concat.columns = ['ds', 'y']
        concat['unique_id'] = 'series1'  # we have only one series
    else:
        # concatenate the columns in `grouping_keys` into a unique_id column
        concat = data.loc[:, self.filter_idxs].apply(
            lambda x: ' '.join([str(v) for v in x]), axis=1)
        concat = pd.concat(
            [concat, data[self._time_column], data[self.target_column]],
            axis=1)
        concat.columns = ['unique_id', 'ds', 'y']

    # series must be complete for the given frequency
    concat = DeepMarkovModelPrimitive._ffill_missing_dates_per_serie(
        concat, 'D')

    # remove duplicates
    concat = concat.drop_duplicates(['unique_id', 'ds'])

    self._training_inputs = concat
def __data_inverse_vae_sampling(self, data_row, num_samples, dataset="german"):
    """New sampling method which makes use of the trained variational
    autoencoder.

    Args:
        data_row: 1d numpy array, corresponding to a row
        num_samples: size of the neighborhood to learn the linear model
        dataset: dataset on which the variational autoencoder was trained

    Returns:
        A tuple (data, inverse), where:
            data: dense num_samples * K matrix, where categorical features
                are encoded with either 0 (not equal to the corresponding
                value in data_row) or 1. The first row is the original
                instance.
            inverse: same as data, except the categorical features are not
                binary, but categorical (as in the original data)

    Raises:
        FileNotFoundError: the VAE model of the given dataset cannot be
            found and loaded.
    """
    # NEXT STEPS
    # Later: categorical sampling improvements?
    # https://blog.evjang.com/2016/11/tutorial-categorical-variational.html
    # Or look into the numerical features only?
    import torch.utils.data

    num_cols = data_row.shape[0]
    data = np.zeros((num_samples, num_cols))
    categorical_features = range(num_cols)
    instance_sample = data_row
    scale = self.scaler.scale_
    mean = self.scaler.mean_

    # VAE sampling
    if dataset == "german":
        from train_german_vae import VAE
    elif dataset == "compas":
        from train_compas_vae import VAE
    elif dataset == "cc":
        from train_cc_vae import VAE
    else:
        raise FileNotFoundError(
            "Please state one of the following datasets [german, compas, cc] "
            "and make sure that the respective VAE model exists.")

    device = torch.device("cuda") if torch.cuda.is_available() \
        else torch.device("cpu")
    model = VAE(data_row.shape[0]).to(device)
    model.load_state_dict(
        torch.load("../../experiments/{0}/vae_lime_{0}.pt".format(dataset)))
    model.eval()
    with torch.no_grad():
        sample = torch.randn(num_samples, 30).to(device)
        # Test idea: encode the data row once and sample from the generated
        # latent space.
        # x = np.asarray(data_row, dtype=np.float32)
        # for i in range(num_samples):
        #     sample, _, _ = model.forward(torch.from_numpy(x).to(device))
        #     data[i] = sample.cpu().numpy().reshape(-1, num_cols)
        # results do not differ from standard random sampling into decode
        sample = model.decode(sample).cpu()
        data = sample.numpy().reshape(num_samples, num_cols)
        # data = [np.round(i, 0) for i in data]

    # data = self.random_state.normal(0, 1, num_samples * num_cols).reshape(
    #     num_samples, num_cols)
    if self.sample_around_instance:
        data = data * scale + instance_sample
    else:
        data = data * scale + mean

    # Convert categorical sample columns to 0-1.
    # Decide how to handle scaling here; we want to keep the scaling as is.
    # They mainly exploited numerical values. Think about the implications
    # here for our approach.
    categorical_features = self.categorical_features
    first_row = data_row
    data[0] = data_row.copy()
    inverse = data.copy()
    for column in categorical_features:
        # data[:, column] = utils.one_hot_encode(data[:, column])
        data[:, column] = np.round(data[:, column])
        values = self.feature_values[column]
        freqs = self.feature_frequencies[column]
        # inverse_column = self.random_state.choice(values, size=num_samples,
        #                                           replace=True, p=freqs)
        # Here we NEED to copy!
        inverse_column = data[:, column].copy()
        binary_column = (inverse_column == first_row[column]).astype(int)
        binary_column[0] = 1
        inverse_column[0] = data[0, column]
        data[:, column] = binary_column
        inverse[:, column] = inverse_column
    inverse[0] = data_row
    return data, inverse
def __init__(self, data):
    self.data = data.copy()
def __init__(self, root, phase, debug=1):
    file_name = phase
    fn = file_name
    mode = phase
    file_path = os.path.join(root, fn + '.csv')
    x_list = []
    y_list = []
    self.max_y = 0
    self.min_data_list_size = 4  # TODO 2005 min data list
    f = open(file_path, 'r', encoding='utf-8-sig')
    # skip two lines
    f.readline()
    f.readline()
    # Of the train dataset (2020-01-01 to 2020-05-01), March 30 was not collected
    idx = 0
    max_idx = 0  # TODO 1000 max idx
    min_data_list_size = self.min_data_list_size
    data_list = []
    ymdh_prev = None
    data_discontinuous_cnt = 0
    _999_encountered = 1  # -999 marks prediction data
    if mode == "test":
        _999_encountered = 0
    # read the file line by line
    for line in f.readlines():
        if max_idx and max_idx == idx:
            log.info("**** max idx encountered. %s" % max_idx)
            break
        row = np.array(line.strip().split(','))
        ymd = int(row[0])
        hour = int(row[1])
        ymdh = datetime.datetime.strptime("%04d %02d" % (ymd, hour),
                                          '%Y%m%d %H')
        # convert the string fields to floats
        # (np.asfarray was removed in NumPy 2.0; np.asarray is equivalent)
        data = np.asarray(row[2:], dtype=float)
        max_y = max(data)
        if max_y > self.max_y:
            self.max_y = max_y
        data_list_clear = 0
        if ymdh_prev is not None:
            duration = ymdh - ymdh_prev
            debug and log.info("duration seconds = %s" % duration.seconds)
            if duration.seconds != 3600:
                data_list_clear = 1
        if data_list_clear:
            # if the time gap is not exactly one hour, rebuild the data list
            data_list.clear()
            data_discontinuous_cnt += 1
            if debug:
                log.info("[%s][%04d] %s" % (fn, idx, LINE))
                log.info("[%s][%04d] hour is not continuous. ymdh = %s, "
                         "ymdh_prev = %s" % (fn, idx, ymdh, ymdh_prev))
                log.info("[%s][%04d] %s" % (fn, idx, LINE))
        debug and log.info("[%s][%04d] %08d, %02d, input : %s"
                           % (fn, idx, ymd, hour, str(data)))
        if not _999_encountered:
            if (-999 in data) or (-999.0 in data):
                _999_encountered = 1
                print(LINE)
                log.info("[%s][%04d] _999_encountered = %s"
                         % (fn, idx, _999_encountered))
                print(LINE)
        if len(data_list) < min_data_list_size:
            data_list.append(data.copy())
        else:
            if _999_encountered:
                # x data generation: flatten the sliding window
                x_data = []
                for r in data_list:
                    for d in r:
                        x_data.append(d)
                x_data = np.array([x_data])
                x_list.append(x_data)
                # y data generation
                y_data = np.array(data.copy())
                y_list.append(y_data)
            data_list.pop(0)
            data_list.append(data.copy())
        ymdh_prev = ymdh
        idx += 1
    # end of line-by-line reading
    self.x_list = np.array(x_list)
    self.y_list = np.array(y_list)
    debug and log.info("[%s] data_discontinuous_cnt : %d"
                       % (fn, data_discontinuous_cnt))
    log.info("[%s] max_y = %s" % (fn, self.max_y))
# We use MCAR (missing completely at random) as the missingness mechanism:
# every cell has a 0.1 probability of being set to NaN.
import random
import numpy as np
import pandas as pd

data = pd.read_csv('Tab.delimited.Cleaned.dataset.WITH.variable.labels.csv', sep='\t', engine='python')
file = open('categorical.txt')
labels = []
for line in file:
    word = line.rstrip('\n')
    labels.append(word)
data = data.loc[:, labels]  # the columns selected for prediction
data = data.replace(' ', np.nan)
data = data.dropna()
prob_missing = 0.1
data_incomplete = data.copy()
ix = [(row, col) for row in range(data.shape[0]) for col in range(data.shape[1])]
for row, col in random.sample(ix, int(round(prob_missing * len(ix)))):
    data_incomplete.iat[row, col] = np.nan
missing_encoded = pd.get_dummies(data_incomplete)
complete_encoded = pd.get_dummies(data)
print(complete_encoded.info(verbose=True))
# Propagate each NaN into all one-hot columns derived from that original column.
for col in data.columns:
    missing_cols = missing_encoded.columns.str.startswith(str(col) + "_")
    missing_encoded.loc[data_incomplete[col].isnull(), missing_cols] = np.nan
print(missing_encoded.shape)
print(missing_encoded.values)
print(complete_encoded.shape)
print(complete_encoded.values.astype('float'))
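# An equivalent, vectorized way to impose an MCAR mask (a hedged alternative,
# not from the original script): draw a Bernoulli(prob_missing) mask over all
# cells. Unlike random.sample above, which removes exactly round(p * N) cells,
# the realized fraction here is only approximately prob_missing.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(100, 4)), columns=list("abcd"))
mask = rng.random(df.shape) < 0.1           # True where a cell goes missing
df_incomplete = df.mask(mask)               # NaN wherever mask is True
print(df_incomplete.isna().values.mean())   # close to 0.1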
def createMask(data):
    # Create a mask for the missing entries and fill them with a sentinel.
    miss_data = data.copy()
    missing_mask = np.isnan(data)   # boolean matrix: True where data is NaN
    miss_data[missing_mask] = -1.0  # fill missing entries with -1
    return miss_data, missing_mask
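# Usage sketch (added): keep the boolean mask alongside the -1-filled array so
# the missing positions can later be restored or excluded from a loss.
import numpy as np
x = np.array([[1.0, np.nan], [np.nan, 4.0]])
filled, mask = createMask(x)
print(filled)  # [[ 1. -1.] [-1.  4.]]
print(mask)    # [[False  True] [ True False]]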
def __init__(self, root, phase='train'):
    self.root = root
    self.phase = phase
    self.labels = {}
    # self.data_index_pool = [0, 1, 5, 6, 7, 19, 24, 25, 29, 31]
    # self.label_index_pool = [2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 26, 27, 28, 30, 32, 33, 34]
    self.label_path = os.path.join(self.root, self.phase + '.csv')
    with open(self.label_path, 'r', encoding='utf-8-sig') as f:
        # skip two lines
        f.readline()
        f.readline()
        input_data = []
        output_data = []
        # 20200330
        # One day of data (March 30) in the sample train dataset
        # (2020.01.01 ~ 05.01) was not collected due to a machine error.
        idx = 0
        max_idx = 0  # TODO 9000 max idx
        data_prev = []
        hour_prev = -1
        for line in f.readlines():
            values = line.strip().split(',')
            date = int(values[0])
            hour = int(values[1])
            if max_idx and max_idx == idx:
                log.info("**** max idx encountered. %s" % max_idx)
                break
            if 20200330 == date and phase == 'train':
                log.info("**** data encountered: %s, phase = %s \n" % (date, phase))
                break
            data = values[2:]
            data = np.asarray(data, dtype=np.float32)
            if phase != "train":
                input = data.copy()
                output = data.copy()
                input_data.append(input)
                output_data.append(output)
                if 0:
                    log.info("[%04d] date: %08d, hour: %02d, data len = %d =================================" % (idx, date, hour, len(input)))
                    log.info("[%04d] %08d, %02d, input : %s" % (idx, date, hour, input))
                    log.info("[%04d] %08d, %02d, output : %s" % (idx, date, hour, output))
                    pass
            elif 0 == idx:
                log.info("data_prev is null.")
            else:
                input = data_prev.copy()
                output = data.copy()
                input_data.append(input)
                output_data.append(output)
                if 1:
                    log.info("[%04d] date: %08d, hour: %02d, data len = %d =================================" % (idx, date, hour, len(input)))
                    log.info("[%04d] %08d, %02d, input : %s" % (idx, date, hour, input))
                    log.info("[%04d] %08d, %02d, output : %s" % (idx, date, hour, output))
                    pass
                pass
            data_prev = data
            idx += 1
            pass
        global is_corrcoeff_saved
        if not is_corrcoeff_saved:
            # TODO 0020 corrcoef matrix
            # np.corrcoef expects one variable per row, hence the transpose;
            # the deprecated `bias` argument has no effect and was dropped.
            covMatrix = np.corrcoef(np.transpose(input_data))
            print("===== corrcoef matrix ======")
            print(covMatrix)
            np.savetxt("corrcoef.csv", covMatrix, delimiter=",")
            is_corrcoeff_saved = 1
            pass
        if 0:
            # TODO 0006 forced system exit
            log.info("sys.exit(0)")
            import sys
            sys.exit(0)
            pass
        pass
    self.labels['input'] = input_data
    self.labels['output'] = output_data
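# Mini-example (added) of the correlation dump above: np.corrcoef expects one
# variable per row, hence the transpose of the (N, F) sample matrix.
import numpy as np
X = np.random.rand(100, 3)              # N samples, F features
C = np.corrcoef(X.T)                    # (F, F) correlation matrix
np.savetxt("corrcoef_demo.csv", C, delimiter=",")
print(C.shape)                          # (3, 3)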
def __data_inverse(self, data_row, num_samples): """Generates a neighborhood around a prediction. For numerical features, perturb them by sampling from a Normal(0,1) and doing the inverse operation of mean-centering and scaling, according to the means and stds in the training data. For categorical features, perturb by sampling according to the training distribution, and making a binary feature that is 1 when the value is the same as the instance being explained. Args: data_row: 1d numpy array, corresponding to a row num_samples: size of the neighborhood to learn the linear model Returns: A tuple (data, inverse), where: data: dense num_samples * K matrix, where categorical features are encoded with either 0 (not equal to the corresponding value in data_row) or 1. The first row is the original instance. inverse: same as data, except the categorical features are not binary, but categorical (as the original data) """ is_sparse = sp.sparse.issparse(data_row) if is_sparse: num_cols = data_row.shape[1] data = sp.sparse.csr_matrix((num_samples, num_cols), dtype=data_row.dtype) else: num_cols = data_row.shape[0] data = np.zeros((num_samples, num_cols)) categorical_features = range(num_cols) if self.discretizer is None: instance_sample = data_row scale = self.scaler.scale_ mean = self.scaler.mean_ if is_sparse: # Perturb only the non-zero values non_zero_indexes = data_row.nonzero()[1] num_cols = len(non_zero_indexes) instance_sample = data_row[:, non_zero_indexes] scale = scale[non_zero_indexes] mean = mean[non_zero_indexes] data = self.random_state.normal(0, 1, num_samples * num_cols).reshape( num_samples, num_cols) if self.sample_around_instance: data = data * scale + instance_sample else: data = data * scale + mean if is_sparse: if num_cols == 0: data = sp.sparse.csr_matrix( (num_samples, data_row.shape[1]), dtype=data_row.dtype) else: indexes = np.tile(non_zero_indexes, num_samples) indptr = np.array( range(0, len(non_zero_indexes) * (num_samples + 1), len(non_zero_indexes))) data_1d_shape = data.shape[0] * data.shape[1] data_1d = data.reshape(data_1d_shape) data = sp.sparse.csr_matrix( (data_1d, indexes, indptr), shape=(num_samples, data_row.shape[1])) categorical_features = self.categorical_features first_row = data_row else: first_row = self.discretizer.discretize(data_row) data[0] = data_row.copy() inverse = data.copy() for column in categorical_features: values = self.feature_values[column] freqs = self.feature_frequencies[column] inverse_column = self.random_state.choice(values, size=num_samples, replace=True, p=freqs) binary_column = (inverse_column == first_row[column]).astype(int) binary_column[0] = 1 inverse_column[0] = data[0, column] data[:, column] = binary_column inverse[:, column] = inverse_column if self.discretizer is not None: inverse[1:] = self.discretizer.undiscretize(inverse[1:]) inverse[0] = data_row return data, inverse
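# Illustration (added, hedged): for each categorical column the returned
# `data` holds a binary "same value as the explained row?" indicator, while
# `inverse` keeps the sampled category. The per-column step in isolation:
import numpy as np
rng = np.random.RandomState(0)
values = np.array([0, 1, 2])
freqs = np.array([0.2, 0.5, 0.3])              # training frequencies
first_val = 1                                  # value in the explained row
inverse_column = rng.choice(values, size=5, replace=True, p=freqs)
binary_column = (inverse_column == first_val).astype(int)
binary_column[0] = 1                           # row 0 is the instance itself
inverse_column[0] = first_val
print(inverse_column, binary_column)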
            (coordy + yd) <= 0) | (
            (coordx + xd) >= data.shape[2]) | (
            (coordx + xd) <= 0):
        continue
    temp = int(redu_range * 10)
    redu = random.randrange(0, temp, 1)
    print(coordz + zd, coordy + yd, coordx + xd)
    rand = random.randrange(0, 10, 1)
    # With probability 4/10, darken the voxel less (factor 0.55 instead of 0.4).
    if (rand == 7) | (rand == 6) | (rand == 9) | (rand == 8):
        data[coordz + zd, coordy + yd, coordx + xd] = \
            (data[coordz + zd, coordy + yd, coordx + xd]) * 0.55
        continue
    data[coordz + zd, coordy + yd, coordx + xd] = \
        (data[coordz + zd, coordy + yd, coordx + xd]) * 0.4

with h5py.File('image-2.h5', 'r') as raw:
    data = (raw['image'][()])
print(data)
skimage.io.imsave('test2.tif', data.astype('uint8'))
data1 = data.copy()
print(data1 == data)  # element-wise comparison: all True right after the copy
abs_gap_aug(data1, 5, 0.5)
print("!!!!!!!!!!!!!!!")
skimage.io.imsave('test1.tif', data1.astype('uint8'))
print(data1 == data)  # False where the augmentation darkened voxels
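# Added note: `print(data1 == data)` dumps a full element-wise boolean array;
# a compact check that the copy diverged while the original stayed intact
# could look like this (hedged, illustrative only):
import numpy as np
a = np.arange(8.0).reshape(2, 4)
b = a.copy()
b[0, 0] *= 0.4
print(np.array_equal(a, b))     # False: the copy was modified
print(np.shares_memory(a, b))   # False: copy() allocated new storage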
def __init__(self, data): self.data = data.copy().astype(np.float32)
def main():
    global i
    global loss_sum
    global running

    parser = ArgumentParser()
    parser.add_argument("-bit_w", type=int, default=8, help="Bit vector length for copy task")
    parser.add_argument("-block_w", type=int, default=3, help="Block width for the associative recall task")
    parser.add_argument("-len", type=str, default="4", help="Sequence length for copy task", parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-repeat", type=str, default="1", help="Number of repeats for the repeat copy task", parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-batch_size", type=int, default=16, help="Batch size")
    parser.add_argument("-n_subbatch", type=str, default="auto", help="Average this many forward passes into one backward pass")
    parser.add_argument("-max_input_count_per_batch", type=int, default=6000, help="Max batch_size*len that can fit into memory")
    parser.add_argument("-lr", type=float, default=0.0001, help="Learning rate")
    parser.add_argument("-wd", type=float, default=1e-5, help="Weight decay")
    parser.add_argument("-optimizer", type=str, default="rmsprop", help="Optimizer algorithm")
    parser.add_argument("-name", type=str, help="Save training to this directory")
    parser.add_argument("-preview_interval", type=int, default=10, help="Show preview every nth iteration")
    parser.add_argument("-info_interval", type=int, default=10, help="Show info every nth iteration")
    parser.add_argument("-save_interval", type=int, default=500, help="Save network every nth iteration")
    parser.add_argument("-masked_lookup", type=bool, default=1, help="Enable masking in content lookups")
    parser.add_argument("-visport", type=int, default=-1, help="Port to run Visdom server on. -1 to disable")
    parser.add_argument("-gpu", default="auto", type=str, help="Run on this GPU.")
    parser.add_argument("-debug", type=bool, default=1, help="Enable debugging")
    parser.add_argument("-task", type=str, default="copy", help="Task to learn")
    parser.add_argument("-mem_count", type=int, default=16, help="Number of memory cells")
    parser.add_argument("-data_word_size", type=int, default=128, help="Memory word size")
    parser.add_argument("-n_read_heads", type=int, default=1, help="Number of read heads")
    parser.add_argument("-layer_sizes", type=str, default="256", help="Controller layer sizes. Separate with ,. For example 512,256,256", parser=lambda x: [int(y) for y in x.split(",") if y])
    parser.add_argument("-debug_log", type=bool, default=0, help="Enable debug log")
    parser.add_argument("-controller_type", type=str, default="lstm", help="Controller type: lstm or linear")
    parser.add_argument("-lstm_use_all_outputs", type=bool, default=1, help="Use all LSTM outputs as controller output vs use only the last layer")
    parser.add_argument("-momentum", type=float, default=0.9, help="Momentum for optimizer")
    parser.add_argument("-embedding_size", type=int, default=256, help="Size of word embedding for NLP tasks")
    parser.add_argument("-test_interval", type=int, default=10000, help="Run test in this interval")
    parser.add_argument("-dealloc_content", type=bool, default=1, help="Deallocate memory content, unlike DNC, which leaves it unchanged, just decreases the usage counter, causing problems with lookup")
    parser.add_argument("-sharpness_control", type=bool, default=1, help="Distribution sharpness control for forward and backward links")
    parser.add_argument("-think_steps", type=int, default=0, help="Idle steps before requiring the answer (for bAbi)")
    parser.add_argument("-dump_profile", type=str, save=False)
    parser.add_argument("-test_on_start", default="0", save=False)
    parser.add_argument("-dump_heatmaps", default=False, save=False)
    parser.add_argument("-test_batch_size", default=16)
    parser.add_argument("-mask_min", default=0.0)
    parser.add_argument("-load", type=str, save=False)
    parser.add_argument("-dataset_path", type=str, default="none", parser=ArgumentParser.str_or_none(), help="Specify babi path manually")
    parser.add_argument("-babi_train_tasks", type=str, default="none", parser=ArgumentParser.list_or_none(type=str), help="babi task list to use for training")
    parser.add_argument("-babi_test_tasks", type=str, default="none", parser=ArgumentParser.list_or_none(type=str), help="babi task list to use for testing")
    parser.add_argument("-babi_train_sets", type=str, default="train", parser=ArgumentParser.list_or_none(type=str), help="babi train sets to use")
    parser.add_argument("-babi_test_sets", type=str, default="test", parser=ArgumentParser.list_or_none(type=str), help="babi test sets to use")
    parser.add_argument("-noargsave", type=bool, default=False, help="Do not save modified arguments", save=False)
    parser.add_argument("-demo", type=bool, default=False, help="Do a single step with fixed seed", save=False)
    parser.add_argument("-exit_after", type=int, help="Exit after this amount of steps. Useful for debugging.", save=False)
    parser.add_argument("-grad_clip", type=float, default=10.0, help="Max gradient norm")
    parser.add_argument("-clip_controller", type=float, default=20.0, help="Max gradient norm")
    parser.add_argument("-print_test", default=False, save=False)

    parser.add_profile([
        ArgumentParser.Profile("babi", {
            "preview_interval": 10,
            "save_interval": 500,
            "task": "babi",
            "mem_count": 256,
            "data_word_size": 64,
            "n_read_heads": 4,
            "layer_sizes": "256",
            "controller_type": "lstm",
            "lstm_use_all_outputs": True,
            "momentum": 0.9,
            "embedding_size": 128,
            "test_interval": 5000,
            "think_steps": 3,
            "batch_size": 2
        }, include=["dnc-msd"]),
        ArgumentParser.Profile("repeat_copy", {
            "bit_w": 8,
            "repeat": "1-8",
            "len": "2-14",
            "task": "copy",
            "think_steps": 1,
            "preview_interval": 10,
            "info_interval": 10,
            "save_interval": 100,
            "data_word_size": 16,
            "layer_sizes": "32",
            "n_subbatch": 1,
            "controller_type": "lstm",
        }),
        ArgumentParser.Profile("repeat_copy_simple", {
            "repeat": "1-3",
        }, include="repeat_copy"),
        ArgumentParser.Profile("dnc", {"masked_lookup": False, "sharpness_control": False, "dealloc_content": False}),
        ArgumentParser.Profile("dnc-m", {"masked_lookup": True, "sharpness_control": False, "dealloc_content": False}),
        ArgumentParser.Profile("dnc-s", {"masked_lookup": False, "sharpness_control": True, "dealloc_content": False}),
        ArgumentParser.Profile("dnc-d", {"masked_lookup": False, "sharpness_control": False, "dealloc_content": True}),
        ArgumentParser.Profile("dnc-md", {"masked_lookup": True, "sharpness_control": False, "dealloc_content": True}),
        ArgumentParser.Profile("dnc-ms", {"masked_lookup": True, "sharpness_control": True, "dealloc_content": False}),
        ArgumentParser.Profile("dnc-sd", {"masked_lookup": False, "sharpness_control": True, "dealloc_content": True}),
        ArgumentParser.Profile("dnc-msd", {"masked_lookup": True, "sharpness_control": True, "dealloc_content": True}),
        ArgumentParser.Profile("keyvalue", {
            "repeat": "1",
            "len": "2-16",
            "mem_count": 16,
            "task": "keyvalue",
            "think_steps": 1,
            "preview_interval": 10,
            "info_interval": 10,
            "data_word_size": 32,
            "bit_w": 12,
            "save_interval": 1000,
            "layer_sizes": "32"
        }),
        ArgumentParser.Profile("keyvalue2way", {
            "task": "keyvalue2way",
        }, include="keyvalue"),
        ArgumentParser.Profile("associative_recall", {
            "task": "recall",
            "bit_w": 8,
            "len": "2-16",
            "mem_count": 64,
            "data_word_size": 32,
            "n_read_heads": 1,
            "layer_sizes": "128",
            "controller_type": "lstm",
            "lstm_use_all_outputs": 1,
            "think_steps": 1,
            "mask_min": 0.1,
            "info_interval": 10,
            "save_interval": 1000,
            "preview_interval": 10,
            "n_subbatch": 1,
        })
    ])

    opt = parser.parse()
    assert opt.name is not None, "Training dir (-name parameter) not given"
    opt = parser.sync(os.path.join(opt.name, "args.json"), save=not opt.noargsave)

    if opt.demo:
        Seed.fix()

    os.makedirs(os.path.join(opt.name, "save"), exist_ok=True)
    os.makedirs(os.path.join(opt.name, "preview"), exist_ok=True)

    gpu_allocator.use_gpu(opt.gpu)

    debug.enableDebug = opt.debug_log

    if opt.visport > 0:
        Visdom.start(opt.visport)
    Visdom.Text("Name").set(opt.name)

    class LengthHackSampler:
        def __init__(self, batch_size, length):
            self.length = length
            self.batch_size = batch_size

        def __iter__(self):
            while True:
                len = self.length() if callable(self.length) else self.length
                yield [len] * self.batch_size

        def __len__(self):
            return 0x7FFFFFFF

    embedding = None
    test_set = None
    curriculum = None
    loader_reset = False

    if opt.task == "copy":
        dataset = CopyData(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = in_size
    elif opt.task == "recall":
        dataset = AssociativeRecall(bit_w=opt.bit_w, block_w=opt.block_w)
        in_size = opt.bit_w + 2
        out_size = in_size
    elif opt.task == "keyvalue":
        assert opt.bit_w % 2 == 0, "Key-value datasets work only with even bit_w"
        dataset = KeyValue(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = opt.bit_w // 2
    elif opt.task == "keyvalue2way":
        assert opt.bit_w % 2 == 0, "Key-value datasets work only with even bit_w"
        dataset = KeyValue2Way(bit_w=opt.bit_w)
        in_size = opt.bit_w + 2
        out_size = opt.bit_w // 2
    elif opt.task == "babi":
        dataset = bAbiDataset(think_steps=opt.think_steps, dir_name=opt.dataset_path)
        test_set = bAbiDataset(think_steps=opt.think_steps, dir_name=opt.dataset_path, name="test")
        dataset.use(opt.babi_train_tasks, opt.babi_train_sets)
        in_size = opt.embedding_size
        print("bAbi: loaded total of %d sequences." % len(dataset))
        test_set.use(opt.babi_test_tasks, opt.babi_test_sets)
        out_size = len(dataset.vocabulary)
        print("bAbi: using %d sequences for training, %d for testing" % (len(dataset), len(test_set)))
    else:
        assert False, "Invalid task: %s" % opt.task

    if opt.task in ["babi"]:
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, num_workers=4, pin_memory=True, shuffle=True, collate_fn=MetaCollate())
        test_loader = torch.utils.data.DataLoader(test_set, batch_size=opt.test_batch_size, num_workers=opt.test_batch_size, pin_memory=True, shuffle=False, collate_fn=MetaCollate()) if test_set is not None else None
    else:
        dataset = BitmapTaskRepeater(dataset)
        data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=LengthHackSampler(opt.batch_size, BitmapTaskRepeater.key_sampler(opt.len, opt.repeat)), num_workers=1, pin_memory=True)

    if opt.controller_type == "lstm":
        controller_constructor = functools.partial(LSTMController, out_from_all_layers=opt.lstm_use_all_outputs)
    elif opt.controller_type == "linear":
        controller_constructor = FeedforwardController
    else:
        assert False, "Invalid controller: %s" % opt.controller_type

    model = DNC(in_size, out_size, opt.data_word_size, opt.mem_count, opt.n_read_heads, controller_constructor(opt.layer_sizes), batch_first=True, mask=opt.masked_lookup, dealloc_content=opt.dealloc_content, link_sharpness_control=opt.sharpness_control, mask_min=opt.mask_min, clip_controller=opt.clip_controller)

    params = [
        {'params': [p for n, p in model.named_parameters() if not n.endswith(".bias")]},
        {'params': [p for n, p in model.named_parameters() if n.endswith(".bias")], 'weight_decay': 0}
    ]

    device = torch.device('cuda') if opt.gpu != "none" else torch.device("cpu")
    print("DEVICE: ", device)

    if isinstance(dataset, NLPTask):
        embedding = torch.nn.Embedding(len(dataset.vocabulary), opt.embedding_size).to(device)
        params.append({'params': embedding.parameters(), 'weight_decay': 0})

    if opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, lr=opt.lr, weight_decay=opt.wd, momentum=opt.momentum)
    elif opt.optimizer == "adam":
        optimizer = torch.optim.Adam(params, lr=opt.lr, weight_decay=opt.wd)
    elif opt.optimizer == "rmsprop":
        optimizer = torch.optim.RMSprop(params, lr=opt.lr, weight_decay=opt.wd, momentum=opt.momentum, eps=1e-10)
    else:
        assert False, "Invalid optimizer: %s" % opt.optimizer

    n_params = sum([sum([t.numel() for t in d['params']]) for d in params])
    print("Number of parameters: %d" % n_params)

    model = model.to(device)
    if embedding is not None and hasattr(embedding, "to"):
        embedding = embedding.to(device)

    i = 0
    loss_sum = 0

    loss_plot = Visdom.Plot2D("loss", store_interval=opt.info_interval, xlabel="iterations", ylabel="loss")

    if curriculum is not None:
        curriculum_plot = Visdom.Plot2D("curriculum lesson" + (" (last %d)" % (curriculum.n_lessons - 1) if curriculum.n_lessons is not None else ""), xlabel="iterations", ylabel="lesson")
        curriculum_accuracy = Visdom.Plot2D("curriculum accuracy", xlabel="iterations", ylabel="accuracy")

    saver = Saver(os.path.join(opt.name, "save"), short_interval=opt.save_interval)
    saver.register("model", StateSaver(model))
    saver.register("optimizer", StateSaver(optimizer))
    saver.register("i", GlobalVarSaver("i"))
    saver.register("loss_sum", GlobalVarSaver("loss_sum"))
    saver.register("loss_plot", StateSaver(loss_plot))
    saver.register("dataset", StateSaver(dataset))
    if test_set:
        saver.register("test_set", StateSaver(test_set))

    if curriculum is not None:
        saver.register("curriculum", StateSaver(curriculum))
        saver.register("curriculum_plot", StateSaver(curriculum_plot))
        saver.register("curriculum_accuracy", StateSaver(curriculum_accuracy))

    if isinstance(dataset, NLPTask):
        saver.register("word_embeddings", StateSaver(embedding))
    elif embedding is not None:
        saver.register("embeddings", StateSaver(embedding))

    if not saver.load(opt.load):
        model.reset_parameters()
        if embedding is not None:
            embedding.reset_parameters()

    visualizers = {}

    debug_schemas = {
        "read_head": {"list_dim": 2},
        "temporal_links/forward_dists": {"list_dim": 2},
        "temporal_links/backward_dists": {"list_dim": 2}
    }

    def plot_debug(debug, prefix="", schema={}):
        if debug is None:
            return

        for k, v in debug.items():
            curr_name = prefix + k
            if curr_name in debug_schemas:
                curr_schema = schema.copy()
                curr_schema.update(debug_schemas[curr_name])
            else:
                curr_schema = schema

            if isinstance(v, dict):
                plot_debug(v, curr_name + "/", curr_schema)
                continue

            data = v[0]

            if curr_schema.get("list_dim", -1) > 0:
                if data.ndim != 3:
                    print("WARNING: unknown data shape for array display: %s, tensor %s" % (data.shape, curr_name))
                    continue

                n_steps = data.shape[curr_schema["list_dim"] - 1]
                if curr_name not in visualizers:
                    visualizers[curr_name] = [Visdom.Heatmap(curr_name + "_%d" % i, dumpdir=os.path.join(opt.name, "preview") if opt.dump_heatmaps else None) for i in range(n_steps)]

                for i in range(n_steps):
                    visualizers[curr_name][i].draw(index_by_dim(data, curr_schema["list_dim"] - 1, i))
            else:
                if data.ndim != 2:
                    print("WARNING: unknown data shape for simple display: %s, tensor %s" % (data.shape, curr_name))
                    continue

                if curr_name not in visualizers:
                    visualizers[curr_name] = Visdom.Heatmap(curr_name, dumpdir=os.path.join(opt.name, "preview") if opt.dump_heatmaps else None)

                visualizers[curr_name].draw(data)

    def run_model(input, debug=None):
        if isinstance(dataset, NLPTask):
            input = embedding(input["input"])
        else:
            input = input["input"] * 2.0 - 1.0

        return model(input, debug=debug)

    def multiply_grads(params, mul):
        if mul == 1:
            return

        for pa in params:
            for p in pa["params"]:
                p.grad.data *= mul

    def test():
        if test_set is None:
            return

        print("TESTING...")
        start_time = time.time()
        t = test_set.start_test()
        with torch.no_grad():
            for data in tqdm(test_loader):
                data = {k: v.to(device) if torch.is_tensor(v) else v for k, v in data.items()}
                if hasattr(dataset, "prepare"):
                    data = dataset.prepare(data)

                net_out = run_model(data)
                test_set.verify_result(t, data, net_out)

        test_set.show_test_results(i, t)
        print("Test done in %gs" % (time.time() - start_time))

    if opt.test_on_start.lower() in ["on", "1", "true", "quit"]:
        test()
        if opt.test_on_start.lower() == "quit":
            saver.write(i)
            sys.exit(-1)

    if opt.print_test:
        model.eval()
        total = 0
        correct = 0
        with torch.no_grad():
            for data in tqdm(test_loader):
                if not running:
                    return
                data = {k: v.to(device) if torch.is_tensor(v) else v for k, v in data.items()}
                if hasattr(test_set, "prepare"):
                    data = test_set.prepare(data)

                net_out = run_model(data)
                c, t = test_set.curriculum_measure(net_out, data["output"])
                total += t
                correct += c

        print("Test result: %2.f%% (%d out of %d correct)" % (100.0 * correct / total, correct, total))
        model.train()
        return

    iter_start_time = time.time() if i % opt.info_interval == 0 else None
    data_load_total_time = 0

    start_i = i

    if opt.dump_profile:
        profiler = torch.autograd.profiler.profile(use_cuda=True)

    if opt.dump_heatmaps:
        dataset.set_dump_dir(os.path.join(opt.name, "preview"))

    @preview()
    def do_visualize(raw_data, output, pos_map, debug):
        if pos_map is not None:
            output = embedding.backmap_output(output, pos_map, raw_data["output"].shape[1])
        dataset.visualize_preview(raw_data, output)

        if debug is not None:
            plot_debug(debug)

    preview_timer = OnceEvery(opt.preview_interval)

    pos_map = None
    start_iter = i

    if curriculum is not None:
        curriculum.init()

    while running:
        data_load_timer = time.time()
        for data in data_loader:
            if not running:
                break

            if loader_reset:
                print("Loader reset requested. Resetting...")
                loader_reset = False
                if curriculum is not None:
                    curriculum.lesson_started()
                break

            if opt.dump_profile:
                if i == start_i + 1:
                    print("Starting profiler")
                    profiler.__enter__()
                elif i == start_i + 5 + 1:
                    print("Stopping profiler")
                    profiler.__exit__(None, None, None)
                    print("Average stats")
                    print(profiler.key_averages().table("cpu_time_total"))
                    print("Writing trace to file")
                    profiler.export_chrome_trace(opt.dump_profile)
                    print("Done.")
                    sys.exit(0)
                else:
                    print("Step %d out of 5" % (i - start_i))

            debug.dbg_print("-------------------------------------")

            raw_data = data

            data = {k: v.to(device) if torch.is_tensor(v) else v for k, v in data.items()}
            if hasattr(dataset, "prepare"):
                data = dataset.prepare(data)

            data_load_total_time += time.time() - data_load_timer

            need_preview = preview_timer()
            debug_data = {} if opt.debug and need_preview else None

            optimizer.zero_grad()

            if opt.n_subbatch == "auto":
                n_subbatch = math.ceil(data["input"].numel() / opt.max_input_count_per_batch)
            else:
                n_subbatch = int(opt.n_subbatch)

            real_batch = max(math.floor(opt.batch_size / n_subbatch), 1)
            n_subbatch = math.ceil(opt.batch_size / real_batch)
            remaining_batch = opt.batch_size % real_batch

            for subbatch in range(n_subbatch):
                if not running:
                    break

                input = data["input"]
                target = data["output"]
                if n_subbatch != 1:
                    input = input[subbatch * real_batch: (subbatch + 1) * real_batch]
                    target = target[subbatch * real_batch:(subbatch + 1) * real_batch]

                f2 = data.copy()
                f2["input"] = input
                output = run_model(f2, debug=debug_data if subbatch == n_subbatch - 1 else None)
                l = dataset.loss(output, target)
                debug.nan_check(l, force=True)
                l.backward()

                if curriculum is not None:
                    curriculum.update(*dataset.curriculum_measure(output, target))

                if remaining_batch != 0 and subbatch == n_subbatch - 2:
                    multiply_grads(params, real_batch / remaining_batch)

            if n_subbatch != 1:
                if remaining_batch == 0:
                    multiply_grads(params, 1 / n_subbatch)
                else:
                    multiply_grads(params, remaining_batch / opt.batch_size)

            for p in params:
                torch.nn.utils.clip_grad_norm_(p["params"], opt.grad_clip)

            optimizer.step()

            i += 1
            curr_loss = l.data.item()
            loss_plot.add_point(i, curr_loss)

            loss_sum += curr_loss

            if i % opt.info_interval == 0:
                tim = time.time()
                loss_avg = loss_sum / opt.info_interval

                if curriculum is not None:
                    curriculum_accuracy.add_point(i, curriculum.get_accuracy())
                    curriculum_plot.add_point(i, curriculum.step)

                message = "Iteration %d, loss: %.4f" % (i, loss_avg)
                if iter_start_time is not None:
                    message += " (%.2f ms/iter, load time %.2g ms/iter, visport: %s)" % (
                        (tim - iter_start_time) / opt.info_interval * 1000.0,
                        data_load_total_time / opt.info_interval * 1000.0,
                        Visdom.port)
                print(message)
                iter_start_time = tim
                loss_sum = 0
                data_load_total_time = 0

            debug.dbg_print("Iteration %d, loss %g" % (i, curr_loss))

            if need_preview:
                do_visualize(raw_data, output, pos_map, debug_data)

            if i % opt.test_interval == 0:
                test()

            saver.tick(i)

            if opt.demo and opt.exit_after is None:
                running = False
                input("Press enter to quit.")

            if opt.exit_after is not None and (i - start_iter) >= opt.exit_after:
                running = False

            data_load_timer = time.time()
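# Minimal sketch (added, hedged) of the sub-batching idea used in the training
# loop above: accumulate gradients over n_subbatch forward/backward passes,
# rescale, then take a single optimizer step - on average equivalent to one
# large batch. Names below are illustrative, not from the original code.
import torch
model = torch.nn.Linear(4, 1)
opt_ = torch.optim.SGD(model.parameters(), lr=0.1)
x, y = torch.randn(8, 4), torch.randn(8, 1)
n_subbatch, real_batch = 4, 2
opt_.zero_grad()
for s in range(n_subbatch):
    xs = x[s * real_batch:(s + 1) * real_batch]
    ys = y[s * real_batch:(s + 1) * real_batch]
    loss = torch.nn.functional.mse_loss(model(xs), ys)
    loss.backward()                  # gradients accumulate in .grad
for p in model.parameters():
    p.grad.data /= n_subbatch        # average instead of sum
opt_.step()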