def createAndUpdateReservoir(index):
    """Build a random Boolean-network reservoir, run it, and return it."""
    (varF, F, init) = addReservoirParameters(I, bn.getRandomParameters, N, K,
                                             isConstantConnectivity=constK)
    res = Reservoir(I, L, input_fp, directory + 'reservoir_%d_output' % index,
                    N, varF, F, init)
    res.update(numberOfUpdates)
    # print('Reservoir %d has vector representation\n' % index)
    # print(res.getHistoryAsVectors())
    # if index % 5 == 0: print(index, '. . .')
    return res
def updateReservoir(index):
    ## (varF, F, init) = addReservoirParameters(1, bn.getParametersFromFile, 5,
    ##                                          directory + linkages_filename,
    ##                                          directory + functions_filename,
    ##                                          directory + initial_filename)
    (varF, F, init) = addReservoirParameters(I, bn.getRandomParameters, N, K,
                                             isConstantConnectivity=constK)
    r = Reservoir(I, L, directory + inputs_filename,
                  outputs_filepath + '_reservoir_%d_output' % index,
                  N, varF, F, init)
    r.update(numberOfUpdates)
    print('done at index %d' % index)
    return r
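# Usage sketch (an assumption, not part of the original script): since both
# helpers take only an index, they can be mapped over a process pool to build
# many reservoirs in parallel. This assumes I, L, N, K, constK, directory,
# numberOfUpdates, and the filename globals are defined at module scope, and
# that Reservoir instances are picklable; the pool size and range are
# illustrative.
from multiprocessing import Pool

if __name__ == '__main__':
    with Pool(processes=4) as pool:
        reservoirs = pool.map(createAndUpdateReservoir, range(20))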
def main():
    assert opt.dataset in ['gowalla', 'lastfm']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cur_dir = os.getcwd()

    # offline training data
    train_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='train')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    train_for_res, _ = load_data_valid(
        os.path.expanduser(
            os.path.normpath(cur_dir + '/../datasets/' + opt.dataset + '/raw/train.txt.csv')), 0)
    max_train_item = max(max(max(train_for_res[0])), max(train_for_res[1]))
    max_train_user = max(train_for_res[2])

    # first online test split
    test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='test1')
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)
    test_for_res = load_testdata(
        os.path.expanduser(
            os.path.normpath(cur_dir + '/../datasets/' + opt.dataset + '/raw/test1.txt.csv')))
    max_item = max(max(max(test_for_res[0])), max(test_for_res[1]))
    max_user = max(test_for_res[2])
    pre_max_item = max_train_item
    pre_max_user = max_train_user

    # TensorBoard logging
    log_dir = (cur_dir + '/../log/' + str(opt.dataset) + '/paper200/' + str(opt)
               + '_fix_new_entropy(rank)_on_union+' + str(opt.u)
               + 'tanh*u_AGCN***GAG-win' + str(opt.win_size)
               + '***concat3_linear_tanh_in_e2s_'
               + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    if opt.dataset == 'gowalla':
        n_item = 30000
        n_user = 33005
    else:
        n_item = 10000
        n_user = 984

    model = GNNModel(hidden_size=opt.hidden_size, n_item=n_item,
                     n_user=n_user, u=opt.u).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2, 3],
                                                     gamma=opt.lr_dc)
    logging.warning(model)

    # offline training on 'train' and test on 'test1'
    logging.warning('*********Begin offline training*********')
    updates_per_epoch = len(train_loader)
    updates_count = 0
    for train_epoch in tqdm(range(opt.epoch)):
        forward(model, train_loader, device, writer, train_epoch,
                optimizer=optimizer, train_flag=True,
                max_item_id=max_train_item, last_update=updates_count)
        scheduler.step()
        updates_count += updates_per_epoch
        with torch.no_grad():
            forward(model, test_loader, device, writer, train_epoch,
                    train_flag=False, max_item_id=max_item)

    # reservoir construction with 'train'
    logging.warning('*********Constructing the reservoir with offline training data*********')
    res = Reservoir(train_for_res, opt.res_size)
    res.update(train_for_res)

    # test and online training on 'test2'~'test5'
    logging.warning('*********Begin online training*********')
    now = time.asctime()
    for test_epoch in tqdm(range(1, 6)):
        if test_epoch != 1:
            test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset,
                                              phrase='test' + str(test_epoch))
            test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)
            test_for_res = load_testdata(
                os.path.expanduser(
                    os.path.normpath(cur_dir + '/../datasets/' + opt.dataset
                                     + '/raw/test' + str(test_epoch) + '.txt.csv')))
            pre_max_item = max_item
            pre_max_user = max_user
            max_item = max(max(max(test_for_res[0])), max(test_for_res[1]))
            max_user = max(test_for_res[2])

            # test on the current test set; no extra pass on test1, since it
            # is already evaluated during offline training
            # test_epoch + 10 is a number only for the visualization convenience
            with torch.no_grad():
                forward(model, test_loader, device, writer, test_epoch + 10,
                        train_flag=False, max_item_id=max_item)

        # reservoir sampling
        sampled_data = fix_new_entropy_on_union(cur_dir, now, opt, model, device,
                                                res.data, test_for_res,
                                                len(test_for_res[0]) // opt.win_size,
                                                pre_max_item, pre_max_user, ent='wass')

        # cast the sampled set to a dataset
        sampled_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset,
                                             phrase='sampled' + now,
                                             sampled_data=sampled_data)
        sampled_loader = DataLoader(sampled_dataset, batch_size=opt.batch_size,
                                    shuffle=True)

        # update the model with the sampled set
        forward(model, sampled_loader, device, writer, test_epoch + opt.epoch,
                optimizer=optimizer, train_flag=True, max_item_id=max_item,
                last_update=updates_count)
        updates_count += len(test_loader)
        scheduler.step()

        res.update(test_for_res)
        os.remove('../datasets/' + opt.dataset + '/processed/sampled' + now + '.pt')
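# Hedged addition (not in the original file): a conventional entry-point
# guard, assuming `opt` is produced by an argparse block at module level as
# in typical training scripts.
if __name__ == '__main__':
    main()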
import numpy as np
import pandas as pd

import booleanNetwork as bn
import reservoir
from reservoir import Reservoir

# Assumes I (number of input nodes) and N (network size) are defined earlier
# in the notebook.
L = 2  # number of connections per input

# File paths
input_fp = reservoir.directory + reservoir.inputs_filename
output_fp = reservoir.outputs_filepath

# Network parameters
linkages = bn.varF
functions = bn.F
initialNodes = bn.init

np.random.seed(3)

res = Reservoir(I, L, input_fp, output_fp, N, linkages, functions, initialNodes)
res.update(100)
df = pd.DataFrame(res.getHistoryAsVectors())

# X_train = np.array(df.iloc[:-20, :-1].sort_index(1))
# X_test = np.array(df.iloc[-20:, :-1].sort_index(1))
# y_train = df.iloc[:-20, -1]
# y_test = df.iloc[-20:, -1]

# Expanding-window splits: train on the first i rows, hold out the rest
# (a readout model can then be fit per split, as in the sketch below).
for i in range(5, 90, 5):
    X_train = df.iloc[:i, :-1]
    X_test = df.iloc[i:, :-1]
    y_train = df.iloc[:i, -1]
    y_test = df.iloc[i:, -1]
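# A minimal sketch of how the expanding-window splits above could be scored
# (an assumption: the original cell stops at the splits). Fits a linear
# readout on the first i steps and scores it on the held-out tail; 'scores'
# and the choice of LinearRegression here are illustrative.
from sklearn.linear_model import LinearRegression

scores = []
for i in range(5, 90, 5):
    readout = LinearRegression()
    readout.fit(df.iloc[:i, :-1], df.iloc[:i, -1])
    scores.append((i, readout.score(df.iloc[i:, :-1], df.iloc[i:, -1])))
print(scores)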
import pandas as pd
import scipy.stats as stats
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression

import booleanNetwork as bn
from reservoir import Reservoir

fp = '/Users/maxnotarangelo/Documents/ISB/BN_realization/time_series_data.csv'

N = 100  # number of nodes in the Boolean network
numberOfInputs = 1
L = 5  # number of connections per input

(random_linkages, random_functions, random_init) = bn.getRandomParameters(N, 2)
r = Reservoir(numberOfInputs, L, fp, N, random_linkages, random_functions, random_init)
r.update(100)

# Read back the reservoir's state history and the target function's outputs
reservoir_data_file = '/Users/maxnotarangelo/Documents/ISB/log.csv'
rdf = pd.read_csv(reservoir_data_file)
rdf.head()

func_data_file = '/Users/maxnotarangelo/Documents/ISB/BN_realization/function_data.csv'
fdf = pd.read_csv(func_data_file)
fdf.head()

# Attach the Majority target, then regress the reservoir state on it
rdf['Majority'] = fdf.get('Majority')
X = rdf.drop(['Input Node 1', 'Majority'], axis=1)
lm = LinearRegression()
lm.fit(X, rdf['Majority'])
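# Hypothetical follow-up (not in the original notebook): inspect the fitted
# readout. lm.coef_ holds one weight per reservoir node; large magnitudes
# mark the nodes most informative about the Majority target.
weights = pd.Series(lm.coef_, index=X.columns)
print(weights.abs().sort_values(ascending=False).head(10))
print('R^2 on training data:', lm.score(X, rdf['Majority']))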