Example #1
def createAndUpdateReservoir(index):
    (varF, F, init) = addReservoirParameters(I, bn.getRandomParameters, N, K, isConstantConnectivity=constK)
    res = Reservoir(I, L, input_fp, directory + 'reservoir_%d_output' % index, N,
                    varF, F, init)
    res.update(numberOfUpdates)
    # print('Reservoir %d has vector representation\n' % index)
    # print(res.getHistoryAsVectors())

    # if index % 5 == 0:
    print(index, '. . .')
    return res
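
This worker reads I, N, K, constK, numberOfUpdates, input_fp, and directory from module scope, so it can be fanned out over a process pool. A minimal driver sketch, assuming those globals are defined at import time and that Reservoir instances are picklable (the pool size and reservoir count here are arbitrary):

from multiprocessing import Pool

if __name__ == '__main__':
    with Pool(4) as pool:
        # Each worker builds, runs, and returns its own reservoir;
        # results come back in index order.
        reservoirs = pool.map(createAndUpdateReservoir, range(10))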
Example #2
def updateReservoir(index):
    # Alternative: load the network parameters from files instead of
    # generating them randomly:
    # (varF, F, init) = addReservoirParameters(1, bn.getParametersFromFile,
    #                                          5, directory + linkages_filename,
    #                                          directory + functions_filename,
    #                                          directory + initial_filename)
    (varF, F, init) = addReservoirParameters(I,
                                             bn.getRandomParameters,
                                             N,
                                             K,
                                             isConstantConnectivity=constK)

    r = Reservoir(I, L, directory + inputs_filename,
                  outputs_filepath + '_reservoir_%d_output' % index, N, varF,
                  F, init)
    r.update(numberOfUpdates)
    print('done at index %d' % index)

    return r
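
updateReservoir differs from createAndUpdateReservoir only in taking its I/O paths from module-level filename variables. A hypothetical driver, assuming pandas is available and using the getHistoryAsVectors method shown in the later examples, could stack the returned state histories for analysis:

import pandas as pd

# Run a few reservoirs serially and concatenate their histories,
# keyed by reservoir index.
histories = [updateReservoir(i).getHistoryAsVectors() for i in range(3)]
combined = pd.concat([pd.DataFrame(h) for h in histories], keys=range(3))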
Example #3
def main():
    assert opt.dataset in ['gowalla', 'lastfm']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    cur_dir = os.getcwd()

    train_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset,
                                       phrase='train')
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True)
    train_for_res, _ = load_data_valid(
        os.path.expanduser(
            os.path.normpath(cur_dir + '/../datasets/' + opt.dataset +
                             '/raw/train.txt.csv')), 0)
    max_train_item = max(max(max(train_for_res[0])), max(train_for_res[1]))
    max_train_user = max(train_for_res[2])

    test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset,
                                      phrase='test1')
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             shuffle=False)
    test_for_res = load_testdata(
        os.path.expanduser(
            os.path.normpath(cur_dir + '/../datasets/' + opt.dataset +
                             '/raw/test1.txt.csv')))
    max_item = max(max(max(test_for_res[0])), max(test_for_res[1]))
    max_user = max(test_for_res[2])
    pre_max_item = max_train_item
    pre_max_user = max_train_user

    log_dir = (cur_dir + '/../log/' + str(opt.dataset) + '/paper200/' +
               str(opt) + '_fix_new_entropy(rank)_on_union+' + str(opt.u) +
               'tanh*u_AGCN***GAG-win' + str(opt.win_size) +
               '***concat3_linear_tanh_in_e2s_' +
               time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    if opt.dataset == 'gowalla':
        n_item = 30000
        n_user = 33005
    else:
        n_item = 10000
        n_user = 984

    model = GNNModel(hidden_size=opt.hidden_size,
                     n_item=n_item,
                     n_user=n_user,
                     u=opt.u).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[2, 3],
                                                     gamma=opt.lr_dc)

    logging.warning(model)

    # offline training on 'train' and test on 'test1'
    logging.warning('*********Begin offline training*********')
    updates_per_epoch = len(train_loader)
    updates_count = 0
    for train_epoch in tqdm(range(opt.epoch)):
        forward(model,
                train_loader,
                device,
                writer,
                train_epoch,
                optimizer=optimizer,
                train_flag=True,
                max_item_id=max_train_item,
                last_update=updates_count)
        scheduler.step()
        updates_count += updates_per_epoch
        with torch.no_grad():
            forward(model,
                    test_loader,
                    device,
                    writer,
                    train_epoch,
                    train_flag=False,
                    max_item_id=max_item)

    # reservoir construction with 'train'
    logging.warning(
        '*********Constructing the reservoir with offline training data*********'
    )
    res = Reservoir(train_for_res, opt.res_size)
    res.update(train_for_res)

    # test and online training on 'test2~5'
    logging.warning('*********Begin online training*********')
    now = time.asctime()
    for test_epoch in tqdm(range(1, 6)):
        if test_epoch != 1:
            test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' +
                                              opt.dataset,
                                              phrase='test' + str(test_epoch))
            test_loader = DataLoader(test_dataset,
                                     batch_size=opt.batch_size,
                                     shuffle=False)

            test_for_res = load_testdata(
                os.path.expanduser(
                    os.path.normpath(cur_dir + '/../datasets/' + opt.dataset +
                                     '/raw/test' + str(test_epoch) +
                                     '.txt.csv')))
            pre_max_item = max_item
            pre_max_user = max_user
            max_item = max(max(max(test_for_res[0])), max(test_for_res[1]))
            max_user = max(test_for_res[2])

            # test on the current test set
            # no need to test on test1 because it's done in the online training part
            # epoch + 10 is a number only for the visualization convenience
            with torch.no_grad():
                forward(model,
                        test_loader,
                        device,
                        writer,
                        test_epoch + 10,
                        train_flag=False,
                        max_item_id=max_item)

        # reservoir sampling
        sampled_data = fix_new_entropy_on_union(cur_dir,
                                                now,
                                                opt,
                                                model,
                                                device,
                                                res.data,
                                                test_for_res,
                                                len(test_for_res[0]) //
                                                opt.win_size,
                                                pre_max_item,
                                                pre_max_user,
                                                ent='wass')

        # cast the sampled set to dataset
        sampled_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' +
                                             opt.dataset,
                                             phrase='sampled' + now,
                                             sampled_data=sampled_data)
        sampled_loader = DataLoader(sampled_dataset,
                                    batch_size=opt.batch_size,
                                    shuffle=True)

        # update with the sampled set
        forward(model,
                sampled_loader,
                device,
                writer,
                test_epoch + opt.epoch,
                optimizer=optimizer,
                train_flag=True,
                max_item_id=max_item,
                last_update=updates_count)

        updates_count += len(test_loader)

        scheduler.step()

        res.update(test_for_res)
        os.remove('../datasets/' + opt.dataset + '/processed/sampled' + now +
                  '.pt')
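
Note that Reservoir here is the session reservoir from this recommendation codebase (built from train_for_res with capacity opt.res_size), not the Boolean-network Reservoir of the earlier examples. Assuming it follows classic reservoir sampling (Algorithm R), a minimal self-contained sketch of that technique:

import random

class SimpleReservoir:
    """Keep a fixed-size uniform sample over a stream (Algorithm R)."""

    def __init__(self, size):
        self.size = size
        self.data = []
        self.seen = 0  # total items observed so far

    def update(self, items):
        for item in items:
            self.seen += 1
            if len(self.data) < self.size:
                self.data.append(item)
            else:
                # Replace a stored item with probability size/seen, so
                # every item seen so far is equally likely to be kept.
                j = random.randrange(self.seen)
                if j < self.size:
                    self.data[j] = item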
Example #4
import numpy as np
import pandas as pd

import booleanNetwork as bn
import reservoir
from reservoir import Reservoir

L = 2  # number of connections per input

# File paths
input_fp = reservoir.directory + reservoir.inputs_filename
output_fp = reservoir.outputs_filepath

# Network parameters
linkages = bn.varF
functions = bn.F
initialNodes = bn.init

np.random.seed(3)
res = Reservoir(I, L, input_fp, output_fp, N, linkages, functions,
                initialNodes)
res.update(100)
df = pd.DataFrame(res.getHistoryAsVectors())

# X_train = np.array(df.iloc[:-20,:-1].sort_index(1))
# X_test = np.array(df.iloc[-20:,:-1].sort_index(1))
# y_train = df.iloc[:-20,-1]
# y_test = df.iloc[-20:,-1]

for i in range(5, 90, 5):
    X_train = df.iloc[:i, :-1]
    X_test = df.iloc[i:, :-1]

    y_train = df.iloc[:i, -1]
    y_test = df.iloc[i:, -1]
    print('split at %d: %d test rows' % (i, len(y_test)))
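
The loop above only constructs the splits; nothing is fit. A minimal sketch of fitting and scoring a linear model on each split, assuming the last column of df is the target (LinearRegression is the scikit-learn estimator imported in the next example):

from sklearn.linear_model import LinearRegression

scores = []
for i in range(5, 90, 5):
    lm = LinearRegression()
    lm.fit(df.iloc[:i, :-1], df.iloc[:i, -1])
    # R^2 on the held-out tail of the reservoir history
    scores.append(lm.score(df.iloc[i:, :-1], df.iloc[i:, -1]))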
Example #5
import pandas as pd
import scipy.stats as stats
from matplotlib import pyplot as plt
import seaborn as sns

import booleanNetwork as bn
from reservoir import Reservoir
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression

fp = '/Users/maxnotarangelo/Documents/ISB/BN_realization/time_series_data.csv'
N = 100
numberOfInputs = 1
L = 5
(random_linkages, random_functions, random_init) = bn.getRandomParameters(N, 2)
# The Reservoir constructor in the other examples takes an output filepath
# between the input path and N; write this run's history to the log file
# that is read back below.
r = Reservoir(numberOfInputs, L, fp,
              '/Users/maxnotarangelo/Documents/ISB/log.csv', N,
              random_linkages, random_functions, random_init)
r.update(100)

reservoir_data_file = '/Users/maxnotarangelo/Documents/ISB/log.csv'
rdf = pd.read_csv(reservoir_data_file)
rdf.head()

func_data_file = '/Users/maxnotarangelo/Documents/ISB/BN_realization/function_data.csv'
fdf = pd.read_csv(func_data_file)
fdf.head()

rdf['Majority'] = fdf.get('Majority')

X = rdf.drop(['Input Node 1', 'Majority'], axis=1)

lm = LinearRegression()
lm.fit(X, rdf['Majority'])
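
A natural follow-up (not in the original snippet) is to check the in-sample fit and see which reservoir nodes carry weight; lm.score and lm.coef_ are the standard scikit-learn accessors, and swapping Lasso (imported above but unused) in for LinearRegression would add sparsity:

print('R^2:', lm.score(X, rdf['Majority']))

# Largest-magnitude coefficients mark the nodes most predictive of
# the majority function.
coef = pd.Series(lm.coef_, index=X.columns).sort_values(key=abs)
print(coef.tail(10))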