# Build a composite rule over body0: join body0 with a renamed copy of
# itself (body1) on the shared middle attribute, then project onto the
# outer attributes "<attr>0" and "<attr>3".
join = MetaRule([body0, body1], [[[attr_name + "1"], [attr_name + "2"]]], alpha, False)
print(f"done join ({time.time() - begtime}s)")
proj = Project(join, [attr_name + "0", attr_name + "3"])
print(f"done project ({time.time() - begtime}s)")

# Deep-copy body0 and relabel its columns so its schema matches the
# projection, then take the disjunction of the two rules.
metap = copy.deepcopy(body0)
metap.df.columns = [attr_name + "0", attr_name + "3"]
disj = DisjunctionRule([metap, proj], alpha, 0)
print(f"done disjunction ({time.time() - begtime}s)")

# Training setup / hyper-parameters.
meta = disj
df = labels_df_train
label = 'Label'
step = 1e-3        # learning rate — presumably consumed by later training code; TODO confirm
batch_size = 32
epochs = 1000

# Align the ground-truth labels with the rows of the disjunction rule.
# (A commented-out experiment that symmetrized labels across reversed
# links was removed here as dead code.)
y = align_labels(meta, df, label)
print(f"done label alignment ({time.time() - begtime}s)")

# Wrap (row-index, label) pairs in a shuffling DataLoader over all rows.
data = TensorDataset(torch.arange(y.size()[0]), y)
all_loader = DataLoader(data, batch_size=batch_size, shuffle=True)
# Make a second copy of body0 whose columns are renamed so it can be
# joined with the original on the shared middle attribute.
body1 = copy.deepcopy(body0)
body1.df.columns = [attr_name + "2", attr_name + "3"]

# Join body0 with body1 on "<attr>1" == "<attr>2", then project onto the
# outer attributes "<attr>0" and "<attr>3".
join = MetaRule([body0, body1], [[[attr_name + "1"], [attr_name + "2"]]], alpha, False)
print("done join (" + str(time.time() - begtime) + "s)")
proj = Project(join, [attr_name + "0", attr_name + "3"])
print("done project (" + str(time.time() - begtime) + "s)")
#train(proj, labels_df_train, 'Label', 1e-3, 32, 400, True)

# Training setup / hyper-parameters for fitting the projected rule.
meta = proj
label = 'Label'
step = 1e-1        # learning rate for Adam below
batch_size = 32
epochs = 1000

# Align the ground-truth labels with the rows of the projected rule.
y = align_labels(meta, labels_df_train, label)
print("done label alignment (" + str(time.time() - begtime) + "s)")

# Indices of the positive examples; the loader iterates over positives only.
pos_idx = np.nonzero(y.numpy())[0].tolist()
pos_loader = DataLoader(TensorDataset(torch.LongTensor(pos_idx)), batch_size=batch_size, shuffle=True)

# Adam over the rule's learnable parameters; summed BCE-with-logits loss.
optimizer = optim.Adam(meta.parameters(), lr=step)
loss_fn = nn.BCEWithLogitsLoss(reduction="sum")
#basemetapredicate alpha: is predicates, project: sum

# NOTE(review): `iter` shadows the `iter` builtin — consider renaming.
iter = 0

# Training loop. NOTE(review): the loop body continues beyond this chunk;
# only its first statements are visible here.
for epoch in range(epochs):
    pos_loss = 0.0
    for idx in pos_loader:
        # All examples drawn from pos_loader are positives, so the target
        # batch is a column of ones matching the batch size.
        yb = torch.ones(idx[0].size()[0], 1)