Example #1
0
# --- Preprocessing: normalize the pair table ---
# Zero-fill missing values, then shift PairId from 1-based to 0-based so it
# can be used directly as an index into tensors/arrays.
X.fillna(0, inplace=True)
X["PairId"] = X["PairId"].apply(lambda x: int(x)-1)
#print X

# Build the graph edge list.  Alternative adjacency constructions are kept
# commented out below for experimentation (line-graph / tag-based variants).
edges = dataset.getAdjList_clique()
#edges = dataset.getAdjList_lineGraph()
#edges = dataset.getAdjList_tags()
#edges = dataset.getAdjList_allTags()

print "Length of edges returned", len(edges)

# Convert the edge list into the adjacency matrix (and row sums) expected by
# the GCN, sized by the number of rows in X.
Adj, rowsum = get_GCN_inputs(edges, len(X))
print "Adjacency Graph", Adj
#print X

# Node feature matrix (one row per pair).  The raw input features need no
# gradients, hence requires_grad=False.
X_Tags_Feature = Variable(getPostContexts(X, dataset), requires_grad=False)
print len(X_Tags_Feature[0])



# 80/20 train/test split over pair ids and their credibility labels, with a
# fixed seed for reproducibility.
Pairs_train, Pairs_test, Y_train, Y_test = train_test_split(X.loc[:,'PairId'], X.loc[:,'Credible'], test_size=0.2, random_state=1234)
# NOTE(review): torch.torch is an alias of the torch module itself, so
# torch.torch.LongTensor works but should simply read torch.LongTensor.
trainset = torch.utils.data.TensorDataset(torch.LongTensor(Pairs_train.values), torch.torch.LongTensor(Y_train.values))
testset = torch.utils.data.TensorDataset(torch.LongTensor(Pairs_test.values), torch.torch.LongTensor(Y_test.values))


# Data loader (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=trainset,
                                                batch_size=args.batch_size)

# The entire test split is evaluated as one batch.
test_loader = torch.utils.data.DataLoader(dataset=testset,
                                          batch_size=len(Pairs_test),
Example #2
0
    # Cross-validation over the dataset; each split yields train/test index
    # arrays for the current fold.  `kf`, `count` and `result_list` are
    # defined earlier (outside this view).
    for Pairs_train, Pairs_test in kf.split(Dataset):
        count += 1
        # After three folds have been scored, report the per-fold results and
        # their mean, persist the trained model weights, and stop the run.
        if count == 4:
            print result_list
            print sum(result_list) / len(result_list)
            torch.save(
                gcn_model.state_dict(),
                '/home/junting/Downloads/GCN/UserCredibility/Accuracy-Stackexchange/WWW/gcnmodel_'
                + args.root.split('/')[-1] + '_www' + '.pt')
            exit()

        # Build the fold-specific similarity adjacency from the training
        # pairs only (keeps test pairs out of the graph construction).
        edges4 = dataset.getAdjList_Similarity3(Pairs_train.tolist())
        Adj4, rowsum = get_GCN_inputs3(edges4, len(X))
        Adj4 = Adj4.to(device)
        # Refresh the Rating column from the dataset before recomputing the
        # node features for this fold.
        X["Rating"] = dataset.Rating["Rating"]
        data, X_Tags_Feature2 = getPostContexts(X, dataset)
        X_Tags_Feature = Variable(data, requires_grad=False).cuda()

        # GCN configuration.  Layer widths: nFeat -> 50 -> 10 -> 10 -> 5 -> 1
        # output unit; MSE loss with Adam over trainable parameters only.
        nComm = 1
        nHid1 = 50
        nHid2 = 10
        nHid3 = 10
        nHid4 = 5
        nFeat = X_Tags_Feature.shape[1]
        gcn_model = GCN_WWW(nFeat, nHid1, nHid2, nHid3, nHid4,
                            nComm).to(device)
        criterion = nn.MSELoss()
        gcn_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                                gcn_model.parameters()),
                                         lr=args.learning_rate,
Example #3
0
#NegClass_Sample = pd.DataFrame(NegClass_List, columns=PosClass.columns)
if len(PosClass) > len(NegClass):
    NegClass_Sample = NegClass
else:
    NegClass_Sample = NegClass.sample(n=len(PosClass))

X = pd.concat([PosClass, NegClass_Sample])

## PreProcess the dataset
cols = X.columns.drop('QTags')
X[cols] = X[cols].apply(pd.to_numeric, errors='coerce')
X.fillna(0, inplace=True)
X["PairId"] = X["PairId"].apply(lambda x: int(x) - 1)

X_Features_scaled = getPostContexts(X, dataset)
X.iloc[:, 6:] = X_Features_scaled

print X
exit()
#Text_Features_df = dataset.getTextFeatures(X)
#X_Text = X.merge(Text_Features_df, on='PairId')
#dtype = dict(PairId=int)

# #print X.dtypes
# Posters = X.AskerId.unique()
# Commenters = X.CommenterId.unique()
# Users = set(Posters).union(set(Commenters))
#
# #Create UserIndex
# UserIndex = dict()