Example #1
def consolidate_features_add(base_graphs, k, Gcollab_delta):
    # Get all the k-hop features
    features = consolidate_features(base_graphs, Gcollab_delta, k)
    Gcollab_base = base_graphs[graphutils.Graph.COLLAB]
    feature_graphs = graphutils.split_feat_graphs(base_graphs)
    org = len(features)
    cnt = 0

    # Add positive features for edges that are not k-hop
    for edge in Gcollab_delta.Edges():
        # Cap additions at the original number of k-hop pairs
        if cnt < org:
            u = edge.GetSrcNId()
            v = edge.GetDstNId()
            tup = (u, v)

            # Not k-hop and not an edge in base graph
            if (u, v) not in features and (
                    v, u) not in features and not Gcollab_base.IsEdge(u, v):
                features[tup] = get_all_features(feature_graphs, u, v)
                cnt += 1
        else:
            break

    print("Added %d" % (cnt - org))
    return features
Example #2
def consolidate_features(base_graphs, Gcollab_delta, k):
    features = {}

    Gcollab = base_graphs[graphutils.Graph.COLLAB]
    feature_graphs = graphutils.split_feat_graphs(base_graphs)

    for node in Gcollab.Nodes():
        nodeID = node.GetId()

        for neighborID in graphutils.getKHopN(Gcollab, nodeID, k):
            # Store each pair with the smaller ID first; use a separate pair
            # (u, v) so that nodeID is not clobbered for the remaining neighbors
            u, v = (neighborID, nodeID) if nodeID > neighborID else (nodeID, neighborID)

            if (u, v) in features:
                continue

            features[(u, v)] = []

    for graph in feature_graphs:
        features = getFeatures(Gcollab, Gcollab_delta, graph, features)

    return features
Example #3
feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1)
base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)

# from base graph take a random source node in every iteration
baseG = base_graphs[graphutils.Graph.COLLAB]
featG = graphutils.split_feat_graphs(base_graphs)

'''
followers = reader.followers()
baseG = graphutils.get_db_graph(graphutils.Graph.FOLLOW, uid, followers)

#Gp = snapext.EUNGraph()
#featG = [Gp, Gp, Gp]

featG = graphutils.split_feat_graphs(
    graphutils.get_feat_graphs(db, uid, None, date1))


class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    def end_headers(self):
        self.send_head()
        SimpleHTTPServer.SimpleHTTPRequestHandler.end_headers(self)

    def send_head(self):
        self.send_header("Content-type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")

    def do_GET(self):

        try:
            userid = self.path.lstrip("/")
Example #4
def main(args):
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])
    OUTFILE = args[5]

    if len(args) >= 7:
        beta = float(args[6])
        bstart = beta
        bfinish = beta
    else:
        bstart = 0
        bfinish = 20

    reader = DBReader(db)
    print("Getting uid")
    uid = reader.uid()

    print("Getting all the base graphs")
    feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1)
    Gcollab_base = graphutils.get_collab_graph(db, uid, date3, date1)
    base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)

    print("Getting Gcollab_delta graph")
    Gcollab_delta = graphutils.get_collab_graph(db, uid, date1, date2)

    # from base graph take a random source node in every iteration
    baseG = base_graphs[graphutils.Graph.COLLAB]
    featG = graphutils.split_feat_graphs(base_graphs)

    deltaNIDs = [node.GetId() for node in Gcollab_delta.Nodes()]
    baseNIDs = [node.GetId() for node in baseG.Nodes()]
    common_nodes = list(set(deltaNIDs).intersection(baseNIDs))

    ktop = 20
    subGraphK = 10
    f = open(OUTFILE, "w")

    print_and_log("# Beta\tRecall\tAvg. Rank\tPrec 20\n", f)
    n_iterations = 10
    it = 0
    sources = []

    while it < n_iterations:
        src = random.choice(common_nodes)
        subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
        #print 'subgraph: ', subBaseG.GetNodes(), subBaseG.GetEdges()
        actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
        #actual = evaluate.getYList(baseG, Gcollab_delta, src)

        # Consider source node if it forms at least one edge in delta graph
        if len(actual) > 0:
            sources.append(src)
            common_nodes.remove(src)
            it += 1
        else:
            print("Warning. Ignoring node with 0 new edges")
    print('number of nodes and edges in graph:', baseG.GetNodes(), baseG.GetEdges())

    for beta in frange(bstart, bfinish, 4):
        total_recall = 0
        total_avg_rank = 0
        total_preck = 0

        for src in sources:
            subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
            print('sub graph nodes:', subBaseG.GetNodes())
            print('sub graph edges:', subBaseG.GetEdges())
            topIDs = runrw.runrw(subBaseG, featG, src, beta)
            #topIDs = runrw.runrw(baseG, featG, Gcollab_delta, src, beta)

            # compare topIDs with list of labels already formed
            actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
            #actual = evaluate.getYList(baseG, Gcollab_delta, src)

            # ignore if the node did not form an edge in the delta
            recall, preck, average_rank = evaluate.getAccuracy(
                topIDs, actual, ktop)
            print(recall, preck, average_rank)

            total_recall += recall
            total_avg_rank += average_rank
            total_preck += len(preck)

        num = float(n_iterations)
        print_and_log(
            "%f\t%f\t%f\t%f\n" % (beta, total_recall / num,
                                  total_avg_rank / num, total_preck / num), f)

    f.close()
Example #5
import path
from learning import getFeatures
from db.interface import *
from analysis import graphutils

db = '../db/github.2013_1_20.2013_2_20.db'
reader = DBReader(db)
uid = reader.uid()

base_graphs = graphutils.get_db_graphs(db, uid)
Gcollab = base_graphs["Gcollab"]
feature_graphs = graphutils.split_feat_graphs(base_graphs)

reader.close()

# Compute walk features for a random start node in the collaboration graph
start = Gcollab.GetRndNId()
walk_features = getFeatures.get_walk_features(Gcollab, feature_graphs, start)
print(walk_features)