Пример #1
0
def getGt(n=1558):
    """Write a flattened n-by-n identity pattern of "1"/"0" strings.

    The cells are emitted row by row; a cell is "1" when its row index
    equals its column index and "0" otherwise. The resulting list of
    n * n strings is handed to ``ut.writeList2Line`` together with
    ``predictionPath`` and ``gtFilename``.

    NOTE(review): the name suggests this is the ground-truth labelling
    for an n-by-n candidate matrix -- confirm against the callers.
    """
    labels = ["1" if row == col else "0"
              for row in range(n)
              for col in range(n)]
    ut.writeList2Line(predictionPath, gtFilename, labels)
Пример #2
0
def getGt(n=1558):
    """Emit the diagonal "1" / off-diagonal "0" labels for an n x n grid.

    Builds one row at a time (all "0" with a single "1" on the diagonal),
    concatenates the rows in order, and passes the flat list to
    ``ut.writeList2Line(predictionPath, gtFilename, ...)``.
    """
    flat = []
    for diagonal in range(n):
        # Everything left of the diagonal, the diagonal itself, the rest.
        flat.extend(["0"] * diagonal)
        flat.append("1")
        flat.extend(["0"] * (n - diagonal - 1))
    ut.writeList2Line(predictionPath, gtFilename, flat)
Пример #3
0
def nm(threshold=0.67):
    """Threshold two feature columns into "1"/"0" predictions and write them.

    Rows are read with ``ut.readCommaLine2List(outputPath, featureNmFilename)``.
    A row is labelled "1" when the float value of column 1 or of column 2
    is strictly greater than ``threshold``; otherwise it is labelled "0".
    The labels are written with
    ``ut.writeList2Line(predPath, predictionNmFilename, ...)``.

    Args:
        threshold: Exclusive lower bound a column must exceed for a "1".

    The previous version also kept a counter of positive rows that was
    never read; it has been dropped.
    """
    instances = ut.readCommaLine2List(outputPath, featureNmFilename)
    predictions = [
        # `or` short-circuits, so column 2 is only parsed when column 1
        # does not already exceed the threshold (same as before).
        "1" if (float(instance[1]) > threshold
                or float(instance[2]) > threshold) else "0"
        for instance in instances
    ]
    ut.writeList2Line(predPath, predictionNmFilename, predictions)
Пример #4
0
def writeMappingCandidates():
    """Split the twitter mapping file into parallel id / username lists.

    Each row of ``twitterMapping`` (read via ``ut.readCommaLine2List``)
    supplies a Google id in column 0 and a Twitter URL in column 1. The
    URL is turned into a username with ``getTwitterUsername``; rows whose
    username comes back as the empty string are skipped. The surviving
    ids and usernames are written, in matching order, to
    ``google/ids_mapping`` and ``twitter/names_mapping`` under
    ``inputPath``.
    """
    kept_ids = []
    kept_names = []
    for row in ut.readCommaLine2List(inputPath, "twitterMapping"):
        gid, url = row[0], row[1]
        name = getTwitterUsername(url)
        if name == "":
            # No usable username for this row; drop it from both outputs.
            continue
        kept_ids.append(gid)
        kept_names.append(name)
    ut.writeList2Line(inputPath, "google/ids_mapping", kept_ids)
    ut.writeList2Line(inputPath, "twitter/names_mapping", kept_names)
Пример #5
0
def ranking(n=1558, filename="ranking_origin_1558.txt"):
    """Turn a flat list of scores into per-row one-hot predictions.

    The scores are read with ``ut.readLine2List(predPath, filename)`` and
    consumed in consecutive chunks of ``n`` values. For each of the ``n``
    chunks, the position of the largest score (compared as ``float``; the
    first one wins on ties) is marked "1" and every other position "0".
    The concatenated rows are written with
    ``ut.writeList2Line(predPath, predictionRankFilename, ...)``.

    Args:
        n: Number of rows and the length of each row.
        filename: Score file to read from ``predPath``.

    Returns:
        The flat list of n * n "0"/"1" strings that was written.

    Raises:
        ValueError: If a chunk is empty (fewer than n * n scores), because
            ``max`` has nothing to choose from, or if a score is not a
            valid float.
    """
    scores = ut.readLine2List(predPath, filename)
    preds = list()
    for i in range(n):
        scores_i = scores[i * n:(i + 1) * n]
        max_index = max(enumerate(scores_i), key=lambda k: float(k[1]))[0]
        # Row width follows n; it used to be hard-coded to 1558, which
        # produced wrong-sized rows for any other n.
        preds_i = ["0"] * n
        preds_i[max_index] = "1"
        preds += preds_i
    ut.writeList2Line(predPath, predictionRankFilename, preds)
    return preds
Пример #6
0
def writeMappingCandidates():
    """Write the Google ids and Twitter usernames that can be paired up.

    Reads ``twitterMapping`` from ``inputPath``; column 0 of every row is
    a Google id and column 1 a Twitter URL. ``getTwitterUsername`` is
    applied to the URL, and only rows for which it returns something
    other than "" are kept. The kept ids go to ``google/ids_mapping`` and
    the kept usernames to ``twitter/names_mapping``, both under
    ``inputPath`` and in the same row order.
    """
    rows = ut.readCommaLine2List(inputPath, "twitterMapping")

    # Resolve every row first, then filter; row order is preserved.
    resolved = []
    for row in rows:
        google_id = row[0]
        resolved.append((google_id, getTwitterUsername(row[1])))
    usable = [(gid, name) for gid, name in resolved if name != ""]

    ut.writeList2Line(inputPath, "google/ids_mapping",
                      [gid for gid, _ in usable])
    ut.writeList2Line(inputPath, "twitter/names_mapping",
                      [name for _, name in usable])
Пример #7
0
def writeMissingGooglePosts():
    """Record the Google ids whose wall file is absent or holds a JSON object.

    The expected ids come from ``../data/google/ids_mapping``. Every file
    found under ``../data/google/wall`` counts as "parsed" by its file
    name. The output list written to ``../data/stat/google_ids_post_errors``
    consists of:

    1. ids that have no file anywhere under the wall folder, followed by
    2. names of files whose content decodes to a JSON object (``dict``).

    NOTE(review): presumably a dict payload is an error response while a
    real wall is a list -- confirm against the crawler.

    Fixes over the previous version:
    * results are accumulated across all directories visited by
      ``os.walk`` instead of being reset on each one (and a missing wall
      folder now reports every id as missing instead of none);
    * only read/decode failures are skipped, rather than every exception
      including ``KeyboardInterrupt``.
    """
    ids = ut.readLine2List("../data/google/", "ids_mapping")
    ids_parsed = set()
    ids_invalid = list()
    for root, _folders, filenames in os.walk("../data/google/wall"):
        ids_parsed.update(filenames)
        for filename in filenames:
            with open(os.path.join(root, filename), "r", errors="ignore") as fi:
                try:
                    result = json.loads(fi.read())
                except (OSError, ValueError, RecursionError):
                    # Best effort: unreadable or malformed files are
                    # skipped. json.JSONDecodeError is a ValueError.
                    continue
            if isinstance(result, dict):
                ids_invalid.append(filename)
    ids_errors = list(set(ids) - ids_parsed) + ids_invalid
    ut.writeList2Line("../data/stat/", "google_ids_post_errors", ids_errors)
Пример #8
0
def writeMissingGooglePosts():
    """Write the ids with a missing or object-valued Google wall file.

    Expected ids are read from ``../data/google/ids_mapping``. The wall
    folder ``../data/google/wall`` is walked recursively; each file name
    found is treated as a parsed id. The list written to
    ``../data/stat/google_ids_post_errors`` contains first the expected
    ids that have no file, then the names of files whose JSON content is
    a ``dict``.

    NOTE(review): a dict payload is presumably an API error body rather
    than a list of posts -- verify.

    Compared with the earlier implementation, the missing-id set is
    computed once after the whole walk (it used to be overwritten for
    every directory, and was empty when the folder did not exist), and
    the blanket ``except`` is narrowed to read/decode failures.
    """
    ids = ut.readLine2List("../data/google/", "ids_mapping")
    seen_files = set()
    dict_payloads = list()
    for root, _folders, filenames in os.walk("../data/google/wall"):
        seen_files.update(filenames)
        for filename in filenames:
            path = os.path.join(root, filename)
            with open(path, "r", errors="ignore") as fi:
                try:
                    result = json.loads(fi.read())
                except (OSError, ValueError, RecursionError):
                    # Skip files that cannot be read or are not valid
                    # JSON (JSONDecodeError subclasses ValueError).
                    continue
            if isinstance(result, dict):
                dict_payloads.append(filename)
    ids_errors = list(set(ids) - seen_files) + dict_payloads
    ut.writeList2Line("../data/stat/", "google_ids_post_errors", ids_errors)
Пример #9
0
def oneMapping(scores, outputFilename=predictionRankConstraintFilename, n=1558):
    """Resolve a one-candidate-per-user assignment and write it as one-hot rows.

    ``scores`` is consumed in consecutive chunks of ``n`` values. For each
    chunk the (index, score) pairs are sorted by score in descending
    order and stored as that user's candidate ranking. ``oneMappingRecur``
    is then invoked once per user index, starting at rank 0, and is
    expected to fill in the slot table.
    NOTE(review): its exact contract is not visible here -- confirm.

    Afterwards the slot table is read back as (slot["user"], slot key)
    pairs, ordered by the "user" value, and each pair yields one row of
    ``n`` "0" strings with a "1" at the slot key. The concatenated rows
    are written with ``ut.writeList2Line(predPath, outputFilename, ...)``.

    Args:
        scores: Flat sequence of scores, n values per user.
        outputFilename: File name passed to ``ut.writeList2Line``.
        n: Number of users on each side.
    """
    # One bookkeeping record per second-side slot, all starting zeroed.
    slots = {
        idx: {"active": 0, "user": 0, "index": 0, "score": 0}
        for idx in range(n)
    }
    # Per first-side user: candidates ranked best score first.
    ranked = [
        sorted(enumerate(scores[n * idx:n * (idx + 1)]),
               key=lambda k: k[1], reverse=True)
        for idx in range(n)
    ]
    # choose one mapping
    for idx in range(n):
        oneMappingRecur(ranked, slots, idx, 0)

    assigned = sorted([(slot["user"], key) for key, slot in slots.items()],
                      key=operator.itemgetter(0))
    flat = []
    for _user, slot_key in assigned:
        row = ["0"] * n
        row[slot_key] = "1"
        flat.extend(row)
    ut.writeList2Line(predPath, outputFilename, flat)
Пример #10
0
def reviseIdFile():
    """Rewrite the "saw" id list without duplicates and with the lost ids added.

    Reads the id list ``idsSawFileName`` and the ``tmp_ids`` list from
    ``snFolder``. Only the first occurrence of each id is kept, in the
    original order; the number of dropped duplicates is printed. The ids
    from ``tmp_ids`` are then appended unchanged and the result is written
    to ``../data/google/ids_saw2``.

    Changes from the previous version:
    * duplicates are tracked with a plain set. The old code probed
      ``g.node[id]`` on a networkx graph inside a bare ``except``; on
      networkx releases without ``Graph.node`` that probe always fails,
      so no duplicate was ever detected;
    * the unused read of the visited-ids file was removed;
    * the list is rebuilt in one pass instead of deleting by index.
    """
    ids_saw = ut.readLine2List(snFolder, idsSawFileName)
    loss = ut.readLine2List(snFolder, "tmp_ids")

    # revise id file duplicate problem
    seen = set()
    unique_ids = list()
    dup_count = 0
    for node_id in ids_saw:
        if node_id in seen:
            dup_count += 1
        else:
            seen.add(node_id)
            unique_ids.append(node_id)
    print(dup_count)
    unique_ids.extend(loss)
    ut.writeList2Line("../data/google/", "ids_saw2", unique_ids)
Пример #11
0
def reviseIdFile():
    """De-duplicate the "saw" id file and append the ids from ``tmp_ids``.

    The ids in ``idsSawFileName`` (under ``snFolder``) are filtered so that
    each id appears once, at the position of its first occurrence. The
    count of removed duplicates is printed. The contents of ``tmp_ids``
    are appended as-is, and the combined list is written to
    ``../data/google/ids_saw2``.

    The earlier implementation detected duplicates through
    ``g.node[id]`` on a networkx graph wrapped in a bare ``except``; where
    ``Graph.node`` is unavailable that lookup raises every time and the
    de-duplication silently did nothing. A set is used instead, which
    also avoids shadowing the builtin ``id`` and the quadratic
    delete-by-index loop. The visited-ids file was read but never used,
    so that read is gone as well.
    """
    ids_saw = ut.readLine2List(snFolder, idsSawFileName)
    loss = ut.readLine2List(snFolder, "tmp_ids")

    # revise id file duplicate problem
    already_seen = set()
    deduplicated = list()
    for current in ids_saw:
        if current not in already_seen:
            already_seen.add(current)
            deduplicated.append(current)
    # Every entry that was not kept is a repeated id.
    print(len(ids_saw) - len(deduplicated))
    ut.writeList2Line("../data/google/", "ids_saw2", deduplicated + list(loss))