Exemplo n.º 1
0
# Experiment configuration followed by the per-k driver loop.
# NOTE(review): presumably the directory where results are written; it is
# not used in the visible part of the script -- confirm.
save_location = '../../experiment_data/experiment_12'

# Values of k handed to ab.run_lsa() one at a time by the loop below.
k_list = [25, 50, 75, 100, 150, 200, 250, 300]

# Kernel names; not referenced in the visible part of the script.
kernel_list = ["custom", "linear", "poly_2", "poly_3", "rbf"]

# results -> k -> kernel
results = []

limit_1 = 400  # training set
limit_2 = 400  # validation set

# For every k: build a fresh classifier, download its cursors, run LSA with
# that k, then open one cursor per class over the labeled collection.
for k in k_list:
    # Parenthesised single-argument print: same output on Python 2, and no
    # longer a syntax error on Python 3.
    print("Running experiment for k=" + str(k) + "...")
    ab = ABClassifier()
    ab.download_cursors(limit_unlabeled=5000, limit_labeled=5000)
    #ab.download_tweet_cursors(limit_unlabeled = 800, limit_labeled = 1000)
    ab.run_lsa(k=k)

    # Earlier variant of the queries, keyed on a string "bullying_label":
    #pos_cursor_training = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).limit(400)
    #neg_cursor_training = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).limit(400)

    # At most 400 documents per class, selected on the boolean "bully" field.
    # NOTE(review): 400 equals limit_1 / limit_2 above; if one of them is the
    # intended cap, use the named constant here -- confirm before changing.
    pos_cursor_training = ab.labeled_collection.find(
        {"bully": True}, timeout=False).limit(400)
    neg_cursor_training = ab.labeled_collection.find(
        {"bully": False}, timeout=False).limit(400)
Exemplo n.º 2
0
	# Zero the outcome counters/rates of this record, then store it in
	# `results` under key k.
	# NOTE(review): the enclosing loop header and the creation of
	# `results_obj` are outside this excerpt -- presumably one record per k.
	results_obj['true_pos'] = 0
	results_obj['true_neg'] = 0
	results_obj['true_pos_rate'] = 0
	results_obj['true_neg_rate'] = 0
	results_obj['accuracy'] = 0
	results_obj['num_iterations'] = 0
	results[k] = results_obj



# Run the full sweep over k_list 20 times.
for i in xrange(0,20):
	for k in k_list:

		# Parenthesised single-argument print: same output on Python 2.
		print("Running experiment for k = " + str(k))
		ab = ABClassifier()
		ab.download_cursors(limit_unlabeled = limit_1, limit_labeled = limit_1)
		ab.run_lsa(k=k)

		print("Starting classification...")

		# NOTE(review): despite the name, this query selects tweets that
		# HAVE a "bullying_label" field -- confirm that is intended.
		unlabeled_cursor   = ab.db.tweets.find({"bullying_label" : {'$exists' :True}}, timeout = False)
		# Ask for the count once and reuse it for both class cursors instead
		# of calling count() twice.
		unlabeled_total = unlabeled_cursor.count()
		pos_cursor = ab.labeled_collection.find({"bully":True},timeout=False).limit(unlabeled_total)
		neg_cursor = ab.labeled_collection.find({"bully":False},timeout=False).limit(unlabeled_total)

		unl = []
		pos = []
		neg = []

		# Walk the three cursors in lockstep; zip() stops at the shortest one.
		for u,p,n in zip(unlabeled_cursor, pos_cursor, neg_cursor):
			
			u_obj = {}
Exemplo n.º 3
0
# Values of k handed to ab.run_lsa() one at a time by the loop below.
k_list = [25, 50, 75, 100, 150, 200, 250, 300]

# Kernel names; not referenced in the visible part of the script.
kernel_list = ["custom", "linear", "poly_2","poly_3", "rbf"]

# results -> k -> kernel
results = []

limit_1 = 400 #training set
limit_2 = 400 #validation set


# For every k: build a fresh classifier, download its cursors, run LSA with
# that k, open one cursor per class, and reset the sample/label lists.
for k in k_list:
	# Parenthesised single-argument print: same output on Python 2, and no
	# longer a syntax error on Python 3.
	print("Running experiment for k=" + str(k) + "...")
	ab = ABClassifier()
	ab.download_cursors(limit_unlabeled = 5000, limit_labeled = 5000)
	#ab.download_tweet_cursors(limit_unlabeled = 800, limit_labeled = 1000)
	ab.run_lsa(k=k)

	# Earlier variant of the queries, keyed on a string "bullying_label":
	#pos_cursor_training = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).limit(400)
	#neg_cursor_training = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).limit(400)

	# At most 400 documents per class, selected on the boolean "bully" field.
	# NOTE(review): 400 equals limit_1 / limit_2 above; if one of them is the
	# intended cap, use the named constant here -- confirm before changing.
	pos_cursor_training = ab.labeled_collection.find({"bully":True},timeout=False).limit(400)
	neg_cursor_training = ab.labeled_collection.find({"bully":False},timeout=False).limit(400)

	# Fresh, empty containers for each k; they are filled past the end of
	# this excerpt (presumably samples and their labels -- confirm).
	training = []
	tlabels = []

	pos_validation = []
	pos_vlabels = []
	neg_validation = []
Exemplo n.º 4
0
# One zero-initialised record per threshold, keyed by int(t * 100).
# `thresholds`, `limit_1` and `k` are expected to be defined earlier in the
# script (not visible in this excerpt).
# NOTE(review): int() truncates, so float rounding can shift a key (e.g.
# int(0.29 * 100) == 28). Left unchanged because code outside this excerpt
# may look records up with the same expression -- confirm before switching
# to round().
results = {}
for t in thresholds:
    results_obj = {
        't': t,
        'pos_list_size': 0,
        'neg_list_size': 0,
        'true_pos': 0,
        'true_neg': 0,
        'true_pos_rate': 0,
        'true_neg_rate': 0,
        'accuracy': 0,
        'num_iterations': 0,
    }
    results[int(t * 100)] = results_obj

ab = ABClassifier()
ab.download_cursors(limit_unlabeled=limit_1, limit_labeled=limit_1)
ab.run_lsa(k=k)

# Example tweets that are turned into context vectors below.
# Every entry must end with a comma: adjacent string literals are silently
# concatenated, which previously merged the last two tweets into one entry.
context_tweets = [
    "Literally go f**k yourself, because you're honestly pathetic.",
    "f**k you f****n w***e go f**k yourself stupid bitch",
    "but this bad I want to kick her ass cuz she thinks she's a hard chola like sit ur fat ass down lol",
    "course he did he's a whipped bitch that will say anything to make u happy, unlike ur mum who called u fat",
    "Fat pig. You're disgusting.",
    "From some illiterate online keyboard warrior? Go back to sucking your butt buddy's fat junk.",
    "God Says; Evil Don't Know The Way. You are gay with AIDS & your sin cost you your anointing! This is why you use DUST!",
    "f**k you stupid f****t f*g",
]

# One context vector per tweet, in the same order as context_tweets.
tweet_cvs = [ab.get_context_vector(c) for c in context_tweets]