def run_group():
    """Evaluate name-based age-group prediction against a labelled CSV file.

    Reads the hard-coded CSV, taking the full name from column 1 and the
    age from column 2 (column 0 is not used).  For every row the first
    whitespace-separated token of the full name is passed to ``babyname``
    to obtain per-group probabilities and the most likely group.  A debug
    tuple is printed per row and a 4x4 confusion matrix is printed at the
    end, with rows indexed by the actual age group and columns by the
    predicted group.

    Rows for which ``babyname.maxLikelyGroupForName`` returns -1 are not
    counted in the matrix.

    NOTE(review): a second ``run_group`` is defined further down in this
    file; if both live at module level the later one rebinds the name --
    confirm which one is meant to be called.
    """
    file_dir = "/home/pyongjoo/workspace/tweetsprocess/data/name-feature/"
    infile = file_dir + "screename-May10-AlmostVerified.csv"
    # Single-argument print(...) is identical to the print statement here.
    print(infile)

    # conf_matrix[actual_group][predicted_group] -> count
    conf_matrix = [[0, 0, 0, 0] for _ in range(4)]

    # The context manager closes the input file even if a row fails to
    # process.  'rb' is the mode the Python 2 csv module expects.
    with open(infile, 'rb') as csvfile:
        for row in csv.reader(csvfile):
            fullname = row[1]
            age = row[2]
            firstname = fullname.split(' ')[0]

            age_group = babyname.ageToAgeGroup(age)
            group_prob = babyname.probHashInGroupForName(firstname)
            predicted_group = babyname.maxLikelyGroupForName(firstname)

            # Printed as a tuple, exactly as before.
            print((age_group, predicted_group,
                   group_prob[0], group_prob[1],
                   group_prob[2], group_prob[3]))

            # -1 is treated as "no prediction"; such rows are skipped.
            if predicted_group != -1:
                conf_matrix[age_group][predicted_group] += 1

    print("Confusion Matrix:")
    for matrix_row in conf_matrix:
        # Each count is followed by a single space, then a newline.
        sys.stdout.write(
            "".join(str(count) + " " for count in matrix_row) + "\n")
def run_year():
    """Evaluate name-based birth-year prediction against a labelled CSV file.

    Reads the hard-coded CSV, taking the full name from column 1 and the
    age from column 2 (column 0 is not used).  For every row the first
    whitespace-separated token of the full name is passed to
    ``babyname.maxLikelyYearForName``; the predicted age is computed as
    ``2012 - predicted_year`` and mapped to an age group with
    ``babyname.ageToAgeGroup``.  The predicted year and group are printed
    per row, and a 4x4 confusion matrix is printed at the end, with rows
    indexed by the actual age group and columns by the predicted group.

    Rows whose predicted age cannot be mapped to a group are not counted.
    """
    file_dir = "/home/pyongjoo/workspace/tweetsprocess/data/name-feature/"
    infile = file_dir + "screename-May10-AlmostVerified.csv"
    # Single-argument print(...) is identical to the print statement here.
    print(infile)

    # Year the predicted birth year is subtracted from to get an age.
    reference_year = 2012

    # conf_matrix[actual_group][predicted_group] -> count
    conf_matrix = [[0, 0, 0, 0] for _ in range(4)]

    # The context manager closes the input file even if a row fails to
    # process.  'rb' is the mode the Python 2 csv module expects.
    with open(infile, 'rb') as csvfile:
        for row in csv.reader(csvfile):
            fullname = row[1]
            age = row[2]
            firstname = fullname.split(' ')[0]

            predicted_year = babyname.maxLikelyYearForName(firstname)
            predicted_age = reference_year - predicted_year
            age_group = babyname.ageToAgeGroup(age)

            try:
                predicted_group = babyname.ageToAgeGroup(predicted_age)
            except Exception:
                # Deliberate best-effort: a predicted age that cannot be
                # mapped to a group is recorded as -1 and skipped below.
                # NOTE(review): the exception type raised by
                # ageToAgeGroup is not visible here -- narrow this once
                # it is known.
                predicted_group = -1

            print("%s %s" % (predicted_year, predicted_group))

            if predicted_group != -1:
                conf_matrix[age_group][predicted_group] += 1

    print("Confusion Matrix:")
    for matrix_row in conf_matrix:
        # Each count is followed by a single space, then a newline.
        sys.stdout.write(
            "".join(str(count) + " " for count in matrix_row) + "\n")
def run_group():
    """Evaluate name-based age-group prediction on a JSON tweet dump.

    Loads the hard-coded JSON file and iterates over its entries, reading
    ``['user']['name']`` and ``['user']['age']`` from each.  The first
    whitespace-separated token of the name, stripped of non-ASCII
    characters, is passed to ``babyname.probHashInGroupForName``.  The
    predicted group is the position of the largest probability when the
    groups are taken in sorted key order, or -1 when all probabilities
    sum to zero.

    For every entry with a prediction, one line is written to
    ``prob-Jun19.libsvm`` in the form ``<age_group> <group>:<prob> ...``
    and the 4x4 confusion matrix (rows: actual group, columns: predicted
    group) is updated.  The matrix is printed at the end.

    NOTE(review): this shares its name with an earlier ``run_group`` in
    this file; if both live at module level this definition is the one
    that remains bound -- confirm that is intended.
    """
    file_dir = "/home/pyongjoo/workspace/twitter-research/data/"
    infile = file_dir + "ageEmbededTweets-Jun19-sampled2.0.json"
    # Single-argument print(...) is identical to the print statement here.
    print(infile)

    # Close the input handle as soon as the document has been parsed.
    with open(infile) as jsonfile:
        document = json.loads(jsonfile.read())

    # conf_matrix[actual_group][predicted_group] -> count
    conf_matrix = [[0, 0, 0, 0] for _ in range(4)]

    libsvmoutfile = file_dir + "prob-Jun19.libsvm"

    # The context manager flushes and closes the output file even when a
    # record raises part-way through the loop.
    with open(libsvmoutfile, 'w') as libsvmout:
        for tweetDoc in document:
            fullname = tweetDoc['user']['name']
            age = tweetDoc['user']['age']

            firstname = fullname.split(' ')[0]
            firstname = firstname.encode('ascii', 'ignore')

            age_group = babyname.ageToAgeGroup(age)
            prob_hash = babyname.probHashInGroupForName(firstname)

            # Same ordering is used for the prediction and for the
            # features written out below.
            sorted_items = sorted(prob_hash.items())
            prob_array = [prob for _, prob in sorted_items]

            # An all-zero distribution means there is nothing to predict.
            if sum(prob_hash.values()) == 0:
                continue
            predicted_group = prob_array.index(max(prob_array))

            # Features go out in ascending group order (libsvm expects
            # ascending indices) instead of arbitrary dict order.
            features = "".join(
                str(group) + ':' + str(prob) + ' '
                for group, prob in sorted_items)
            libsvmout.write(str(age_group) + ' ' + features + '\n')

            # add to confusion matrix
            conf_matrix[age_group][predicted_group] += 1

    print("Confusion Matrix:")
    for matrix_row in conf_matrix:
        # Each count is followed by a single space, then a newline.
        sys.stdout.write(
            "".join(str(count) + " " for count in matrix_row) + "\n")