import fileinput
import sys
import time

from liwc import countcat
from web import isurl, isquestion, get_files

# Emit one tab-separated feature row per query-log record on stdout:
# record id, epoch timestamp, three countcat() counts, URL flag,
# question flag, and click flag.

# Single-argument print(...) behaves the same as a Python 2 print statement
# and as a Python 3 function call.
print('id\tts\tpos\tneg\tword\tis_url\tquestion\tclicked')

for line in get_files():
    # Records arrive with either 4 or 5 fields; the 4-field shape is the one
    # marked as clicked. NOTE(review): confirm this mapping against what
    # get_files() actually yields. Any other length raises ValueError here.
    if len(line) == 4:
        record_id, query, ts, _ = line
        clicked = 1
    else:
        record_id, query, ts, _, _ = line
        clicked = 0

    # Skip header rows, which carry the column name in the id field.
    if record_id == 'AnonID':
        continue

    # emotion: the first three entries of countcat() feed the pos/neg/word
    # columns.
    counts = countcat(query)
    pos = counts[0]
    neg = counts[1]
    word = counts[2]

    # internet competence
    is_url = int(isurl(query))
    # Previously hard-coded to 1, which made the 'question' column constant
    # even though isquestion was imported for it. bool() guards against a
    # non-boolean truthy/falsy return (presumably a bool, like isurl).
    is_question = int(bool(isquestion(query)))

    # Seconds since the epoch; time.mktime interprets the parsed struct as
    # local time.
    ts = int(time.mktime(time.strptime(ts, '%Y-%m-%d %H:%M:%S')))

    fields = [record_id, ts, pos, neg, word, is_url, is_question, clicked]
    print('\t'.join(str(x) for x in fields))
#!/usr/bin/env python
"""
Combine project data and output from liwc (i.e. output of wordcount_liwc.py)
"""
from liwc import countcat, header
import csv

# Column positions within a row of essays.csv.
(_projectid, _teacher_acctid, title, short_description, need_statement, essay,
 paragraph1, paragraph2, paragraph3, paragraph4) = range(10)

# Context managers guarantee both files are flushed and closed, even if
# countcat() or the CSV parser raises part-way through.
with open('../data/essays.csv') as essays, \
        open('../data/liwc_out', 'w') as out:
    # Output header: the project id followed by the column names from
    # liwc.header().
    headers = ['_projectid'] + header()
    out.write('\t'.join(headers) + '\n')

    rows = csv.reader(essays)
    # Consume the CSV header through the reader (not a raw readline) so that
    # quoting is handled the same way as for the data rows.
    next(rows, None)

    for row in rows:
        # csv.reader yields [] for blank lines; indexing those would raise
        # IndexError, so skip them.
        if not row:
            continue
        # Run countcat over all text columns from short_description onward,
        # joined into a single string.
        counts = countcat(' '.join(row[short_description:]))
        out.write('\t'.join(str(x) for x in [row[_projectid]] + counts) + '\n')