def output_html():
    # Score a tab-separated tweet dump and render the high-scoring tweets as HTML.
    target_file = get_target_file()
    print "processing:", target_file
    fi = open(target_file)
    from lr import learn, make_feature_matrix
    lr = learn()
    lines = fi.readlines()
    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]  # probability of the positive class
    data = []
    for line, p in zip(lines, ps):
        if line.startswith("RT "):
            continue  # skip retweets
        if p < args.threshold:
            continue
        if p > args.upper_limit:
            continue  # keep only scores inside the configured window
        # columns: text <TAB> screen name <TAB> status id
        items = line.split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        data.append(dict(url=url, score=p, text=items[0]))
    print len(data)
    render(data, target_file)
def add_train_data():
    # Interactively label borderline tweets and append them to the training files.
    target_file = get_target_file()
    print "processing:", target_file
    fi = open(target_file)
    from lr import learn, make_feature_matrix
    lr = learn()
    lines = fi.readlines()
    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]
    for line, p in zip(lines, ps):
        if line.startswith("RT "):
            continue  # skip retweets
        if p < args.threshold:
            continue
        if p > args.upper_limit:
            continue  # only ask about scores inside the configured window
        print line
        print p
        items = line.split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        print url
        ret = raw_input("negative(z), neutral(x), positive(c)>")
        if ret == "c":
            fo = open("positive.txt", "a")
            fo.write(line)
            fo.close()
        elif ret == "z":
            fo = open("negative.txt", "a")
            fo.write(line)
            fo.close()
        print
def output_html(lr):
    # Score the collected tweets with a trained model and render them as HTML,
    # skipping retweets and near-duplicates (lines sharing a 30-character prefix).
    from lr import make_feature_matrix
    fi = open(OUT_FILE)
    lines = []
    used = []
    for line in fi:
        if line.startswith("RT "):
            continue
        if any(line.startswith(x) for x in used):
            continue
        used.append(line[:30])
        lines.append(line)
    lines.reverse()  # most recently appended lines first
    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]
    data = []
    for line, p in zip(lines, ps):
        if p < 0.6:
            continue
        items = line.split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        data.append(dict(url=url, score=p, text=items[0]))
    print len(data)
    from filter import render
    render(data, OUT_FILE)
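# ---------------------------------------------------------------------------
# The functions above rely on a module-level `args` object (with `threshold`
# and `upper_limit` attributes), a `get_target_file()` helper, and a `render()`
# template writer defined elsewhere in the script.  The sketch below shows one
# way the command-line side could be wired up with argparse, assuming all three
# functions live in the same module; the option names, defaults, and dispatch
# are illustrative assumptions, not taken from the original.
# ---------------------------------------------------------------------------
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("command", choices=["html", "train"],
                    help="html: render the report page, train: label tweets interactively")
parser.add_argument("--target", help="tab-separated tweet dump to score")
parser.add_argument("--threshold", type=float, default=0.5)
parser.add_argument("--upper_limit", type=float, default=1.0)
args = parser.parse_args()


def get_target_file():
    # Assumed helper: return the file name given on the command line.
    return args.target


if __name__ == "__main__":
    if args.command == "train":
        add_train_data()
    else:
        # The later output_html(lr) definition shadows the earlier one,
        # so a trained model is passed in explicitly.
        from lr import learn
        output_html(learn())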