Пример #1
0
def output_html():
    """Score every line of the target file and render those in range.

    The file named by ``get_target_file()`` is read line by line, a feature
    matrix is built from the lines, and column 1 of the model's
    ``predict_proba`` output is used as each line's score.

    A line is skipped when it starts with ``"RT "`` or when its score is
    below ``args.threshold`` or above ``args.upper_limit``.  Every remaining
    line is split on tabs: field 0 becomes the entry text, fields 1 and 2
    fill the user and status slots of the tweet URL.  The collected entries
    are handed to ``render`` together with the target file name.
    """
    target_file = get_target_file()
    # Single-argument parenthesized print: same output as the original
    # two-item print statement under Python 2.
    print("processing: %s" % target_file)

    # Read everything up front so the handle is closed deterministically
    # (the original never closed it).
    with open(target_file) as fi:
        lines = fi.readlines()

    # Imported lazily, as the original did.
    from lr import learn, make_feature_matrix

    model = learn()
    X = make_feature_matrix(lines)
    ps = model.predict_proba(X)[:, 1]

    data = []
    for line, p in zip(lines, ps):
        if line.startswith("RT "):
            continue
        # Kept as two explicit comparisons (not a chained range test) so a
        # NaN score is treated exactly as before.
        if p < args.threshold or p > args.upper_limit:
            continue
        # Drop the line terminator before splitting so it cannot end up in
        # the last field (and therefore in the URL when the status id is the
        # final column).
        items = line.rstrip("\r\n").split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        data.append(dict(url=url, score=p, text=items[0]))

    print(len(data))
    render(data, target_file)
Пример #2
0
def add_train_data():
    """Interactively label in-range lines and append them to training files.

    The file named by ``get_target_file()`` is read, each line is scored with
    column 1 of the model's ``predict_proba`` output, and every line that
    does not start with ``"RT "`` and whose score lies between
    ``args.threshold`` and ``args.upper_limit`` is shown to the user together
    with its score and tweet URL.

    The user answers at the prompt:

    * ``c`` appends the line to ``positive.txt``
    * ``z`` appends the line to ``negative.txt``
    * anything else (including ``x``) leaves the line unrecorded
    """
    target_file = get_target_file()
    # Single-argument parenthesized print: same output as the original
    # two-item print statement under Python 2.
    print("processing: %s" % target_file)

    # Read everything up front so the handle is closed deterministically
    # (the original never closed it).
    with open(target_file) as fi:
        lines = fi.readlines()

    # Imported lazily, as the original did.
    from lr import learn, make_feature_matrix

    model = learn()
    X = make_feature_matrix(lines)
    ps = model.predict_proba(X)[:, 1]

    # Answer key -> file that collects lines given that label.
    label_files = {"c": "positive.txt", "z": "negative.txt"}

    for line, p in zip(lines, ps):
        if line.startswith("RT "):
            continue
        # Kept as two explicit comparisons (not a chained range test) so a
        # NaN score is treated exactly as before.
        if p < args.threshold or p > args.upper_limit:
            continue

        print(line)
        print(p)
        # Drop the line terminator before splitting so it cannot end up in
        # the last field (and therefore in the URL when the status id is the
        # final column).
        items = line.rstrip("\r\n").split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        print(url)

        ret = raw_input("negative(z), neutral(x), positive(c)>")
        out_name = label_files.get(ret)
        if out_name is not None:
            # An unterminated final line would otherwise be glued to the
            # next record appended to the same file.
            record = line if line.endswith("\n") else line + "\n"
            with open(out_name, "a") as fo:
                fo.write(record)
        # Blank separator line between candidates.
        print("")
Пример #3
0
def output_html(lr, threshold=0.6):
    """Render the de-duplicated, high-scoring lines of ``OUT_FILE``.

    Lines starting with ``"RT "`` are ignored, and a line is treated as a
    duplicate of an earlier one when their first 30 characters match.  The
    remaining lines are reversed, scored with column 1 of
    ``lr.predict_proba``, and those scoring at least ``threshold`` are passed
    to ``filter.render`` along with ``OUT_FILE``.

    Args:
        lr: Fitted model exposing ``predict_proba``.
        threshold: Minimum score a line needs to be rendered.  Defaults to
            0.6, the value that was previously hard-coded.
    """
    lines = []
    # A set of 30-character prefixes replaces the original linear
    # ``startswith`` scan over a list.  The result is the same because a line
    # holds at most one terminator, at its very end, so matching a stored
    # prefix is equivalent to having an equal prefix.
    seen_prefixes = set()
    with open(OUT_FILE) as fi:
        for line in fi:
            if line.startswith("RT "):
                continue
            prefix = line[:30]
            if prefix in seen_prefixes:
                continue
            seen_prefixes.add(prefix)
            lines.append(line)
    lines.reverse()

    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]

    data = []
    for line, p in zip(lines, ps):
        if p < threshold:
            continue
        # Drop the line terminator before splitting so it cannot end up in
        # the last field (and therefore in the URL when the status id is the
        # final column).
        items = line.rstrip("\r\n").split('\t')
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        data.append(dict(url=url, score=p, text=items[0]))

    print(len(data))
    # Imported at the point of use, as the original did.
    from filter import render
    render(data, OUT_FILE)