Пример #1
0
    tmp = line.split()
    if len(tmp) == 3:
        i = int(tmp[0])
        w = tmp[2]
        l = int(tmp[1])
        if i == prev:
            wtol[w].append(l)
        else:
            wtol[w] = [l]
            itow[i] = w
        prev = i
    elif len(tmp) == 2:
        itow[int(tmp[0])] = tmp[1]  
fwl.close()

# Word list used later in the script (top-1000 list, judging by the file name);
# the structure returned by sm.readwl is defined in pythonlib.semantic.
wtolu = sm.readwl("/home/ec2-user/git/statresult/wordslist_dsw_top1000.txt")


# Build bk: every word of a group maps to the (shared) list of all words in
# that group.  The input file is named by sys.argv[1]; the first
# whitespace-separated field of each entry line is an integer id that is
# resolved to a word through itow.  A line shorter than two characters
# (e.g. a bare newline) terminates the current group.
tmp = []
bk = {}
# `with` guarantees the handle is closed even if an id is missing from itow
# or a line cannot be parsed.
with open(sys.argv[1], "r") as fbk:
    for line in fbk:
        if len(line) < 2:
            for term in tmp:
                bk[term] = tmp
            tmp = []
        else:
            tmp.append(itow[int(line.split()[0])])
# Flush the final group: without this, a file that does not end with a
# separator line would silently lose its last group.
for term in tmp:
    bk[term] = tmp

# Integer mode selector taken from the third command-line argument.
otype = int(sys.argv[3])
Пример #2
0
# Output mode flag: 1 when sys.argv[6] is the literal 'stdout', 0 otherwise.
otype = 1 if sys.argv[6] == 'stdout' else 0


def vecof(lines, a, wtol, kk):
    """Sum the columns of ``a`` selected by the words in ``lines``.

    Each entry of ``lines`` has newline characters stripped from both ends
    and is then looked up in ``wtol``; the result is used as a column index
    into ``a`` (``a[:, index]``).  The selected columns are accumulated onto
    a zero vector of length ``kk``.

    Args:
        lines: iterable of strings, one word per entry.
        a: array indexed as ``a[:, j]``.
        wtol: mapping from word to column index of ``a``.
        kk: length of the initial zero vector.

    Returns:
        The accumulated sum; ``np.zeros(kk)`` when ``lines`` is empty.
    """
    total = np.zeros(kk)
    stripped_words = (raw.strip('\n') for raw in lines)
    for word in stripped_words:
        column = a[:, wtol[word]]
        total = total + column
    return total


# Command-line configuration: sys.argv[5] is parsed as the integer mode
# selector `stype`, sys.argv[4] as the float `zipf`.
stype = int(sys.argv[5])
zipf = float(sys.argv[4])
# Full word list; the structure returned by sm.readwl is defined in
# pythonlib.semantic and is not visible here.
wtola = sm.readwl("/home/ec2-user/git/statresult/wordslist_dsw.txt")
# Disabled alternative initialisation of zipf, kept for reference:
#zipf = crand.zipf_init(len(wtola))
# Mode 3 loads a separate word list (top-10000, judging by the file name);
# every other mode reuses the full list object.
if stype == 3:
    wtolu = sm.readwl(
        "/home/ec2-user/git/statresult/wordslist_top10000_dsw.txt")
else:
    wtolu = wtola
a = np.load('/home/ec2-user/data/classinfo/vt.npy')  #lsa result
# b starts as an alias of a (same array object, no copy); it is rebound below
# when stype == 3.
b = a
# ukk and akk both start as the size of a's first axis.
ukk = a.shape[0]
s = None
akk = ukk
if stype == 3:
    # Earlier model files, kept for reference:
    #    b = np.load('/home/ec2-user/git/statresult/lda-30-2000-phi.npy')
    #    s = np.load('/home/ec2-user/git/statresult/lda-30-2000-pz.npy')
    # NOTE(review): presumably an LDA phi matrix matching the top-10000 word
    # list (inferred from the file name only) -- confirm.
    b = np.load('/home/ec2-user/git/statresult/lda-32-2000-top10000-phi.npy')
Пример #3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import math
from os import path

sys.path.append( path.dirname( path.dirname( path.abspath(__file__) ) ) )
from pythonlib import semantic as sm


# Count how many lines of the topic files are present in the word list.
#   num -- total number of lines read from all topic files
#   tin -- number of those lines (newlines stripped) found in wtol
num = 0
tin = 0

wtol = sm.readwl("/home/ec2-user/git/statresult/wordslist_dsw_top1000.txt")

for root, dirs, files in os.walk('/home/ec2-user/data/topics/'):
    for name in files:
        # Only all-digit file names are selected; the data is read from the
        # sibling file with the same name plus a '.txt' suffix.
        if not name.isdigit():
            continue
        # `with` closes the handle even if reading fails; iterating the file
        # avoids loading it whole into memory.
        with open(os.path.join(root, name) + '.txt', 'r') as fin:
            for raw in fin:
                num += 1
                if raw.strip('\n') in wtol:
                    tin += 1

# Parenthesised single-string form prints "num tin" identically under both
# Python 2 and Python 3 (the bare `print num,tin` statement is Python 2 only).
print("%d %d" % (num, tin))