Пример #1
0
 def import_idfs(dirname):
     """Import IDF weights from every entry found in ``dirname``.

     Each name returned by ``os.listdir(dirname)`` is joined onto ``dirname``
     and handed to ``Counters.import_weights`` together with ``weights.idf``.

     Returns a list with one imported result per directory entry, in the
     order ``os.listdir`` yields them.
     """
     imported = []
     for entry in os.listdir(dirname):
         full_path = path.join(dirname, entry)
         imported.append(Counters.import_weights(full_path, weights.idf))
     return imported
Пример #2
0
 def import_tfs(dirname):
     """Import TF weights from every entry found in ``dirname``.

     Each name returned by ``os.listdir(dirname)`` is joined onto ``dirname``
     and handed to ``Counters.import_weights`` together with ``weights.tf``.

     Returns a list of ``(entry_name, imported_weights)`` tuples, in the
     order ``os.listdir`` yields them; ``entry_name`` is the bare directory
     entry, not the joined path.
     """
     pairs = []
     for entry in os.listdir(dirname):
         tf_weights = Counters.import_weights(path.join(dirname, entry), weights.tf)
         pairs.append((entry, tf_weights))
     return pairs
Пример #3
0
from os import path
from counters import Counters
from rating import RIC
from loader import Loader
import matplotlib.pyplot as plt
import weights

# Rate every document of the "white" and "black" sets with a RIC rater and
# dump the per-document ratings next to each set as ratings.csv.

# Each set lives in its own sub-directory of sets_dir; the code below reads a
# 'tf' folder and a '<set name>.idf' file from each of them.
sets_dir = '/home/rsuvorov/projects/dcfs/data'
white_set = 'en_var_1'
black_set = 'en_porno_1'
white_dir = path.join(sets_dir, white_set)
black_dir = path.join(sets_dir, black_set)

# *_docs are the (file name, tf weights) pairs returned by Loader.import_tfs;
# *_idf is the single per-set IDF weights object.
white_docs = Loader.import_tfs(path.join(white_dir, 'tf'))
white_idf = Counters.import_weights(path.join(white_dir, white_set + '.idf'), weights.idf)
black_docs = Loader.import_tfs(path.join(black_dir, 'tf'))
black_idf = Counters.import_weights(path.join(black_dir, black_set + '.idf'), weights.idf)

# The rater is built from both IDF tables (black first) and is then called on
# each document's tf weights.
rater = RIC(black_idf, white_idf)

white_ratings = [(fname, rater(tf)) for (fname, tf) in white_docs]
black_ratings = [(fname, rater(tf)) for (fname, tf) in black_docs]

# Write one "<file name> <rating>" line per document.  The context managers
# guarantee the files are flushed and closed even if a write fails; the
# original left both handles open for the lifetime of the process.
# NOTE(review): the files are named .csv but the fields are separated by a
# single space -- layout kept as-is so existing consumers keep working.
with open(path.join(white_dir, "ratings.csv"), 'w') as white_dat:
    for (fname, r) in white_ratings:
        # Same text the Python 2 ``print >> file, fname, r`` statement emitted.
        white_dat.write("%s %s\n" % (fname, r))

with open(path.join(black_dir, "ratings.csv"), 'w') as black_dat:
    for (fname, r) in black_ratings:
        black_dat.write("%s %s\n" % (fname, r))