示例#1
0
def main():
    """Write cold-start train/test data for every fold and every strategy.

    Two positional command-line arguments are read: ``train_pattern`` and
    ``test_pattern``, glob patterns for the training and testing files.
    The matched files are paired fold by fold. For each pair and each value
    in ``nums``, ``write_train_test_movielens`` is called on the popularity,
    entropy and entropy-zero strategies, in that order.

    Raises:
        ValueError: if the two patterns match a different number of files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('train_pattern', type=str, help='pattern of names for training files')
    parser.add_argument('test_pattern', type=str, help='pattern of names for testing files')
    args = parser.parse_args()

    # glob.glob() returns matches in arbitrary order. Sort both lists so the
    # i-th training file is paired with the i-th testing file instead of
    # relying on directory-listing order.
    train_files = sorted(glob.glob(args.train_pattern))
    test_files = sorted(glob.glob(args.test_pattern))
    if len(train_files) != len(test_files):
        raise ValueError('Number of training files not equal to number of testing files')

    # Built after the validation above so bad input fails fast.
    strategies = (PopularityStrategy(), EntropyStrategy(), EntropyZeroStrategy())
    nums = [5, 10, 15]
    columns = ['user', 'item', 'rating', 'time']
    # Passed unchanged to every write_train_test_movielens call.
    select_fn = 'rated'
    list_fn = 'list'

    for count, (train_n, test_n) in enumerate(zip(train_files, test_files)):
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print("Processing %d fold with cold start" % count)
        train = pd.read_csv(train_n, header=None, names=columns)
        test = pd.read_csv(test_n, header=None, names=columns)

        # Split the training rows by whether their user also appears in
        # this fold's test file.
        test_ids = test.user.unique()
        user_fit = train.user.isin(test_ids)
        train_other_folds = train[~user_fit]
        train_this_fold = train[user_fit]

        for n in nums:
            for strategy in strategies:
                strategy.write_train_test_movielens(train_n, test_n, select_fn, list_fn,
                                                    train_this_fold, train_other_folds, test, n)
 def test_entropy_zero(self):
     """Check EntropyZeroStrategy.entropy_zero against two reference values."""
     tolerance = 0.0001
     # (input counts, expected result) pairs; every case passes the same
     # trailing arguments 5 and 100.
     cases = (
         ([30, 30, 30], 0.21794),
         ([50], 0.094520),
     )
     for counts, expected in cases:
         series = pd.Series(data=counts)
         score = EntropyZeroStrategy.entropy_zero(series, 5, 100)
         assert abs(score - expected) < tolerance