# __author__ = 'basil.beirouti'

import csv, datetime, random
from BM25 import last_thousand
from BM25.Scheduling import whos_on, read_filtered_csv_file, read_raw_schedule_csv, write_filtered_csv_file, this_year, docmatrix_data
from BM25.Plugins import tuples_tse_psums_concat
from BM25.TextCleaning import wordslist2string, cleanStringAndLemmatize
from BM25.BM25Okapi import QueryMaster, DocMatrix

def rand_divide(data, proportion):
    """Randomly split *data* into two groups.

    The items are shuffled and the first ``round(proportion * len(data))``
    of them form the first group; the remaining items form the second.

    Args:
        data: Sequence (or other iterable) of items to split. It is copied
            before shuffling, so the caller's object is left untouched and
            immutable sequences such as tuples are accepted.
        proportion: Fraction of the items, between 0 and 1 inclusive, that
            goes into the first group.

    Returns:
        A ``(group1, group2)`` tuple of lists that together contain every
        item of *data* exactly once.

    Raises:
        ValueError: If *proportion* is outside ``[0, 1]``.
    """
    # Explicit check instead of assert: asserts are stripped under ``-O``,
    # which would let an out-of-range proportion produce a bogus split.
    if not 0 <= proportion <= 1:
        raise ValueError(
            "proportion must be between 0 and 1, got %r" % (proportion,)
        )

    # Shuffle a private copy; one shuffle already yields a uniformly random
    # permutation, so a second pass adds nothing.
    shuffled = list(data)
    random.shuffle(shuffled)

    numgroup1 = round(proportion * len(shuffled))
    return shuffled[:numgroup1], shuffled[numgroup1:]

# Build the train/test corpus from the records returned by whos_on().
rows = read_filtered_csv_file()
on_now = whos_on(rows)
# The second field of each whos_on() record is used as the person id.
personids = [el[1] for el in on_now]
# NOTE(review): the meaning of the 500 argument is not visible here
# (presumably a per-person record limit) — confirm against docmatrix_data.
out = docmatrix_data(personids, 500)
# Transpose the 7-field records into per-column tuples.
# NOTE: this rebinds `personids`, replacing the list built above.
srnums, badgenums, personids, fns, lns, psums, dates = zip(*out)
# Key each record by fields 3 and 4 concatenated (the columns unpacked as
# fns/lns above) and keep field 5 (the column unpacked as psums) as its text.
tupsdata = [(el[3] + el[4], el[5]) for el in out]
cleaned_data = [(el[0], wordslist2string(cleanStringAndLemmatize(el[1]))) for el in tupsdata]
train, test = rand_divide(cleaned_data, 0.75)
processed_data = tuples_tse_psums_concat(train)
# NOTE(review): the DocMatrix instance is discarded and the argument list
# ends in a dangling comma — this call looks unfinished; confirm intent.
DocMatrix(processed_data, )
# An indented duplicate of the rand_divide body that used to follow this
# statement was removed: at module level it was an IndentationError (and a
# `return` outside any function), so the file could not be parsed.

# Split each of the two data sets 75/25 independently, then concatenate the
# respective parts into one training list and one testing list.
# NOTE(review): `data_vmax` and `data_vnx` are neither defined nor imported in
# the visible part of this file — confirm where they are supposed to come from.
# NOTE: `train` and `test` are also assigned earlier in the file; the
# assignments below replace those values.
train1, test1 = rand_divide(data_vmax, 0.75)
train2, test2 = rand_divide(data_vnx, 0.75)
train = train1 + train2
test = test1 + test2

print("divided data into training and testing sets")

tups_train = tuples_tse_psums_concat(train)
# NOTE(review): `tse_psums_concat` is not among the names imported from
# BM25.Plugins at the top of the file (only `tuples_tse_psums_concat` is), and
# `tups_train2` is not used anywhere in the visible code — confirm or remove.
tups_train2 = tse_psums_concat(train)

print("grouped problem summaries by TSE")

# Disabled alternative evaluation path, kept for reference:
# evaluator = Bm25Eval(tups_train, test)
# print("running evaluation")
# evaluator.evaluatealgorithm()

def testfunction(tups_train):
    okapi_docmatrix = DocMatrix(tups_train, bm25 = True, ngrams_range = (1,1))
    query_master = QueryMaster(okapi_docmatrix)
    start = time.time()
    query_master.evaluatealgorithm(test, 1)
    query_master.evaluatealgorithm(test, 10)
    stop = time.time()