def sample_train(input_file):
    """Assemble a shuffled training sample from ``input_file``.

    Steps, in order:
      1. Ask ``cu`` for the closed-question count.
      2. Pass the open-question iterator and that count to
         ``reservoir_sample``.
      3. Extend the result with every closed question.
      4. Shuffle the combined collection in place.

    NOTE(review): presumably ``reservoir_sample`` returns a list of at most
    ``count`` items, which would make the result roughly class-balanced --
    confirm against its definition.

    Returns:
        A ``(header, sample)`` tuple, where ``header`` is whatever
        ``cu.get_header(input_file)`` returns and ``sample`` is the shuffled
        collection.
    """
    n_closed = cu.get_closed_count(input_file)
    open_questions = cu.iter_open_questions(input_file)
    combined = reservoir_sample(open_questions, n_closed)
    combined.extend(cu.iter_closed_questions(input_file))
    random.shuffle(combined)
    # The header is fetched last, matching the original call order.
    return cu.get_header(input_file), combined
# Example #2
0
def sample_train(input_file):
    """Return ``(header, sample)`` for training, read from ``input_file``.

    The sample is built by passing the open-question iterator and the
    closed-question count to ``reservoir_sample``, extending the result with
    all closed questions, and shuffling it in place with ``random.shuffle``.

    NOTE(review): the size and type of what ``reservoir_sample`` returns are
    not visible here; it must at least support ``extend`` and in-place
    shuffling -- verify against its definition.
    """
    target_size = cu.get_closed_count(input_file)

    # Open questions are sampled first, then all closed ones are appended.
    picked = reservoir_sample(
        cu.iter_open_questions(input_file),
        target_size,
    )
    closed_questions = cu.iter_closed_questions(input_file)
    picked.extend(closed_questions)

    random.shuffle(picked)

    file_header = cu.get_header(input_file)
    return file_header, picked
def sample_train(input_file):
    """Assemble a shuffled training sample from ``input_file``, with progress output.

    A progress message is printed to stdout before each stage:
    counting closed questions, sampling open questions via
    ``reservoir_sample``, appending all closed questions, and shuffling.

    NOTE(review): presumably the sample ends up holding about as many open
    as closed questions -- confirm against ``reservoir_sample``.

    Returns:
        A ``(header, sample)`` tuple, where ``header`` comes from
        ``cu.get_header(input_file)`` and ``sample`` is the shuffled
        collection.
    """
    print("get closed question count")
    n_closed = cu.get_closed_count(input_file)

    print("sample open questions")
    open_questions = cu.iter_open_questions(input_file)
    combined = reservoir_sample(open_questions, n_closed)

    print("get all closed questions")
    combined.extend(cu.iter_closed_questions(input_file))

    print("shuffle all the data")
    random.shuffle(combined)

    # The header is fetched last, matching the original call order.
    return cu.get_header(input_file), combined
def sample_train(input_file):
    """Return ``(header, sample)`` for training, logging each stage to stdout.

    Stages, each announced with ``print`` before it runs:
      * read the closed-question count from ``cu``;
      * pass the open-question iterator and that count to
        ``reservoir_sample``;
      * extend the result with every closed question;
      * shuffle the combined collection in place.

    NOTE(review): the size and type of what ``reservoir_sample`` returns are
    not visible here -- verify against its definition.
    """
    print("get closed question count")
    target_size = cu.get_closed_count(input_file)

    print("sample open questions")
    picked = reservoir_sample(
        cu.iter_open_questions(input_file),
        target_size,
    )

    print("get all closed questions")
    closed_questions = cu.iter_closed_questions(input_file)
    picked.extend(closed_questions)

    print("shuffle all the data")
    random.shuffle(picked)

    file_header = cu.get_header(input_file)
    return file_header, picked