예제 #1
0
    def fn(inst, idx, total):
        nonlocal shuffled

        # print('idx:', idx, 'total:', total)

        # do the shuffling, making a better CV
        # do only once
        if idx == 0:
            x, y = requires(['x', 'y'], inst)
            # print('x:', x)
            # print('y:', y)
            # print('add_fields:', add_fields)
            attachments = requires(add_fields, inst)
            # print('attachments:', attachments)
            data = list(zip(x, y, *attachments))
            # print(data)
            # return
            shuffle(data)
            shuffled = data

        avg_size = int(len(shuffled) / total)
        start = avg_size * idx
        size = avg_size if idx < total - 1 else len(shuffled) - start
        # print('idx:', idx)
        # print('start:', start)
        # print('size:', size)
        train = shuffled[:start] + shuffled[start + size:]
        train_x, train_y, *train_attachments = list(zip(*train))

        test = shuffled[start: start + size]
        test_x, test_y, *test_attachments = list(zip(*test))

        # print('test_x:', test_x)
        # print('test_y:', test_y)

        # print('train_y:', train_y)

        new_inst = inst\
            .set('x', train_x)\
            .set('y', train_y)\
            .set('x_test', test_x)\
            .set('y_test', test_y)

        # additional fields got its train and test versions as well
        for i, field in enumerate(add_fields):
            new_inst = new_inst\
                .set(field, train_attachments[i])\
                .set(field + '_test', test_attachments[i])

        return new_inst
예제 #2
0
    def fn(insts):
        """Collect the ``field`` value of every instance.

        Args:
            insts: Iterable of instances accepted by ``requires``.

        Returns:
            A list holding ``requires(field, inst)`` for each instance, in
            iteration order.
        """
        return [requires(field, item) for item in insts]
예제 #3
0
    def fn(insts):
        """Gather the values of ``fields`` from every instance, column-wise.

        Args:
            insts: Iterable of instances accepted by ``requires``.

        Returns:
            A dict mapping each name in ``fields`` to the list of that
            field's values, one entry per instance, in iteration order.
        """
        # One independent list per field name.
        columns = {name: [] for name in fields}

        for item in insts:
            row = requires(fields, item)
            # ``row`` is indexed in the same order as ``fields``.
            for pos, name in enumerate(fields):
                columns[name].append(row[pos])

        return columns