if samplesize > min_sample_size:
            # Runs only when samplesize exceeds min_sample_size: walk every
            # sample, tag it as 'train' or 'test', and centre / scale the
            # vector of its argument string.
            trainindices = list(range(samplesize))
            # load vectors into train and test collections
            for i in range(samplesize):
                dest = ''
                # trainindices is consumed from the front: when its head equals
                # i the sample is tagged 'train' and the head is dropped.
                # NOTE(review): as initialised above, trainindices holds every
                # index, so the head always equals i and the 'test' branch is
                # never taken in the code visible here -- confirm intended.
                if len(trainindices) > 0 and trainindices[0] == i:
                    dest = 'train'
                    trainindices = trainindices[1:]
                else:
                    dest = 'test'
                sample = samples[i]
                # samples are of form holmatrixrow, argstring, weight
                # samples with an empty argstring are skipped entirely
                if sample[1] != '':
                    # import of vectors
                    # hol_v: row of holmatrix selected by the sample's row index
                    hol_v = holmatrix[int(sample[0])]
                    # arg_v: whatever arg_data.vector returns for the argstring
                    # (assumes a 1-D numpy array, given the [:, np.newaxis]
                    # indexing below -- TODO confirm)
                    arg_v = arg_data.vector(sample[1])
                    weight = float(sample[2])

                    # First pass: mean-centre only (with_std=False). The vector
                    # is reshaped to a single column so the scaler treats its
                    # components as samples of one feature, then squeezed back.
                    scaler = sklearn.preprocessing.StandardScaler(
                        with_std=False)
                    arg_v = scaler.fit_transform(arg_v[:, np.newaxis])
                    arg_v = np.squeeze(arg_v)

                    # Second pass: a fresh scaler that centres again and, when
                    # variance_control is truthy, also divides by the standard
                    # deviation.
                    # NOTE(review): the data is already centred by the first
                    # pass, so only the optional variance scaling is new here.
                    scaler = sklearn.preprocessing.StandardScaler()
                    scaler.set_params(with_std=variance_control)
                    arg_v = scaler.fit_transform(arg_v[:, np.newaxis])
                    arg_v = np.squeeze(arg_v)
                    # After variance scaling, multiply by mean_std (defined
                    # outside this view).
                    if variance_control:
                        arg_v = mean_std * arg_v
        # Pull three fields out of `line` with regexes:
        #   verb - the leading run of word characters that ends right before
        #          a '|'
        #   rel  - the first run of word characters enclosed by '|' on both
        #          sides
        #   arg  - everything after the first "O|" or "S|" up to the end of
        #          the line
        # re.search returns None when a pattern does not match, in which case
        # the .group(0) call raises AttributeError.
        verb = re.search(r'^(\w)+(?=\|)', line).group(0)
        rel = re.search(r'(?<=\|)(\w)+(?=\|)', line).group(0)
        arg = re.search(r'(?<=(O|S)\|).+$', line).group(0)

    # Row key is the verb and relation joined by '|'.
    key = verb + '|' + rel
    # check if this is a new row
    if oldkey != key:
        # Key differs from the previous one: stack a new row onto verbarray
        # that pairs the key with a nested object array holding a single
        # placeholder entry [0, ''], and advance the row counter.
        verbarray = np.vstack([
            verbarray,
            np.array([key, np.array([np.array([0, ''], object)], object)],
                     object)
        ])
        verbindex += 1
    # Remember the key for the next comparison.
    oldkey = key

    # Look up the argument's vector and compare it with `control`.
    # (presumably `control` is the vector r.vector returns for an unknown
    # word -- TODO confirm)
    arg_v = np.array(r.vector(arg))
    if np.array_equal(arg_v, control):
        # The argument's vector equals control: ask t.transform for
        # alternatives and keep the first one whose vector differs from
        # control. arg_str stays '' when none qualifies.
        new_args = t.transform(arg)
        arg_str = ''
        for w in new_args:
            new_v = np.array(r.vector(w))
            if not np.array_equal(new_v, control):
                arg_str = w
                break
    else:
        # The argument's own vector is usable; keep the argument as is.
        # NOTE(review): new_args is not assigned on this path, so any later
        # read of it here relies on a value set elsewhere -- verify.
        arg_str = arg
    if not arg_str:
        log.write(arg + '\t\t' + str(new_args) + '\n')
        failedcount += 1
    else:
        verbarray[-1][1] = np.vstack(