def work(in_h5, out_csv_file, nest, njobs):
    """Train an XGBoost regressor on HDF5 data and write predictions to CSV.

    Loads train/test matrices from *in_h5*, one-hot encodes the nominal
    columns, standardizes all features, fits an XGBRegressor, clips the
    predictions to [1, 8] and writes (test_labels, y_hat) rows to
    *out_csv_file* under the header '"Id","Response"'.

    NOTE(review): a second ``work`` defined later in this file shadows this
    one, so this version is unreachable as the file stands.

    Parameters
    ----------
    in_h5 : str
        Path to an HDF5 file containing 'train_X', 'train_y', 'test_X',
        'train_labels' and 'test_labels' datasets.
    out_csv_file : str
        Destination CSV path.
    nest : int
        ``n_estimators`` passed to XGBRegressor.
    njobs : int
        Currently UNUSED -- the ``n_jobs`` argument is commented out below
        (possibly an xgboost version issue; older versions named it
        ``nthread`` -- TODO confirm and re-enable).
    """

    # Pipe DSL: these libraries overload '|' so a tuple flows through a
    # chain of processing stages.  `P` is not defined in this chunk --
    # presumably imported at module level; `P.first` appears to extract the
    # single element from the 1-tuple flowing through the pipe (verify).
    from h5pipes import h5open
    from pypipes import getitem, as_key, del_key
    from nppipes import (as_array, fit_transform, transform, fit, predict,
                         savetxt, stack, clip)
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.preprocessing import StandardScaler
    from xgboost import XGBRegressor

    # Column indices of the categorical (nominal) features to one-hot encode.
    nominal_cidx = [
        0, 1, 2, 4, 5, 6, 12, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
        27, 29, 30, 31, 32, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 52,
        53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72,
        73, 74, 75, 76, 77
    ]

    # Build a dict-like 'data' accumulator: each as_key(...) stage stores a
    # value computed from the stages before it.
    data = (
        (in_h5, )
        | h5open
        | as_key('file')
        # Pull the five datasets out of the HDF5 file as arrays.
        | as_key(
            'train_X', lambda d: (d['file'], )
            | getitem('train_X')
            | as_array
            | P.first)
        | as_key(
            'train_y', lambda d: (d['file'], )
            | getitem('train_y')
            | as_array
            | P.first)
        | as_key(
            'test_X', lambda d: (d['file'], )
            | getitem('test_X')
            | as_array
            | P.first)
        # NOTE(review): 'train_labels' is loaded here but never used below.
        | as_key(
            'train_labels', lambda d: (d['file'], )
            | getitem('train_labels')
            | as_array
            | P.first)
        | as_key(
            'test_labels', lambda d: (d['file'], )
            | getitem('test_labels')
            | as_array
            | P.first)
        # One-hot encode the nominal columns: fit on train, reuse on test.
        # (categorical_features/sparse args imply an older sklearn release.)
        | as_key(
            'one_hot', lambda _:
            (OneHotEncoder(categorical_features=nominal_cidx, sparse=False), ))
        | as_key(
            'train_X', lambda d: (d['train_X'].copy(), )
            | fit_transform(d['one_hot'])
            | P.first)
        | as_key(
            'test_X', lambda d: (d['test_X'].copy(), )
            | transform(d['one_hot'])
            | P.first)
        | del_key('one_hot')
        # Standardize features: fit the scaler on train, apply to test.
        | as_key('std_scaler', lambda _: (StandardScaler(), ))
        | as_key(
            'train_X', lambda d: (d['train_X'].copy(), )
            | fit_transform(d['std_scaler'])
            | P.first)
        | as_key(
            'test_X', lambda d: (d['test_X'].copy(), )
            | transform(d['std_scaler'])
            | P.first)
        | del_key('std_scaler')
        # Fit the booster.  NOTE(review): `njobs` is never wired in -- the
        # n_jobs kwarg is commented out along with other tuning options.
        | as_key(
            'XGBReg',
            lambda d: (
                XGBRegressor(
                    seed=1,
                    n_estimators=nest,  #n_jobs=njobs,
                    #verbose=1,
                    #max_features=1.0, min_samples_leaf=1.0,
                    max_depth=50), )
            | fit((d['train_X'], ), (d['train_y'], ))
            | P.first)
        # Predict on test and clamp to the valid Response range [1, 8].
        | as_key(
            'y_hat', lambda d: (d['test_X'], )
            | predict((d['XGBReg'], ))
            | clip(1, 8)
            | P.first)
        | del_key('XGBReg')
        | P.first)

    # Write (Id, Response) pairs as integers with a verbatim header row.
    ((data['test_labels'], data['y_hat'])
     | stack(axis=1)
     | savetxt(out_csv_file,
               delimiter=',',
               fmt=['%d', '%d'],
               header='"Id","Response"',
               comments='')
     | P.first)

    return
def work(in_h5,
         out_csv_file,
         nest,
         njobs):
    """Train an ExtraTreesRegressor on HDF5 data and write predictions to CSV.

    Loads train/test matrices from *in_h5*, one-hot encodes the nominal
    columns, standardizes all features, fits an ExtraTreesRegressor, clips
    the predictions to the valid Response range [1, 8] and writes
    (test_labels, y_hat) rows to *out_csv_file* under the header
    '"Id","Response"'.

    NOTE(review): this definition shadows the earlier ``work`` (XGBoost
    variant) in this file; only this version is callable as the file stands.

    Parameters
    ----------
    in_h5 : str
        Path to an HDF5 file containing 'train_X', 'train_y', 'test_X',
        'train_labels' and 'test_labels' datasets.
    out_csv_file : str
        Destination CSV path.
    nest : int
        ``n_estimators`` for ExtraTreesRegressor.
    njobs : int
        ``n_jobs`` for ExtraTreesRegressor (parallel fitting/prediction).
    """

    from h5pipes import h5open
    from pypipes import getitem, as_key, del_key
    # `clip` added so predictions stay inside the valid label range,
    # matching the XGBoost variant of this function earlier in the file.
    from nppipes import (as_array, fit_transform, transform, fit, predict,
                         savetxt, stack, clip)
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.preprocessing import StandardScaler
    from sklearn.ensemble import ExtraTreesRegressor

    # Column indices of the categorical (nominal) features to one-hot encode.
    nominal_cidx = [0, 1, 2, 4, 5, 6, 12, 13, 15, 17, 18, 19, 20, 21, 22, 23,
                 24, 25, 26, 27, 29, 30, 31, 32, 38, 39, 40, 41, 42, 43, 44, 45,
                 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59,
                 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 76, 77]

    # Build a dict-like 'data' accumulator: each as_key(...) stage stores a
    # value computed from the stages before it.  `P.first` extracts the
    # single element from the 1-tuple flowing through the pipe.
    data = (
        (in_h5,)
        | h5open
        | as_key('file')
        # Pull the five datasets out of the HDF5 file as arrays.
        | as_key('train_X', lambda d:
            (d['file'],)
            | getitem('train_X')
            | as_array
            | P.first
            )
        | as_key('train_y', lambda d:
            (d['file'],)
            | getitem('train_y')
            | as_array
            | P.first
            )
        | as_key('test_X', lambda d:
            (d['file'],)
            | getitem('test_X')
            | as_array
            | P.first
            )
        # NOTE(review): 'train_labels' is loaded here but never used below.
        | as_key('train_labels', lambda d:
            (d['file'],)
            | getitem('train_labels')
            | as_array
            | P.first
            )
        | as_key('test_labels', lambda d:
            (d['file'],)
            | getitem('test_labels')
            | as_array
            | P.first
            )

        # One-hot encode the nominal columns: fit on train, reuse on test.
        # (categorical_features/sparse args imply an older sklearn release.)
        | as_key('one_hot', lambda _:
            (OneHotEncoder(categorical_features=nominal_cidx, sparse=False),))
        | as_key('train_X', lambda d:
            (d['train_X'].copy(),)
            | fit_transform(d['one_hot'])
            | P.first
            )
        | as_key('test_X', lambda d:
            (d['test_X'].copy(),)
            | transform(d['one_hot'])
            | P.first
            )
        | del_key('one_hot')

        # Standardize features: fit the scaler on train, apply to test.
        | as_key('std_scaler', lambda _: (StandardScaler(),))
        | as_key('train_X', lambda d:
            (d['train_X'].copy(),)
            | fit_transform(d['std_scaler'])
            | P.first
            )
        | as_key('test_X', lambda d:
            (d['test_X'].copy(),)
            | transform(d['std_scaler'])
            | P.first
            )
        | del_key('std_scaler')

        | as_key('RFReg', lambda d:
            (ExtraTreesRegressor(random_state=1,
                                 n_estimators=nest, n_jobs=njobs,
                                 verbose=1,
                                 max_features=1.0,
                                 # BUG FIX: was min_samples_leaf=1.0 -- in
                                 # sklearn >= 0.18 a float is a *fraction* of
                                 # n_samples, so 1.0 forced every leaf to hold
                                 # all samples, degenerating each tree to a
                                 # stump.  The int 1 means "at least one
                                 # sample per leaf" in every sklearn version.
                                 min_samples_leaf=1,
                                 max_depth=50),)
            | fit((d['train_X'],), (d['train_y'],))
            | P.first
            )
        | as_key('y_hat', lambda d:
            (d['test_X'],)
            | predict((d['RFReg'],))
            # Clamp predictions to the valid Response range [1, 8], as the
            # XGBoost variant of this function does.
            | clip(1, 8)
            | P.first
            )
        | del_key('RFReg')

        | P.first
    )

    # Write (Id, Response) pairs as integers with a verbatim header row.
    (
        (data['test_labels'], data['y_hat'])
        | stack(axis=1)
        | savetxt(out_csv_file,
                  delimiter=',',
                  fmt=['%d', '%d'],
                  header='"Id","Response"', comments='')
        | P.first
    )

    return