Example #1
    def __init__(self,
        num_nodes,
        n,
        p,
        max_rounds=10,
        h=0.01,
        noisy=False,
        bias=False):

        self.num_nodes = num_nodes
        self.n = n
        self.max_rounds = max_rounds
        self.h = h
        self.noisy = noisy
        self.bias = bias
        self.p = p + 1 if self.bias else p

        self.init_params = np.random.randn(
            self.p * self.num_nodes, 1)

        self.w = np.random.randn(
            self.p * self.num_nodes, 1)
        ps = [p] * self.num_nodes
        ws = [np.copy(self.w[i*self.p:(i+1)*self.p])
              for i in range(self.num_nodes)]
        loaders = get_LRGL(
            self.n, 
            ps,
            ws=ws,
            noisys=[self.noisy] * self.num_nodes,
            bias=bias)

        self.servers = [BS(l) for l in loaders]
        self.get_model = lambda i: LR(self.p * self.num_nodes, i)
        self.w_hat = None
Example #2
def mapConf2Model(name):
    conf = d_name_conf[name]
    model_name = name.split('_')[0]
    #if model_name != 'lr' and model_name != 'fm' and model_name != 'DINN':
    #    conf['layer_sizes'] = [FIELD_SIZES, 10, 1]
    if model_name in ('lr', 'fm'):
        conf['input_dim'] = INPUT_DIM
    print('conf', conf)
    if model_name == 'ffm':
        return FFM(**conf)
    elif model_name == 'fwfm':
        conf['layer_sizes'] = [FIELD_SIZES, 10, 1]
        return FwFM(**conf)
    elif model_name == 'fwfm3':
        conf['layer_sizes'] = [FIELD_SIZES, 10, 1]
        return FwFM3(**conf)
    elif model_name == 'fm':
        return FM(**conf)
    elif model_name == 'lr':
        return LR(**conf)
    elif model_name == 'fwfmoh':
        return FwFM_LE(**conf)
    elif model_name == 'MTLfwfm':
        conf['index_lines'] = utils.index_lines
        conf['num_lines'] = FIELD_SIZES[utils.index_lines]
        conf['layer_sizes'] = [FIELD_SIZES, 10, 1]
        return MultiTask_FwFM(**conf)
    elif model_name == 'DINN':
        conf['layer_sizes'] = [FIELD_SIZES, 10, 1]
        return DINN(**conf)
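Since every branch in the dispatcher above only picks a class and optionally patches conf, the elif chain collapses naturally into a lookup table. A sketch of that variant, assuming the same classes and module globals as the snippet (this is not part of the original):

# Sketch: table-driven form of mapConf2Model (same globals assumed).
_MODELS = {'ffm': FFM, 'fwfm': FwFM, 'fwfm3': FwFM3, 'fm': FM, 'lr': LR,
           'fwfmoh': FwFM_LE, 'MTLfwfm': MultiTask_FwFM, 'DINN': DINN}

def mapConf2Model_table(name):
    conf = d_name_conf[name]
    model_name = name.split('_')[0]
    if model_name in ('lr', 'fm'):
        conf['input_dim'] = INPUT_DIM
    if model_name in ('fwfm', 'fwfm3', 'MTLfwfm', 'DINN'):
        conf['layer_sizes'] = [FIELD_SIZES, 10, 1]
    if model_name == 'MTLfwfm':
        conf['index_lines'] = utils.index_lines
        conf['num_lines'] = FIELD_SIZES[utils.index_lines]
    return _MODELS[model_name](**conf)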
Example #3
def mapConf2Model(name):
    conf = d_name_conf[name]
    model_name = name.split('_')[0]
    if model_name == 'ffm':
        return FFM(**conf)
    elif model_name == 'fwfm':
        return FwFM(**conf)
    elif model_name == 'fm':
        return FM(**conf)
    elif model_name == 'lr':
        return LR(**conf)
Example #4
def solve(train_x, train_y, test_x, test_y, K=5, method='PLA', iter_method='whole',
          optimizer='BGD', dynamic_lr=False, batch_size=9999):

    train_x, train_y = split_data(train_x, train_y, K)
    assert method in ('PLA', 'LR')

    #mean_test_accuracy = 0
    #mean_train_accuracy = 0
    for i in range(K):
        #filter_list = [j for j in range(K) if i != j]
        #trains_x = trainsform(train_x, filter_list)

        #trains_y = trainsform(train_y, filter_list)
        #tests_x = train_x[i]
        #tests_y = train_y[i]
            
        trains_x = train_x[0]
        trains_y = train_y[0]

        if method=='PLA': 
            model = PLA(num_class=40, lr=1, iter_method=iter_method)
            model.fit(trains_x, trains_y, iter_size=100)
        elif method=='LR': 
            model = LR(num_class=40, lr=1, optimizer=optimizer)
            model.fit(trains_x, trains_y, iter_size=100, batch_size=batch_size, dynamic_lr=dynamic_lr)
            
        #pred = model.predict(tests_x)
        #test_accuracy = model.score(tests_y,pred)
        #mean_test_accuracy += test_accuracy

        pred = model.predict(trains_x)
        train_accuracy = model.score(trains_y, pred)
        #mean_train_accuracy += train_accuracy

        #print("fold %d, test accuracy %f"%(i, test_accuracy))
        print("fold %d, train accuracy %f"%(i, train_accuracy))

        pred = model.predict(test_x)
        test_accuracy = model.score(test_y, pred)
        print(pred)
        print(test_y)
        print("fold %d, test accuracy %f" % (i, test_accuracy))
Example #5
    def __init__(self,
                 num_nodes,
                 n,
                 p,
                 max_rounds=5,
                 dane_rounds=50,
                 tau=0.1,
                 gamma=0.8,
                 mu=100,
                 init_params=None,
                 noisy=False):

        self.num_nodes = num_nodes
        self.n = n
        self.p = p + 1  # Add 1 for bias term
        self.max_rounds = max_rounds
        self.dane_rounds = dane_rounds
        self.tau = tau
        self.gamma = gamma
        self.mu = mu
        self.noisy = noisy

        if init_params is None:
            init_params = np.random.randn(self.p, 1)

        self.init_params = init_params
        self.w = np.random.randn(self.p, 1)

        ps = [p] * self.num_nodes
        ws = [np.copy(self.w) for _ in range(self.num_nodes)]
        loaders = get_LRGL(self.n,
                           ps,
                           ws=ws,
                           noisys=[self.noisy] * self.num_nodes,
                           bias=True)

        self.servers = [BS(l) for l in loaders]
        self.model = LR(self.p)
        self.w_hat = None
Example #6
                      (np.argmax(history_score), np.max(history_score)))
                break


algo = 'pnn2'

if algo == 'lr':
    lr_params = {
        'input_dim': input_dim,
        'opt_algo': 'gd',
        'learning_rate': 0.01,
        'l2_weight': 0,
        'random_seed': 0
    }

    model = LR(**lr_params)
elif algo == 'fm':
    fm_params = {
        'input_dim': input_dim,
        'factor_order': 10,
        'opt_algo': 'gd',
        'learning_rate': 0.1,
        'l2_w': 0,
        'l2_v': 0,
    }

    model = FM(**fm_params)
elif algo == 'fnn':
    fnn_params = {
        'layer_sizes': [field_sizes, 10, 1],
        'layer_acts': ['tanh', 'none'],
Example #7
import numpy as np
import pylab as pl
from scipy.integrate import ode
from models import dendrite, soma, nmda, HH, LR


def nmda_impulse(y, t, T=10, t_in=20, t_end=23):
    rb = 0.005
    dy = nmda(y)
    if t_in <= t < t_end:
        dy[0] += -rb * T * y[0]
        dy[1] += rb * T * y[0] - rb * y[1] * T
        dy[2] += rb * y[1]
    return dy


LRt = lambda t, y: LR(y)
y0 = np.array([-50.0, 0.7, 0.0530])
#time = np.linspace(0, 500, 1000000)
solver = 'dop853'  #'vode'#'dop853'
r = ode(LRt).set_integrator(solver)
r.set_initial_value(y0, 0)
t1 = 20
dt = 0.01
v = []
time = []
while r.t < t1:  #r.successful() and
    r.integrate(r.t + dt)
    v.append(r.y[0])
    time.append(r.t)
#out = odeint(HH_input, y0, time)
pl.figure(1)
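The excerpt breaks off right after opening the figure. A plausible continuation that plots the trace collected in the integration loop (an assumption, not the original code):

# Sketch: plot the first state variable recorded while stepping the solver.
pl.plot(time, v)
pl.xlabel('t')
pl.ylabel('y[0]')
pl.show()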
Example #8
field_size = Xi_train.shape[1]

algo = sys.argv[2]

if algo == 'lr':
    lr_params = {
        "feature_size": feature_size,
        "field_size": field_size,
        "epoch": 10,
        "batch_size": 1024,
        "learning_rate": 0.001,
        "optimizer_type": "adam",
        "l2_reg": 0.01,
        "verbose": True
    }
    lr = LR(**lr_params)
    lr.fit(Xi_train, Xv_train, y_train, Xi_valid, Xv_valid, y_valid)
elif algo == 'fm':
    fm_params = {
        "feature_size": feature_size,
        "field_size": field_size,
        "embedding_size": 15,
        "epoch": 20,
        "batch_size": 1024,
        "learning_rate": 0.001,
        "optimizer_type": "adam",
        "l2_w_reg": 0.01,
        "l2_v_reg": 0.01,
        "verbose": True
    }
    fm = FM(**fm_params)
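The excerpt stops after constructing the FM model; by symmetry with the lr branch, a fit call presumably follows. A sketch, assuming FM exposes the same fit signature as LR here:

# Sketch: mirrors the lr branch above (FM.fit signature is an assumption).
fm.fit(Xi_train, Xv_train, y_train, Xi_valid, Xv_valid, y_valid)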
Example #9
    device=device)

train_iter = BatchWrapper(train_iter, x_fields, y_fields)
dev_iter = BatchWrapper(dev_iter, x_fields, y_fields)
test_iter = BatchWrapper(test_iter, x_fields, y_fields)

if __name__ == '__main__':
    regression = tasks[task][1] == 1  # only one task is regression; handling it separately would arguably be cleaner

    #hyper parameters
    learning_rate = 0.0001
    epochs = 50
    #fix_length = 50  # depends on the task
    #static = False  # whether to update word embeddings; static = False is the default in the model and training code
    dropout = 0.5
    l2 = 0
    mean = True  #mean/sum

    t = 5
    test_accs = []
    for i in range(t):
        model = LR(task, vocab, mean, dropout=dropout)
        model = training(regression, train_iter, dev_iter, model, device,
                         learning_rate, l2, epochs)
        test_acc = evaluating(test_iter, model, device)[0]
        print('test_acc: %.3f' % test_acc)  #acc
        test_accs.append(test_acc)

    print('%d times: %s' % (t, test_accs))
    print('%d times average: %.3f' % (t, sum(test_accs) / t))
Example #10
test = pd.read_csv(data_dir + 'ml-tag-test.csv')
valid = pd.read_csv(data_dir + 'ml-tag-valid.csv')
data = pd.concat([train, test, valid])
features_sizes = [data[c].nunique() for c in features]

y_train = train['y'].values.reshape((-1, 1))
y_test = test['y'].values.reshape((-1, 1))
y_valid = valid['y'].values.reshape((-1, 1))

#lambdas=[0.01,0.1,0.5,1.0,2.0]
#for l in lambdas:
ls = []
Rounds = 1
for _ in range(Rounds):
    model = LR(
        features_sizes, hash_size=int(1e6)
    )  #valid score 1e5:0.82 3e5:0.79  6e5:0.773 1e6:0.766  | proto test score:0.852
    #model=FM(features_sizes,k=256)  # 0.474 hash->
    #model = FM(features_sizes, k=24, hash_size=int(1e6))  # proto score: k=24 + hash 1e6 = 0.692, 0.693 (valid beats test: 0.631)
    #model=MLP(features_sizes,deep_layers=(256,256),k=256)  # small batch=1024; LR does not need a small one. Same lr 1e-3: valid_score=model.fit(train[features],valid[features],y_train,y_valid,lr=0.001,N_EPOCH=100,batch_size=1024,early_stopping_rounds=15)
    #model = DeepFM(features_sizes, deep_layers=(256, 256), k=256)
    #model = NFM(features_sizes, k=256)
    #model = AFM(features_sizes,k=256,attention_FM=256)
    #model = AFM(features_sizes, k=256, attention_FM=8,dropout_keeprate=0.9,lambda_l2=0.001)
    #model = MLP(features_sizes, deep_layers=(1,), k=256)
    #model=AutoInt(features_sizes,k=8)
    valid_score = model.fit(train[features],
                            valid[features],
                            y_train,
                            y_valid,
                            lr=0.001,
Example #11
    enc = ColdStartEncoder()
    train.loc[:, c] = enc.fit_transform(train[c])
    test.loc[:, c] = enc.transform(test[c])
    encs.append(enc)

train_transaction_id, test_transaction_id = train.index, test.index
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)
train_y = train['isFraud'].reset_index(drop=True)

X_train = train.iloc[:472432, :].sample(frac=1.0, random_state=42)
X_valid = train.iloc[472432:, :].sample(frac=1.0, random_state=42)
y_train = X_train['isFraud'].values.reshape((-1, 1))
y_valid = X_valid['isFraud'].values.reshape((-1, 1))

model = LR(features_sizes, loss_type='binary', metric_type='auc')
#model=FM(features_sizes,k=8,loss_type='binary',metric_type='auc')
#model=MLP(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(32,32))
#model=BiFM(features_sizes,k=8,loss_type='binary',metric_type='auc')
#model=DeepFM(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(32,32))
#model=AFM(features_sizes,loss_type='binary',metric_type='auc',attention_FM=8)
#model=CFM(features_sizes,loss_type='binary',metric_type='auc')
#model=MLR(features_sizes,loss_type='binary',metric_type='auc',MLR_m=16)
#model=MFM(features_sizes,k=8,loss_type='binary',metric_type='auc',MFM_m=2)

best_score = model.fit(X_train[cate_features],
                       X_valid[cate_features],
                       y_train,
                       y_valid,
                       lr=0.0005,
                       N_EPOCH=50,
Example #12
#data.to_hdf(data_dir+'train.hdf', 'w', complib='blosc', complevel=5)
data = pd.read_hdf(data_dir + 'train.hdf').sample(frac=1.0, random_state=42)

features_sizes = [data[c].nunique() for c in features]
#data=data.sample(frac=0.1,random_state=42)
print("Data Prepared.")

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data[features],
                                                    data['click'],
                                                    test_size=0.2,
                                                    random_state=42)
y_train = y_train.values.reshape((-1, 1))
y_test = y_test.values.reshape((-1, 1))

model = LR(features_sizes, loss_type='binary')  #bs=1000
#model=FM(features_sizes,k=8)#bs=500
# model=MLP(features_sizes,deep_layers=(16,16),k=16)
print(model)
best_score = model.fit(X_train,
                       X_test,
                       y_train,
                       y_test,
                       lr=0.001,
                       N_EPOCH=50,
                       batch_size=500,
                       early_stopping_rounds=1)  #0.0005->0.001(1e-3 bs=1000)

#best_score = model.fit(X_train, X_test, y_train, y_test, lr=0.0002, N_EPOCH=50, batch_size=500,early_stopping_rounds=3)#0.0005->0.001(1e-3 bs=1000)
Example #13
                    np.argmax(history_score), np.max(history_score)))
                break


algo = 'pnn2'

if algo == 'lr':
    lr_params = {
        'input_dim': input_dim,
        'opt_algo': 'gd',
        'learning_rate': 0.01,
        'l2_weight': 0,
        'random_seed': 0
    }

    model = LR(**lr_params)
elif algo == 'fm':
    fm_params = {
        'input_dim': input_dim,
        'factor_order': 10,
        'opt_algo': 'gd',
        'learning_rate': 0.1,
        'l2_w': 0,
        'l2_v': 0,
    }

    model = FM(**fm_params)
elif algo == 'fnn':
    fnn_params = {
        'layer_sizes': [field_sizes, 10, 1],
        'layer_acts': ['tanh', 'none'],
Example #14
def run_models(words,
               models,
               verbose,
               train=True,
               test=True,
               embeddings=False):
    '''
    Runs all the models that are specified with the specified word set.
    It runs all preprocessing steps necessary for the models specified.
    Note: If a model is specified twice, it will be run twice, but the
    preprocessing on the input data will not (useful to test model
    parameter initialization).

    Returns a list containing the objects of the models used,
        the outputs they predicted and
        the sklearn classification reports (dictionary format),
        in the order in which they were provided

    Keyword arguments:
        words: list of list of words and features.
            Format: n*m. n = nr of words, m = nr of features + expected output (single)
        models: a string containing the model names. Order is not important.
            Possible models are: NB, LR, SVM, HMM, CRF. Coming soon: CNN
            If a model is specified twice, it will be run twice. The input is
            randomized only once, where applicable
        verbose: 0: print nothing
                 1: print results
                 2: print status messages
                 3: print both
    '''
    # Preparing data for one-hot encoding -- converts strings into integers
    if any(i in models for i in ['NB', 'LR', 'SVM']):
        verbose & 2 and print('Initial pre-processing...')
        if embeddings:
            stems = [word[0] for word in words]
            words = [word[1:] for word in words]
        X, Y, transl, labels_num, labels_name = create_dataset(words)

    #Algorithm uses sentences (list of list of tuples): HMM
    if 'HMM' in models:
        verbose & 2 and print('Preprocessing data for HMM...')
        sentences_hmm, symbols, tag_set = words2tuples(words)
        _, y_train, _, y_test = split_tr([], sentences_hmm, 0.8)
        x_test = [[tup[0] for tup in sentence] for sentence in y_test]
        y_test = [[tup[1] for tup in sentence] for sentence in y_test]
        #shuffle_parallel(x_test,y_test)
        data_hmm = data_wrap(None, y_train, x_test, y_test)

    # Algorithms using shuffled, one-hot data:NB,LR,SVM
    if any(i in models for i in ['NB', 'LR', 'SVM']):
        verbose & 2 and print('Preprocessing data for NB, LR and/or SVM...')
        indexes = shuffle_parallel(X, Y)
        X_onehot_sh = one_hot(X, transl)
        if embeddings:
            verbose & 2 and print('Loading and generating embeddings...')
            X_onehot_sh = embeddings.insert_embeddings(X_onehot_sh, stems,
                                                       indexes)
        x_train_oh_sh, y_train_oh_sh, x_test_oh_sh, y_test_oh_sh = split_tr(
            X_onehot_sh, Y, 0.8)
        data_shuffled = data_wrap(x_train_oh_sh, y_train_oh_sh, x_test_oh_sh,
                                  y_test_oh_sh, transl, labels_num,
                                  labels_name)

    #Ordered, using sentences (list of list of dict): CRF
    if 'CRF' in models:
        verbose & 2 and print('Preprocessing data for CRF...')
        tokens_dict, labels_dict = words2dictionary(words)
        shuffle_parallel(tokens_dict, labels_dict)
        tokens_train, labels_train, tokens_test, labels_test = split_tr(
            tokens_dict, labels_dict, 0.8)
        data_dictionary = data_wrap(tokens_train, labels_train, tokens_test,
                                    labels_test)

    model_objects = []
    model_results = []
    model_predictions = []

    #removes clutter when calling the functions separately
    #Using a list of function handlers could also be used, but I find that to be
    #less intuitive
    def _add_to_output(model_y_pred):
        model_objects.append(model_y_pred[0])
        model_results.append(model_y_pred[1])
        if (len(model_y_pred) > 2):
            model_predictions.append(model_y_pred[2])

    #Run each of the models from the parameters, while KEEPING THE ORDER they were called in
    #and append it to the return lists
    for model in models:
        if 'HMM' in model:
            verbose & 2 and print('Running HMM from nltk...')
            _add_to_output(HMM(data_hmm, symbols, tag_set, verbose & 1))

        if 'NB' in model:
            verbose & 2 and print('Running NB ' +
                                  ('with ' if embeddings else 'without ') +
                                  'embeddings...')
            if embeddings:
                _add_to_output(NB_cont(data_shuffled, verbose & 1))
            else:
                _add_to_output(NB_disc(data_shuffled, verbose & 1))

        if 'LR' in model:
            verbose & 2 and print('Running LR ' +
                                  ('with ' if embeddings else 'without ') +
                                  'embeddings...')
            _add_to_output(
                LR(data_shuffled, verbose & 1, C=(0.1 if embeddings else 5)))

        if 'SVM' in model:
            verbose & 2 and print('Running SVM ' +
                                  ('with ' if embeddings else 'without ') +
                                  'embeddings...')
            _add_to_output(SVM(data_shuffled, verbose & 1))

        if 'CRF' in model:
            verbose & 2 and print('Running CRF...')
            _add_to_output(CRF(data_dictionary, verbose & 1))

    return model_objects, model_results, model_predictions
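A hedged usage sketch for run_models: rows of words carry the features followed by the expected label, verbose=3 enables both result and status printing, and the returned lists keep the order in which models were requested (the inputs here are illustrative):

# Sketch: run LR and SVM on an already-loaded word set.
objs, reports, preds = run_models(words, models=['LR', 'SVM'], verbose=3)
lr_model, lr_report = objs[0], reports[0]  # results keep the request order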