Example #1
 def test_gauge_no_fermions(self):
     tf.reset_default_graph()
     # spec
     batch_size = 1000
     N = 2
     m = 10
     num_fermions = 0
     rank = 1
     name = "test_gauge_no_fermions"
     algebra = SU(N)
     # build the model
     with tf.variable_scope(name):
         dim = 2 * algebra.dim
         bosonic_wavefunc = Autoregressive(
             [Mixture([Affine(Normal())] * 2)] * dim, dim, 2)
         fermionic_wavefunc = FermionicWavefunction(algebra, dim, 2,
                                                    num_fermions, rank, dim,
                                                    1)
         vectorizer = Vectorizer(algebra, tfp.bijectors.Exp())
         wavefunc = Wavefunction(algebra, vectorizer, bosonic_wavefunc,
                                 fermionic_wavefunc)
         bosonic = wavefunc.sample(batch_size)
         log_norm, _ = wavefunc(bosonic)
     # observables
     radius = tf.sqrt(matrix_quadratic_potential(bosonic) / N)
     gauge = matrix_SUN_adjoint_casimir(wavefunc, bosonic)
     rotation = miniBMN_SO3_casimir(wavefunc, bosonic)
     energy = fuzzy_sphere_energy(m, wavefunc, bosonic)
     # training
     print("Training ...")
     output_path = "results/" + name + "/"
     obs = minimize(name,
                    log_norm,
                    energy,
                    10000, {
                        "r": radius,
                        "gauge": gauge,
                        "rotation": rotation
                    },
                    100,
                    5000,
                    lr=1e-3,
                    output_path=output_path)
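     # Fine-tune: restore the checkpoint saved above and continue
     # training at a lower learning rate.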
     obs = minimize(name,
                    log_norm,
                    energy,
                    5000, {
                        "r": radius,
                        "gauge": gauge,
                        "rotation": rotation
                    },
                    100,
                    5000,
                    lr=1e-4,
                    restore_path=output_path)
     self.assertTrue(np.abs(obs["energy"] + 12593) < 5)
     self.assertTrue(np.abs(obs["r"] - 12.99) < 1e-2)
     self.assertTrue(0 < obs["gauge"] < 1e-8)
     self.assertTrue(0 < obs["rotation"] < 1e-2)
Example #2
 def test_gauge_susy(self):
     tf.reset_default_graph()
     # spec
     batch_size = 1000
     N = 2
     m = 10
     num_fermions = 2
     rank = 4
     name = "test_gauge_susy"
     algebra = SU(N)
     # build the model
     with tf.variable_scope(name):
         dim = 2 * algebra.dim
         with open("data/SpinMatrices" + str(N) + ".bin", "rb") as f:
             mats = tf.constant(
                 np.reshape(
                     np.fromfile(f,
                                 dtype=np.dtype("complex64"),
                                 count=3 * N * N), [3, N, N]))
             Sx, Sy, Sz = mats[0], mats[1], mats[2]
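             # The rescaled spin matrices serve as an offset that is fed
             # through the vectorizer below to center the bosonic flow.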
             offset = m * tf.stack([-Sz, -Sy, -Sx])
         vectorizer = Vectorizer(algebra, tfp.bijectors.Exp())
         offset = vectorizer.encode(tf.expand_dims(offset, 0))[0]
         bosonic_wavefunc = NormalizingFlow([Normal()] * dim, 0,
                                            tfp.bijectors.Sigmoid(), offset)
         fermionic_wavefunc = FermionicWavefunction(algebra, dim, 2,
                                                    num_fermions, rank, dim,
                                                    1)
         wavefunc = Wavefunction(algebra, vectorizer, bosonic_wavefunc,
                                 fermionic_wavefunc)
         bosonic = wavefunc.sample(batch_size)
         log_norm, fermionic = wavefunc(bosonic)
     # observables
     radius = tf.sqrt(matrix_quadratic_potential(bosonic) / N)
     kinetic = matrix_kinetic_energy(wavefunc, bosonic)
     bilinear = miniBMN_yukawa_potential(bosonic, fermionic)
     gauge = matrix_SUN_adjoint_casimir(wavefunc, bosonic)
     rotation = miniBMN_SO3_casimir(wavefunc, bosonic)
     energy = miniBMN_energy(m, wavefunc, bosonic)
     # training
     print("Training ...")
     output_path = "results/" + name + "/"
     obs = minimize(name,
                    log_norm,
                    energy,
                    5000, {
                        "r": radius,
                        "kinetic": kinetic,
                        "bilinear": bilinear,
                        "gauge": gauge,
                        "rotation": rotation
                    },
                    100,
                    5000,
                    lr=1e-3,
                    output_path=output_path)
     obs = minimize(name,
                    log_norm,
                    energy,
                    5000, {
                        "r": radius,
                        "kinetic": kinetic,
                        "bilinear": bilinear,
                        "gauge": gauge,
                        "rotation": rotation
                    },
                    100,
                    5000,
                    lr=1e-4,
                    restore_path=output_path)
     self.assertTrue(np.abs(obs["energy"] - 0) < 1)
     self.assertTrue(np.abs(obs["kinetic"] - 55) < 5)
     self.assertTrue(np.abs(obs["r"] - 8.66) < 1e-2)
     self.assertTrue(np.abs(obs["bilinear"] + 40) < 0.4)
     self.assertTrue(0 < obs["gauge"] < 1e-8)
     self.assertTrue(np.abs(obs["rotation"] - 0) < 4e-2)
Example #3
 def test_no_gauge_single_fermion(self):
     tf.reset_default_graph()
     # spec
     batch_size = 1000
     N = 2
     m = 10
     num_fermions = 1
     rank = 1
     name = "test_no_gauge_single_fermion"
     algebra = SU(N)
     # build the model
     with tf.variable_scope(name):
         bosonic_dim = 3 * algebra.dim
         fermionic_dim = 2 * algebra.dim
         bosonic_wavefunc = Autoregressive(
             [Mixture([Affine(Normal())] * 2)] * bosonic_dim, bosonic_dim,
             2)
         fermionic_wavefunc = FermionicWavefunction(algebra, bosonic_dim, 2,
                                                    num_fermions, rank,
                                                    fermionic_dim, 2)
         vectorizer = Vectorizer(algebra)
         wavefunc = Wavefunction(Trivial(), vectorizer, bosonic_wavefunc,
                                 fermionic_wavefunc)
         bosonic = wavefunc.sample(batch_size)
         log_norm, fermionic = wavefunc(bosonic)
     # observables
     radius = tf.sqrt(matrix_quadratic_potential(bosonic) / N)
     kinetic = matrix_kinetic_energy(wavefunc, bosonic)
     bilinear = miniBMN_yukawa_potential(bosonic, fermionic)
     gauge = matrix_SUN_adjoint_casimir(wavefunc, bosonic)
     rotation = miniBMN_SO3_casimir(wavefunc, bosonic)
     pre_energy = fuzzy_sphere_energy(m, wavefunc, bosonic)
     energy = miniBMN_energy(m, wavefunc, bosonic)
     # pretraining
     print("Pretraining ...")
     output_path = "results/" + name + "/"
     obs = minimize(name,
                    log_norm,
                    pre_energy,
                    10000, {"r": radius},
                    1000,
                    5000,
                    lr=1e-3,
                    output_path=output_path)
     # training
     print("Training ...")
     obs = minimize(name,
                    log_norm,
                    energy,
                    10000, {
                        "r": radius,
                        "kinetic": kinetic,
                        "bilinear": bilinear,
                        "gauge": gauge,
                        "rotation": rotation
                    },
                    100,
                    5000,
                    lr=1e-3,
                    restore_path=output_path)
     obs = minimize(name,
                    log_norm,
                    energy,
                    5000, {
                        "r": radius,
                        "kinetic": kinetic,
                        "bilinear": bilinear,
                        "gauge": gauge,
                        "rotation": rotation
                    },
                    100,
                    5000,
                    lr=1e-4,
                    restore_path=output_path)
     self.assertTrue(obs["energy"] < 15)
     self.assertTrue(obs["kinetic"] < 65)
     self.assertTrue(np.abs(obs["r"] - 8.66) < 2e-2)
     self.assertTrue(np.abs(obs["bilinear"] + 20) < 5e-2)
Example #4
def fit(request, hyper_params=default_hyper_params, nperiod=288):
    passed_hyper_params = hyper_params
    hyper_params = {}
    hyper_params.update(passed_hyper_params)
    hyper_params.update(request.hyper_params)

    logging.info(f"fitting model with hyper parameters {hyper_params}")

    frame = make_frame(request, hyper_params=hyper_params)

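    # Time is discretized into nperiod = 288 five-minute bins per day,
    # so durations given in minutes are divided by 5 to convert to bins.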
    basal_insulin_curve = expia1(
        np.arange(nperiod),
        request.basal_insulin_parameters.get("delay", 5.0) / 5.0,
        request.basal_insulin_parameters["peak"] / 5.0,
        request.basal_insulin_parameters["duration"] / 5.0,
    )
    # TODO: make this the average carb curve
    default_carb_curve = carb_curve(np.arange(nperiod), 3, 36)

    # Set up parameter schedules.
    #
    # We arrange for each of basal, insulin sensitivity, and carb ratios
    # to have 24 windows in each day.
    #
    # TODO: assign windows for carb ratios based on data density
    #
    # TODO: find a better initialization strategy when no schedules are provided
    #
    # Order is: basals, insulin sensitivities, carb ratios
    if request.insulin_sensitivity_schedule is not None:
        init_insulin_sensitivity_params = attribute_parameters(
            basal_insulin_curve, request.insulin_sensitivity_schedule.index,
            request.insulin_sensitivity_schedule.values)
    else:
        init_insulin_sensitivity_params = 140 * np.ones(24)

    if request.carb_ratio_schedule is not None:
        init_carb_ratio_params = attribute_parameters(
            default_carb_curve, request.carb_ratio_schedule.index,
            request.carb_ratio_schedule.values)
    else:
        init_carb_ratio_params = 15. * np.ones(24)

    if request.basal_rate_schedule is not None:
        init_basal_rate_params = attribute_parameters(
            basal_insulin_curve, request.basal_rate_schedule.index,
            request.basal_rate_schedule.values)
    else:
        init_basal_rate_params = np.zeros(24)

    init_params = np.concatenate([
        init_basal_rate_params, init_insulin_sensitivity_params,
        init_carb_ratio_params
    ])

    def unpack_params(params):
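        # Slices: 24 hourly basals, then 24 insulin sensitivities,
        # then 24 carb ratios.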
        return params[:24], params[24:48], params[48:72]

    insulin = frame["insulin"].values
    carbs = frame["carb"].values
    deltas = frame["delta"].values

    hour = frame.index.hour
    quantile = hyper_params["quantile_loss_quantile"]

    # Construct bounds based on the allowable tuning limit.
    if request.tuning_limit is not None and request.tuning_limit > 0:
        bounds = list(
            zip(init_params * (1 - request.tuning_limit),
            init_params * (1 + request.tuning_limit)))
    else:
        bounds = None

    # Re-weight entries that have carbohydrate activity so that
    # the model prefers having (much) better carb parameters
    # over slightly worse-fitting sensitivity and basal parameters.
    weights = np.ones_like(deltas)
    weights[frame["carb"] > 0] = (np.sum(frame["carb"] == 0) /
                                  np.sum(frame["carb"] > 0))

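    # Predicted glucose delta per bin: insulin sensitivity times net
    # activity (carbs scaled by the carb ratio, minus insulin, plus basal).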
    def model(params):
        basals, insulin_sensitivities, carb_ratios = unpack_params(params)
        basal = basals[hour]
        insulin_sensitivity = insulin_sensitivities[hour]
        carb_ratio = carb_ratios[hour]
        return insulin_sensitivity * (carbs / carb_ratio - insulin + basal)

    if bounds is not None:
        lower, upper = zip(*bounds)
        lower, upper = np.array(lower), np.array(upper)
        # This is a hack to get around the fact that basals are summed
        # over multiple hours. Thus this is only an approximate bounds,
        # but it's much simpler than the alternative.
        insulin_duration_hours = request.basal_insulin_parameters[
            "duration"] / 60.
        lower[:24] = lower[:24] / insulin_duration_hours
        upper[:24] = upper[:24] / insulin_duration_hours

    def loss(params, iter):
        preds = model(params)
        penalty = -10.0 * np.sum(np.minimum(params, 0.0))

        # Use a barrier function if bounds are provided.
        if bounds is not None:
            # HACK: simulate a "rectified" barrier function here.
            # Note also that this doesn't work for basals since they
            # are summed up.
            epsilon = 0.00001
            penalty_params = params.copy()
            penalty_params[penalty_params >=
                           upper] = upper[penalty_params >= upper] - epsilon
            penalty_params[penalty_params <=
                           lower] = lower[penalty_params <= lower] + epsilon
            penalty += np.sum(
                np.maximum(0., -0.01 * np.log(upper - penalty_params)))
            penalty += np.sum(
                np.maximum(0., -0.01 * np.log(penalty_params - lower)))

        # Quantile (pinball) regression at the configured quantile
        # (0.5 corresponds to median regression).
        error = weights * (deltas - preds)
        return np.mean(np.maximum(quantile * error,
                                  (quantile - 1.0) * error)) + penalty

    if hyper_params["optimizer"] == "adam":
        params, training_loss = train.minimize(loss, init_params)
    elif hyper_params["optimizer"] == "scipy.minimize":
        opt = optimize.minimize(loss, init_params, args=(0, ))
        params = opt.x
        training_loss = opt.fun
    else:
        raise ValueError(f"unknown optimizer {hyper_params['optimizer']}")

    # Clip the parameters here in case the loss penalties
    # above were insufficient.
    params = np.maximum(params, 0.0)
    basals, insulin_sensitivities, carb_ratios = unpack_params(params)

    # Now, infer parameter schedules based on the optimized
    # instantaneous parameters. For carbs, we use the average
    # carb curve based on data. We also use the basal insulin
    # parameters for ISF schedules.

    if request.basal_rate_schedule is None:
        # Default: hourly
        basal_rate_index = np.arange(0, 288, 12)
    else:
        basal_rate_index = request.basal_rate_schedule.reindexed(5)
    basal_rate_schedule = (identify_curve(
        basal_insulin_curve, basal_rate_index, np.repeat(basals, 12)) * 12)

    if request.insulin_sensitivity_schedule is None:
        insulin_sensitivity_index = np.arange(0, 288, 12 * 4)
    else:
        insulin_sensitivity_index = request.insulin_sensitivity_schedule.reindexed(
            5)
    insulin_sensitivity_schedule = identify_curve(
        basal_insulin_curve, insulin_sensitivity_index,
        np.repeat(insulin_sensitivities, 12))

    if request.carb_ratio_schedule is None:
        carb_ratio_index = 12 * 6 + np.arange(0, 12 * 12, 4 * 12)
    else:
        carb_ratio_index = request.carb_ratio_schedule.reindexed(5)
    carb_ratio_schedule = identify_curve(default_carb_curve, carb_ratio_index,
                                         np.repeat(carb_ratios, 12))

    # Finally, "quantize" the basal schedule if needed.
    #
    # TODO: Currently this simply snaps each rate down to the nearest
    # allowable basal rate. We should try to push this up to the
    # model (e.g., the cost function could encourage values close to
    # allowable values), or split the schedule so that the total
    # amount delivered over the scheduled intervals is equal to the
    # modeled amount, but the rate varies within the intervals.
    #
    # TODO: Another possibility is to perform one model run
    # to fit the basals, then another with the basals "fixed" to the
    # snapped values, allowing the model to adjust the other
    # parameters accordingly.
    #
    # TODO: collapse adjacent entries with the same value.
    if request.allowed_basal_rates is not None:
        allowed = sorted(request.allowed_basal_rates)
        for (i, rate) in enumerate(basal_rate_schedule):
            j = bisect.bisect(allowed, rate)
            # TODO: perhaps be a little more generous here,
            # snapping up when values are (much) closer.
            if j == 0 and rate != allowed[0]:
                basal_rate_schedule[i] = 0.0
            elif j >= len(allowed) or rate != allowed[j]:
                basal_rate_schedule[i] = allowed[j - 1]

    def make_schedule(index, schedule):
        assert len(index) == len(schedule)
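        # Convert 5-minute bin indices to minutes since midnight.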
        return ((5 * index).tolist(), schedule.tolist())

    return Model(
        params={
            "insulin_sensitivity_schedule":
            make_schedule(insulin_sensitivity_index,
                          insulin_sensitivity_schedule),
            "carb_ratio_schedule":
            make_schedule(carb_ratio_index, carb_ratio_schedule),
            "basal_rate_schedule":
            make_schedule(basal_rate_index, basal_rate_schedule),
        },
        raw_insulin_sensitivities=insulin_sensitivities,
        raw_carb_ratios=carb_ratios,
        raw_basals=basals,
        training_loss=training_loss,
    )
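A note on the loss in Example #4: with quantile 0.5 the pinball loss max(q*e, (q-1)*e) reduces to half the mean absolute error, i.e. a median regression. A minimal self-contained check (NumPy only; the pinball helper is illustrative, not part of the original code):

import numpy as np

def pinball(error, q):
    # Penalize positive error (under-prediction) with weight q and
    # negative error (over-prediction) with weight 1 - q.
    return np.mean(np.maximum(q * error, (q - 1.0) * error))

e = np.array([-2.0, -1.0, 0.0, 1.0, 2.0])
assert np.isclose(pinball(e, 0.5), 0.5 * np.mean(np.abs(e)))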
Example #5
def train(experiment):
    print('experiment: ', experiment)
    config = initialize_env(experiment)
    X_train, Y_train, X_valid, Y_valid, _, _ = create_Uttdata(config)

    vocab = utt_Vocab(config, X_train + X_valid, Y_train + Y_valid)

    X_train, Y_train = minimize(X_train), minimize(Y_train)
    X_valid, Y_valid = minimize(X_valid), minimize(Y_valid)

    with open('./data/minidata.pkl', 'wb') as f:
        a, b = parallelize(X_train, Y_train)
        pickle.dump([(c, d) for c, d in zip(a, b)], f)

    X_train, Y_train = vocab.tokenize(X_train, Y_train)
    X_valid, Y_valid = vocab.tokenize(X_valid, Y_valid)

    X_train, Y_train = parallelize(X_train, Y_train)
    X_valid, Y_valid = parallelize(X_valid, Y_valid)
    print('Finish create dataset')

    lr = config['lr']
    batch_size = config['BATCH_SIZE']

    encoder = UtteranceEncoder(
        utt_input_size=len(vocab.word2id),
        embed_size=config['UTT_EMBED'],
        utterance_hidden=config['UTT_HIDDEN'],
        padding_idx=vocab.word2id['<UttPAD>']).to(device)
    decoder = UtteranceDecoder(
        utterance_hidden_size=config['DEC_HIDDEN'],
        utt_embed_size=config['UTT_EMBED'],
        utt_vocab_size=config['UTT_MAX_VOCAB']).to(device)
    context = UtteranceContextEncoder(
        utterance_hidden_size=config['UTT_CONTEXT']).to(device)

    encoder_opt = optim.Adam(encoder.parameters(), lr=lr)
    decoder_opt = optim.Adam(decoder.parameters(), lr=lr)
    context_opt = optim.Adam(context.parameters(), lr=lr)

    model = seq2seq(device).to(device)

    criterion = nn.CrossEntropyLoss(ignore_index=vocab.word2id['<UttPAD>'])

    start = time.time()

    print_total_loss = 0
    _valid_loss = None

    for e in range(config['EPOCH']):
        tmp_time = time.time()
        print('Epoch {} start'.format(e + 1))

        indexes = [i for i in range(len(X_train))]
        random.shuffle(indexes)
        k = 0
        while k < len(indexes):
            step_size = min(batch_size, len(indexes) - k)

            encoder_opt.zero_grad()
            decoder_opt.zero_grad()
            context_opt.zero_grad()

            batch_idx = indexes[k:k + step_size]

            print('\r{}/{} pairs training ...'.format(k + step_size,
                                                      len(X_train)),
                  end='')

            X_seq = [X_train[seq_idx] for seq_idx in batch_idx]
            Y_seq = [Y_train[seq_idx] for seq_idx in batch_idx]

            max_xseq_len = max(len(x) + 1 for x in X_seq)
            max_yseq_len = max(len(y) + 1 for y in Y_seq)

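            # Right-pad every sequence in the batch to the longest
            # sequence length with the <UttPAD> token.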
            for si in range(len(X_seq)):
                X_seq[si] = X_seq[si] + [vocab.word2id['<UttPAD>']
                                         ] * (max_xseq_len - len(X_seq[si]))
                Y_seq[si] = Y_seq[si] + [vocab.word2id['<UttPAD>']
                                         ] * (max_yseq_len - len(Y_seq[si]))
            X_tensor = torch.tensor(X_seq).to(device)
            Y_tensor = torch.tensor(Y_seq).to(device)

            loss = model.forward(X=X_tensor,
                                 Y=Y_tensor,
                                 encoder=encoder,
                                 decoder=decoder,
                                 context=context,
                                 step_size=step_size,
                                 criterion=criterion,
                                 config=config)
            print_total_loss += loss

            encoder_opt.step()
            decoder_opt.step()
            context_opt.step()

            k += step_size

        print()

        valid_loss = validation(X=X_valid,
                                Y=Y_valid,
                                model=model,
                                encoder=encoder,
                                decoder=decoder,
                                context=context,
                                vocab=vocab,
                                config=config)

        # Track the best validation loss so far and checkpoint on improvement.
        if _valid_loss is None or valid_loss < _valid_loss:
            _valid_loss = valid_loss
            torch.save(encoder.state_dict(),
                       os.path.join(config['log_dir'], 'enc_beststate.model'))
            torch.save(decoder.state_dict(),
                       os.path.join(config['log_dir'], 'dec_beststate.model'))

        if (e + 1) % config['LOGGING_FREQ'] == 0:
            print_loss_avg = print_total_loss / config['LOGGING_FREQ']
            print_total_loss = 0
            print('epoch %d\tloss %.4f\tvalid loss %.4f | exec time %.4f' %
                  (e + 1, print_loss_avg, valid_loss, time.time() - tmp_time))

        if (e + 1) % config['SAVE_MODEL'] == 0:
            print('saving model')
            torch.save(
                encoder.state_dict(),
                os.path.join(config['log_dir'],
                             'enc_state{}.model'.format(e + 1)))
            torch.save(
                decoder.state_dict(),
                os.path.join(config['log_dir'],
                             'dec_state{}.model'.format(e + 1)))

    print()
    print('Finish training | exec time: %.4f [sec]' % (time.time() - start))