Пример #1
0
def load_snorkel():
    """Restore the six saved generative models.

    Model ``k`` (``k`` = 0..5) is created as a fresh ``GenerativeModel`` and
    loaded from the name ``'snorkel_model' + str(k)``.

    :return: list of the six loaded models, in index order
    """
    prefix = 'snorkel_model'

    def _restore(index):
        # Build and load one model at a time so construction and loading
        # stay interleaved per index.
        model = GenerativeModel()
        model.load(prefix + str(index))
        return model

    return [_restore(index) for index in range(6)]
Пример #2
0
def main():
    """Collect Pommerman demonstrations and fit one generative model per action.

    Steps:

    1. Play 300 episodes of ``PommeTeamCompetition-v0`` with four
       ``SimpleAgent`` players, recording every (observation, one-hot action)
       pair.
    2. Apply every labeling function returned by ``get_lf()`` to each recorded
       observation, filling a ``[6, rows, len(lf)]`` array.
    3. Train and save one ``GenerativeModel`` per action slice under the names
       ``'snorkel_model0'`` .. ``'snorkel_model5'``.

    :return: None
    """
    num_actions = 6
    num_episodes = 300

    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    # Make the team-competition environment using the agent list
    env = pommerman.make('PommeTeamCompetition-v0', agent_list)
    d = []

    # Run the episodes just like OpenAI Gym.  The try/finally guarantees the
    # environment is closed even if an episode raises.
    try:
        for i_episode in range(num_episodes):
            state = env.reset()
            done = False
            while not done:
                cur_obs = env.get_observations()
                actions = env.act(state)
                for ob, act in zip(cur_obs, actions):
                    # One-hot encode the chosen action.
                    val = np.zeros(num_actions)
                    val[act] = 1
                    d.append([ob, val])

                state, _reward, done, _info = env.step(actions)
            print('Episode {} finished'.format(i_episode))
    finally:
        env.close()

    lf = get_lf()

    rows = len(d)

    # L[a, r, i] holds the output of labeling function i on observation r
    # for action slice a.
    L = np.zeros([num_actions, rows, len(lf)])
    for r, (ob, _val) in enumerate(d):
        for i, f in enumerate(lf):
            L[:, r, i] = f(ob)

    # TODO: add ground labels to training
    filename = 'snorkel_model'
    for i in range(num_actions):
        gm = GenerativeModel()
        # L[i] is already 2-D (rows x labeling functions).  Squeezing it would
        # drop an axis whenever there is a single row or a single labeling
        # function, so the slice is passed through unchanged.
        gm.train(L[i].astype(int))
        gm.save(filename + str(i))
Пример #3
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent agent, then load six saved generative models.

     All positional and keyword arguments are forwarded unchanged to the
     parent constructor.  Model ``k`` (``k`` = 0..5) is loaded from
     ``filename + str(k)`` and the resulting list is stored on
     ``self.models``.

     NOTE(review): ``filename`` is not assigned inside this method, so it
     must come from an enclosing/module scope -- confirm it is defined.
     """
     super(SnorkelAgent, self).__init__(*args, **kwargs)
     #TODO: load model
     #        self.models = np.load(filename)['m'].item()

     def _load_one(index):
         # One fresh model per index, loaded immediately after creation.
         model = GenerativeModel()
         model.load(filename + str(index))
         return model

     self.models = [_load_one(index) for index in range(6)]
Пример #4
0
def apply_GenMod(L_train):
    """
    Fit a generative model on a label matrix and store its marginals.

    The model is trained with ``cardinality=3``.  A header line and the
    learned labeling-function statistics (rendered as space-separated CSV
    text) are appended to the ``report`` object from the enclosing scope, and
    the training marginals are saved through ``save_marginals`` using the
    ``session`` from the enclosing scope.

    :param L_train: Label matrix
    :return: None
    """
    model = GenerativeModel()
    # Alternative settings kept for reference:
    # model.train(L_train, epochs=100, decay=0.95, step_size=0.1 / L_train.shape[0], reg_param=1e-6)
    model.train(L_train, cardinality=3)
    marginals = model.marginals(L_train)

    report.append('\n#Gen Model Stats\n')
    stats_csv = model.learned_lf_stats().to_csv(sep=' ', index=False, header=True)
    report.append(stats_csv)

    save_marginals(session, L_train, marginals)
Пример #5
0
def Fitting_Gen_Model(L_train):
    """Train a generative model on ``L_train`` and compute its marginals.

    The step size is scaled by the number of rows (``L_train.shape[0]``).
    After training, the learned ``lf_accuracy`` and ``class_prior`` weights
    are printed.

    :param L_train: label matrix exposing ``shape``
    :return: tuple ``(gen_model, train_marginals)``
    """
    model = GenerativeModel()
    hyper_params = dict(
        epochs=100,
        decay=0.95,
        step_size=0.1 / L_train.shape[0],
        reg_param=1e-6,
    )
    model.train(L_train, **hyper_params)

    # Show the learned weights for a quick sanity check.
    for weight_name in ('lf_accuracy', 'class_prior'):
        print(getattr(model.weights, weight_name))

    # Apply the trained model to the training candidates to obtain the
    # training marginals.
    marginals = model.marginals(L_train)
    return model, marginals
Пример #6
0
 def train_gen_model(self, deps=False, grid_search=False):
     """
     Train the generative model matching the available backend.

     When ``self.has_snorkel`` is true a Snorkel ``GenerativeModel`` is
     trained on ``self.L_train``; otherwise a ``LabelAggregator`` is used.
     The trained model is stored on ``self.gen_model``.

     ``deps`` and ``grid_search`` are accepted but not used by this method.
     """
     if not self.has_snorkel:
         model = LabelAggregator()
         model.train(self.L_train, rate=1e-3, mu=1e-6, verbose=False)
     else:
         #TODO: GridSearch
         from snorkel.learning import GenerativeModel
         from snorkel.learning import RandomSearch
         from snorkel.learning.structure import DependencySelector
         model = GenerativeModel()
         model.train(
             self.L_train,
             epochs=100,
             decay=0.001 ** (1.0 / 100),
             step_size=0.005,
             reg_param=1.0,
         )
     self.gen_model = model
Пример #7
0
def train_snorkel_gen_model(L, gte=True):
    """Train a generative model on ``L`` and derive +1/-1 labels.

    ``L`` is converted to a CSR sparse matrix before training.  The marginals
    are compared against a threshold of ``(max - min) / 2`` and mapped to
    ``+1`` / ``-1``.

    :param L: label matrix accepted by ``sparse.csr_matrix``
    :param gte: when true, marginals ``>=`` the threshold map to ``+1``;
        when false, marginals ``<`` the threshold map to ``+1``
    :return: tuple ``(gen_model, train_labels, train_marginals)``
    """
    label_matrix = sparse.csr_matrix(L)

    model = GenerativeModel()
    model.train(
        label_matrix,
        epochs=100,
        decay=0.95,
        step_size=0.01 / label_matrix.shape[0],
        reg_param=1e-6,
    )

    marginals = model.marginals(label_matrix)

    # NOTE(review): this is half the *range* of the marginals, not the
    # midpoint ((max + min) / 2) -- confirm which one is intended.
    cutoff = (max(marginals) - min(marginals)) / 2

    if gte:
        selected = marginals >= cutoff
    else:
        selected = marginals < cutoff
    # Map the boolean mask {False, True} onto {-1, +1}.
    labels = 2 * selected - 1

    return model, labels, marginals
Пример #8
0
 def __init__(
     self,
     positive_label: str,
     class_cardinality: int = 2,
     num_epochs: int = 500,
     log_train_every: int = 50,
     seed: int = 123,
     threshold: float = 0.5,
 ):
     """Store the configuration and create the Snorkel helper objects.

     Every argument is kept on the instance under the same name.  A
     ``DependencySelector`` is stored on ``self.ds`` and a
     ``GenerativeModel`` built with ``lf_propensity=True`` on
     ``self.gen_model``.
     """
     # Snorkel components.
     self.ds = DependencySelector()
     self.gen_model = GenerativeModel(lf_propensity=True)

     # Label configuration.
     self.positive_label = positive_label
     self.class_cardinality = class_cardinality
     self.threshold = threshold

     # Training configuration.
     self.num_epochs = num_epochs
     self.log_train_every = log_train_every
     self.seed = seed
Пример #9
0
def train_generative_model(data_matrix, burn_in=10, epochs=100, reg_param=1e-6,
    step_size=0.001, deps=None, lf_propensity=False):
    """
    Train a generative model on a label-function matrix.

    data_matrix - the label function matrix which contains the output of all label functions
    burn_in - number of burn-in iterations
    epochs - number of epochs to train the model
    reg_param - how much regularization is needed for the model
    step_size - how much of the gradient will be used during training
    deps - dependency structure to pass to training; None (the default)
        means no dependencies
    lf_propensity - boolean that determines whether the model should model the
        likelihood of a label function

    Training always uses reg_type=2.

    return a fully trained model
    """
    # A None sentinel replaces the former mutable default; an empty tuple
    # means "no dependencies".
    if deps is None:
        deps = ()

    model = GenerativeModel(lf_propensity=lf_propensity)
    # Forward deps so the documented argument actually reaches training
    # (it was previously accepted and silently ignored).
    model.train(
        data_matrix, epochs=epochs,
        burn_in=burn_in, reg_param=reg_param,
        step_size=step_size, reg_type=2,
        deps=deps
    )
    return model
Пример #10
0
def train_gen_model(predicate_resume, parallelism=8):
    """Train, save and apply the generative model for one predicate.

    Opens a new ``SnorkelSession``, loads the label matrix for
    ``predicate_resume``, trains a ``GenerativeModel`` on it, saves the model
    via ``_save_model`` and finally saves the training marginals.

    :param predicate_resume: predicate description passed to the project
        helpers (``_get_labeler``, ``_load_matrix``, ``_save_model``)
    :param parallelism: converted with ``int`` and passed as ``threads`` to
        training
    :return: None
    """
    logging.info("Start train gen")
    session = SnorkelSession()
    labeler = _get_labeler(predicate_resume)

    logging.info("Load matrix")
    label_matrix = _load_matrix(predicate_resume, session, labeler)
    model = GenerativeModel()

    logging.info("Train model")
    # Step size is scaled by the number of rows in the label matrix.
    step = 0.1 / label_matrix.shape[0]
    thread_count = int(parallelism)
    model.train(
        label_matrix,
        epochs=100,
        decay=0.95,
        step_size=step,
        reg_param=1e-6,
        threads=thread_count,
    )

    logging.info("Save model")
    _save_model(predicate_resume, model)

    # Save marginals
    logging.info("Get marginals")
    marginals = model.marginals(label_matrix)
    logging.info("Save marginals")
    save_marginals(session, label_matrix, marginals)
Пример #11
0
def score_gen_model(predicate_resume,
                    session,
                    gen_model_name=None,
                    parallelism=16):
    """Load a saved generative model, train it on the training matrix and log stats.

    :param predicate_resume: predicate description; ``"predicate_name"`` and
        ``"label_group"`` are read from it
    :param session: session passed to the project helpers
    :param gen_model_name: name of the saved model to load; when ``None`` the
        name ``"G" + predicate_name + "Latest"`` is used
    :param parallelism: accepted but not used by this function
    :return: None
    """
    # Resolve the model name on both branches; previously an explicit
    # gen_model_name left model_name unbound and load() raised NameError.
    if gen_model_name is None:
        model_name = "G" + predicate_resume["predicate_name"] + "Latest"
    else:
        model_name = gen_model_name
    logging.info("Stats logging")
    key_group = predicate_resume["label_group"]
    train_cids_query = get_train_cids_with_span(predicate_resume, session)
    L_train = load_ltrain(predicate_resume, session)
    gen_model = GenerativeModel()
    gen_model.load(model_name)
    # NOTE(review): training right after load() presumably replaces the
    # loaded weights -- confirm the re-fit is intended.
    gen_model.train(L_train,
                    epochs=100,
                    decay=0.95,
                    step_size=0.1 / L_train.shape[0],
                    reg_param=1e-6)
    logging.info(gen_model.weights.lf_accuracy)
    print(gen_model.weights.lf_accuracy)
    train_marginals = gen_model.marginals(L_train)
    fig = plt.figure()
    #hist=plt.hist(train_marginals, bins=20)
    #plt.savefig("plt"+strftime("%d-%m-%Y_%H_%M_%S", gmtime())+".png", dpi=fig.dpi)
    gen_model.learned_lf_stats()
Пример #12
0
def score_lfs(predicate_resume,
              L_gold_test,
              session,
              date_time,
              parallelism=8):
    """Apply the labeling functions to the test split and dump evaluation CSVs.

    Every output file is written to
    ``./results/<prefix><predicate_name><date_time>.csv``:

    * ``lfs_1_``      -- labeling-function statistics of the test matrix
    * ``test_gen_1_`` -- precision / recall / F1 of a generative model that is
      trained and scored on the test matrix
    * ``gen_2_``      -- learned labeling-function statistics
    * ``gen_3_``      -- TP / FP / TN / FN counts from the error analysis
    * ``gen_4_``      -- labeling-function statistics against the gold labels
      combined with the learned accuracies

    :param predicate_resume: predicate description; ``"predicate_name"`` and
        ``"label_group"`` are read from it
    :param L_gold_test: gold labels for the test split
    :param session: session passed to the Snorkel / project helpers
    :param date_time: string appended to every output file name
    :param parallelism: passed to ``LabelAnnotator.apply``
    :return: None
    """
    def _result_path(prefix):
        # Shared file-name layout for every artefact of this run.
        return ("./results/" + prefix + predicate_resume["predicate_name"] +
                date_time + ".csv")

    lf_stats_path = _result_path("lfs_1_")

    label_group = predicate_resume["label_group"]
    labelling_functions = get_labelling_functions(predicate_resume)
    annotator = LabelAnnotator(lfs=labelling_functions)
    test_cids_query = get_test_cids_with_span(predicate_resume, session)
    L_test = annotator.apply(
        parallelism=parallelism,
        cids_query=test_cids_query,
        key_group=label_group,
        clear=True,
        replace_key_set=False,
    )

    lf_stats = L_test.lf_stats(session)
    print(lf_stats)
    logging.info(lf_stats)
    lf_stats.to_csv(lf_stats_path)

    model = GenerativeModel()
    model.train(
        L_test,
        epochs=100,
        decay=0.95,
        step_size=0.1 / L_test.shape[0],
        reg_param=1e-6,
    )

    p, r, f1 = model.score(L_test, L_gold_test)
    summary = "Prec: {0:.3f}, Recall: {1:.3f}, F1 Score: {2:.3f}".format(
        p, r, f1)
    print(summary)
    logging.info(summary)

    scores_path = _result_path("test_gen_1_")
    # NOTE(review): binary mode is the Python 2 csv idiom; under Python 3
    # csv.writer needs a text-mode file -- confirm the target interpreter.
    with open(scores_path, 'w+b') as f:
        writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(["Precision", "Recall", "F1"])
        writer.writerow(["{0:.3f}".format(value) for value in (p, r, f1)])

    # Marginals and the plot path are still computed although the histogram
    # output below is disabled.
    test_marginals = model.marginals(L_test)

    plot_path = _result_path("plt_1_")
    #plt.hist(test_marginals, bins=20)
    #plt.savefig(plot_path)
    #plt.show()

    learned_stats_path = _result_path("gen_2_")
    learned_stats = model.learned_lf_stats()
    learned_stats.to_csv(learned_stats_path)

    confusion_path = _result_path("gen_3_")
    tp, fp, tn, fn = model.error_analysis(session, L_test, L_gold_test)
    with open(confusion_path, 'w+b') as f:
        writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(["TP", "FP", "TN", "FN"])
        writer.writerow([str(len(bucket)) for bucket in (tp, fp, tn, fn)])

    gold_stats_path = _result_path("gen_4_")
    # learned_lf_stats() is called again here, exactly as before, instead of
    # reusing the frame written to gen_2_.
    gold_stats = L_test.lf_stats(session, L_gold_test,
                                 model.learned_lf_stats()['Accuracy'])
    gold_stats.to_csv(gold_stats_path)
Пример #13
0
    missed = load_external_labels(session,
                                  VirusHost,
                                  annotator_name='gold',
                                  split=1)
    L_gold_dev = load_gold_labels(session, annotator_name='gold', split=1)
    missed = load_external_labels(session,
                                  VirusHost,
                                  annotator_name='gold',
                                  split=2)
    L_gold_test = load_gold_labels(session, annotator_name='gold', split=2)

    # Generative model
    ds = DependencySelector()
    deps = ds.select(L_train, threshold=0.1)

    gen_model = GenerativeModel()
    gen_model.train(L_train,
                    epochs=100,
                    decay=0.95,
                    step_size=0.1 / L_train.shape[0],
                    reg_param=1.00e-03,
                    deps=deps)

    train_marginals = gen_model.marginals(L_train)

    # Discriminative model
    featurizer = FeatureAnnotator(f=hybrid_span_mention_ftrs)

    F_train = featurizer.load_matrix(session, split=0)
    F_dev = featurizer.load_matrix(session, split=1)
    F_test = featurizer.load_matrix(session, split=2)
Пример #14
0
# Notebook-style script: inspect labeling-function statistics, select
# dependencies, train a generative model and save its training marginals.

# Labeling Function Performance - Coverage, Overlaps, Conflicts
# (the results are not stored; presumably they were displayed by a notebook)
L_train_BC.lf_stats(session)
L_train_BD.lf_stats(session)
L_train_BM.lf_stats(session)
L_train_BT.lf_stats(session)

# Analyzing Dependencies
# Collect the dependencies selected for each label matrix in the list.
# NOTE(review): L_train_BD appears three times while L_train_BM and
# L_train_BT are never used here -- looks like a copy-paste slip; confirm.
Ldeps = []
for L in [L_train_BC, L_train_BD, L_train_BD, L_train_BD]:
    ds = DependencySelector()
    deps = ds.select(L, threshold=0.1)
    len(deps)  # bare expression: the value is discarded when run as a script
    Ldeps.append(deps)

# Train the generative model.
# NOTE(review): `deps` is the value left over from the last loop iteration,
# and `L_train` is not defined in this snippet -- verify both against the
# surrounding code.
gen_model = GenerativeModel(lf_propensity=True)
gen_model.train(L_train,
                deps=deps,
                decay=0.95,
                step_size=0.1 / L_train.shape[0],
                reg_param=0.0)
# Compute the training marginals, plot them as a 20-bin histogram, then save
# them.
train_marginals = gen_model.marginals(L_train)
plt.hist(train_marginals, bins=20)
plt.show()
gen_model.learned_lf_stats()
save_marginals(session, L_train, train_marginals)
load_external_labels(session,
                     BiomarkerCondition,
                     'Biomarker',
                     'Condition',
                     'articles/disease_gold_labels.tsv',
Пример #15
0
 def __init__(self, query_pairwise_bins_by_ranker: QueryPairwiseBinsByRanker):
     """Keep the pairwise bins and create an untrained generative model.

     ``query_pairwise_bins_by_ranker`` is stored unchanged, a fresh
     ``GenerativeModel`` is placed on ``self.snorkel_gm`` and
     ``self.is_trained`` starts as ``False``.
     """
     # Nothing has been trained at construction time.
     self.is_trained = False
     self.query_pairwise_bins_by_ranker = query_pairwise_bins_by_ranker
     self.snorkel_gm = GenerativeModel()