Python SnorkelSession.commit примеры использования

Язык программирования: Python

Пространство имен/Пакет: snorkel

Класс/Тип: SnorkelSession

Метод/Функция: commit

Примеров на hotexamples.com: 3

Python SnorkelSession.commit - 3 примера найдено. Это лучшие примеры Python кода для snorkel.SnorkelSession.commit, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

query(28)

SnorkelSession(21)

add(4)

execute(4)

commit(3)

Пример #1

Показать файл

def reload_external_labels(session: SnorkelSession,
                           input_file: Union[str, Path],
                           annotator_name: str = "gold"):
    Education = get_candidate_class()
    with open(str(input_file), "r") as f:
        lbls = ujson.load(f)

    for lbl in lbls:
        # we check if the label already exists, in case this cell was already executed
        context_stable_ids = "~~".join((lbl['person'], lbl['organization']))
        query = session.query(StableLabel).filter(
            StableLabel.context_stable_ids == context_stable_ids)
        query = query.filter(StableLabel.annotator_name == annotator_name)
        if query.count() == 0:
            session.add(
                StableLabel(context_stable_ids=context_stable_ids,
                            annotator_name=annotator_name,
                            value=lbl['value']))

    # commit session
    session.commit()

    # reload annotator labels
    reload_annotator_labels(session,
                            Education,
                            annotator_name,
                            split=1,
                            filter_label_split=False)
    reload_annotator_labels(session,
                            Education,
                            annotator_name,
                            split=2,
                            filter_label_split=False)

Пример #2

Показать файл

Файл: ir_signals_denoising.py Проект: dkrigel/snorkel

    random.shuffle(cand_list)
    #take 10 percent from here
    train_cand_list += cand_list[0:int(len(cand_list) * 0.4)]

print(" -number of pairs:", len(cand_dict))
print(" -number of signals:", n)
print(" -number of pair to train GEN model", len(train_cand_list))

for i, cand in enumerate(cand_list):
    split = 0 if cand in train_cand_list else 1

    raw_text = RawText(stable_id=cand, name=cand, text=cand)
    tweet = Tweet(tweet=raw_text, split=split)
    session.add(tweet)

session.commit()

print("Commit to snorkel database done...")


#writing label generator
def worker_label_generator(t):
    for worker_id in cand_dict[t.tweet.stable_id]:
        yield worker_id, cand_dict[t.tweet.stable_id][worker_id]


np.random.seed(1701)
labeler = LabelAnnotator(label_generator=worker_label_generator)
L_train = labeler.apply(split=0)

print(L_train.lf_stats(session))

Пример #3

Показать файл

Файл: pipeline.py Проект: varun-tandon/MarkerVilleBackendAlpha

def run(candidate1, candidate2, pairing_name, cand1_ngrams, cand2_ngrams,
        cand1Matcher, cand2Matcher, model_name, output_file_name,
        corpus_parser):
    print "Started"
    session = SnorkelSession()

    # The following line is for testing only. Feel free to ignore it.

    candidate_pair = candidate_subclass(pairing_name, [candidate1, candidate2])

    sentences = set()
    docs = session.query(Document).order_by(Document.name).all()
    for doc in docs:
        for s in doc.sentences:
            sentences.add(s)

    cand_1_ngrams = Ngrams(n_max=cand1_ngrams)
    # condition_ngrams = Ngrams(n_max=7)
    cand_2_ngrams = Ngrams(n_max=cand2_ngrams)
    # medium_ngrams = Ngrams(n_max=5)
    # type_ngrams = Ngrams(n_max=5)  # <--- Q: should we cut these down?
    # # level_ngrams = Ngrams(n_max=1)
    # unit_ngrams = Ngrams(n_max=1)

    # Construct our Matchers

    # cMatcher = matchers.getConditionMatcher()
    # mMatcher = matchers.getMediumMatcher()
    # tMatcher = matchers.getTypeMatcher()
    # lMatcher = matchers.getLevelMatcher()
    # uMatcher = matchers.getUnitMatcher()

    # Building the CandidateExtractors
    # candidate_extractor_BC = CandidateExtractor(BiomarkerCondition, [biomarker_ngrams, condition_ngrams], [bMatcher, cMatcher])
    candidate_extractor = CandidateExtractor(candidate_pair,
                                             [cand_1_ngrams, cand_2_ngrams],
                                             [cand1Matcher, cand2Matcher])
    # candidate_extractor_BM = CandidateExtractor(BiomarkerMedium, [biomarker_ngrams, medium_ngrams], [bMatcher, mMatcher])
    # candidate_extractor_BT = CandidateExtractor(BiomarkerType, [biomarker_ngrams, type_ngrams], [bMatcher, tMatcher])
    # candidate_extractor_BLU = CandidateExtractor(BiomarkerLevelUnit, [biomarker_ngrams, level_ngrams, unit_ngrams], [bMatcher, lMatcher, uMatcher])

    # List of Candidate Sets for each relation type: [train, dev, test]
    candidate_extractor.apply(sentences, split=4, clear=True)
    cands = session.query(candidate_pair).filter(
        candidate_pair.split == 4).order_by(candidate_pair.id).all()
    session.commit()
    # cands_BD = grabCandidates(candidate_extractor_BD, BiomarkerDrug)
    # cands_BM = grabCandidates(candidate_extractor_BM, BiomarkerMedium)
    # cands_BT = grabCandidates(candidate_extractor_BT, BiomarkerType)
    # cands_BLU = grabCandidates(candidate_extractor_BLU, BiomarkerLevelUnit)

    if (len(cands)) == 0:
        print "No Candidates Found"
        return
    if (pairing_name == 'BiomarkerCondition'):
        # session.rollback()
        # print "Number of dev BC candidates without adj. boosting: ", len(cands_BC[1])
        add_adj_candidate_BC(session, candidate_pair, cands, 4)
        # fix_specificity(session, BiomarkerCondition, cands_BC[1])
        # print "Number of dev BC candidates with adj. boosting: ", session.query(BiomarkerCondition).filter(BiomarkerCondition.split == 4).count()
        session.commit()

    lstm = reRNN(seed=1701, n_threads=None)

    lstm.load(model_name)

    predictions = lstm.predictions(cands)
    output_file = open(output_file_name, 'wb')
    import csv
    csvWriter = csv.writer(output_file)
    csvWriter.writerow(
        ['doc_id', 'sentence', candidate1, candidate2, 'prediction'])
    for i in range(len(cands)):
        doc_string = 'PMC' + str(cands[i].get_parent().get_parent())[9:]
        sentence_string = cands[i].get_parent().text
        cand_1_string = cands[i].get_contexts()[0].get_span()
        cand_2_string = cands[i].get_contexts()[1].get_span()
        prediction = predictions[i]
        csvWriter.writerow([
            unidecode(doc_string),
            unidecode(sentence_string),
            unidecode(cand_1_string),
            unidecode(cand_2_string), prediction
        ])