Example #1
def train_models_2(train_sentences, filter) -> tuple:
    """Train an identification classifier and a labeling classifier on pruned data."""

    # Prune the train data set
    train_words = prune_sentences(train_sentences, filter, balance=False)

    # Train on pruned sentences
    print(f"Number of datapoints at training identifier: {len(train_words)}")
    id_clf, id_report = train_classifier(train_words,
                                         bool_result=True,
                                         prob=False)

    # # Filter the words using the identification classifier similarly to testing
    # for chunk in chunks(train_words, 4):
    #     test_classifier(id_clf, chunk, bool_result=True)
    # train_words = []
    # for sentence in train_sentences:
    #     words = sentence.getTreeNodesOrdered()
    #     for w in words:
    #         prediction = w.getPrediction()
    #         if prediction == 1:
    #             # add the word to the list to be labeled
    #             train_words.append(w)

    filter["prune"] = 2
    train_words = prune_sentences(train_sentences, filter, balance=False)

    print(f"Number of datapoints at training labeler: {len(train_words)}")
    label_clf, label_report = train_classifier(train_words,
                                               bool_result=False,
                                               prob=True)

    return id_clf, label_clf, f"{id_report}\n{label_report}"
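
A minimal usage sketch for the function above, assuming train_sentences has already been loaded and that the filter argument is a dict of pruning settings (consistent with the filter["prune"] = 2 update; the exact keys are an assumption):

# Hypothetical usage: prune settings travel in a mutable dict
filter_settings = {"prune": 1}
id_clf, label_clf, report = train_models_2(train_sentences, filter_settings)
print(report)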
Example #2
def train_classifiers(data_file, cache_dir=os.path.curdir):
    data = read_data(data_file)
    for matter in ["WM", "GM"]:
        classifier_file = os.path.join(
            cache_dir, "Classifier",
            "{0}_matter_classifier.pkl".format(matter))
        # Create the cache directory if it does not exist yet
        os.makedirs(os.path.dirname(classifier_file), exist_ok=True)
        train_classifier(data["Features"].values, data["Truth"][matter].values,
                         classifier_file)
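
Here train_classifier receives a feature matrix, a label vector, and an output path, so it presumably fits a model and caches it to classifier_file. A minimal sketch of such a helper, assuming scikit-learn and a random forest (both assumptions; the real helper is not shown):

import pickle
from sklearn.ensemble import RandomForestClassifier

def train_classifier(features, truth, classifier_file):
    # Hypothetical helper: fit a model and pickle it at the given path
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(list(features), list(truth))
    with open(classifier_file, "wb") as f:
        pickle.dump(clf, f)
    return clf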
Example #3
def save_suggestion_feedback(sessionId, context, feedback):
    user = User.query.filter_by(session_id=sessionId).first()
    gauss_object = Classifiers.query.filter_by(user_id=user.id).first()
    gauss_clf = gauss_object.pickled_classifier

    # Record the feedback as a binary label and log the visited listing
    classified = 1 if feedback else 0
    visited = UserVisitedListings(user_id=user.id, listing=context['id'],
                                  like=feedback)

    # Update the user's classifier with the new example and persist it
    train_classifier([context['description']], [classified], gauss_clf)
    Classifiers.query.filter_by(user_id=user.id).update(
        dict(pickled_classifier=gauss_clf))
    db.session.add(visited)
    db.session.commit()
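
In this snippet train_classifier updates the user's unpickled classifier in place with a single new example, which suggests an incremental (partial_fit-style) update. A minimal sketch under that assumption, using scikit-learn's HashingVectorizer and a GaussianNB-compatible interface (the gauss_clf name hints at the latter, but both choices are assumptions):

import numpy as np
from sklearn.feature_extraction.text import HashingVectorizer

# A stateless vectorizer hashes text consistently across calls
_vectorizer = HashingVectorizer(n_features=2 ** 10, alternate_sign=False)

def train_classifier(texts, labels, clf):
    # Hypothetical helper: incrementally refit the classifier in place
    X = _vectorizer.transform(texts).toarray()
    clf.partial_fit(X, labels, classes=np.array([0, 1]))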
Example #4
import pickle

def get_sentiment(comments, comments_ratio):
    #comments = youtube_comments.get_youtube_comments(video_id)
    #comments = download_comment.commentExtract(video_id)

    # Train (or refresh) the classifier once and load it from its pickle,
    # rather than re-training inside the loop for every comment
    training.train_classifier()
    with open("classifier.pickle", "rb") as classifier_f:
        classifier = pickle.load(classifier_f)

    comments_list = []
    positive_comments = []
    negative_comments = []
    mixed_comments = []
    for com in comments:
        filtered_comments = filter.filter_comments(com)

        # Reset the counts for each comment so scores do not accumulate
        pos = 0
        neg = 0
        for comment in filtered_comments:
            result = classifier.classify(bag_of_words(comment))
            if result == "pos":
                pos += 1
            else:
                neg += 1

        result, score = calculate_score(pos, neg, comments_ratio)

        if result == "positive":
            positive_comments.append(com)
        elif result == "negative":
            negative_comments.append(com)
        elif result == "mixed":
            mixed_comments.append(com)
        comments_list.append({'Comment': com, 'Score': score,
                              'Sentiment': result})

    #no_of_likes, no_of_dislikes = youtube_stats.get_likes_dislikes(video_id)
    dictionary_comments = {'Positive Comments': positive_comments,
                           'Negative Comments': negative_comments,
                           'Mixed Comments': mixed_comments}

    #overall_result = calculate_score(pos, neg, no_of_likes, no_of_dislikes, comments_ratio)

    return comments_list
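
The classifier.classify(bag_of_words(comment)) call matches NLTK's NaiveBayesClassifier interface, which takes a feature dict per document. A minimal sketch of the assumed bag_of_words helper (the original is not shown):

def bag_of_words(comment):
    # Hypothetical helper: NLTK-style boolean word-presence features
    return {word: True for word in comment.lower().split()}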
Example #5
def train_models(train_sentences, filter) -> tuple:
    """Train an identification classifier and a labeling classifier on pruned data."""

    # Prune the train data set
    train_words = prune_sentences(train_sentences, filter, balance=False)

    # Train on pruned sentences
    print(f"Number of datapoints at training identifier: {len(train_words)}")
    id_clf, id_report = train_classifier(train_words,
                                         bool_result=True,
                                         prob=False)

    # Prune again before training the labeler
    train_words = prune_sentences(train_sentences, filter, balance=False)

    print(f"Number of datapoints at training labeler: {len(train_words)}")
    label_clf, label_report = train_classifier(train_words,
                                               bool_result=False,
                                               prob=True)

    return id_clf, label_clf, f"{id_report}\n{label_report}"
Example #6
options = vars(parser.parse_args())

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from dataloader import CustomDataloader, FlexibleCustomDataloader
from training import train_classifier
from networks import build_networks, save_networks, get_optimizers
from options import load_options, get_current_epoch
from comparison import evaluate_with_comparison
from evaluation import save_evaluation

options = load_options(options)
dataloader = FlexibleCustomDataloader(fold='train', **options)
networks = build_networks(dataloader.num_classes, **options)
optimizers = get_optimizers(networks, finetune=True, **options)

eval_dataloader = CustomDataloader(last_batch=True,
                                   shuffle=False,
                                   fold='test',
                                   **options)

start_epoch = get_current_epoch(options['result_dir']) + 1
for epoch in range(start_epoch, start_epoch + options['epochs']):
    train_classifier(networks, optimizers, dataloader, epoch=epoch, **options)
    #print(networks['classifier_kplusone'])
    #weights = networks['classifier_kplusone'].fc1.weight
    eval_results = evaluate_with_comparison(networks, eval_dataloader,
                                            **options)
    pprint(eval_results)
    save_evaluation(eval_results, options['result_dir'], epoch)
    save_networks(networks, epoch, options['result_dir'])
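
get_current_epoch is imported from the project's options module and is used here to resume training. A plausible minimal sketch, assuming checkpoints are written under result_dir with the epoch number in the filename (an assumption; the real helper may differ):

import os
import re

def get_current_epoch(result_dir):
    # Hypothetical helper: return the highest epoch among saved checkpoints
    epochs = [0]
    checkpoint_dir = os.path.join(result_dir, 'checkpoints')
    if os.path.isdir(checkpoint_dir):
        for name in os.listdir(checkpoint_dir):
            match = re.search(r'epoch_(\d+)', name)
            if match:
                epochs.append(int(match.group(1)))
    return max(epochs)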
Example #7
from options import save_options, load_options, get_current_epoch
from locking import acquire_lock, release_lock
from imutil import encode_video

if os.path.exists(options['result_dir']):
    options = load_options(options)

dataloader = FlexibleCustomDataloader(fold='train', **options)
networks = build_networks(dataloader.num_classes, **options)
optimizers = get_optimizers(networks, **options)

save_options(options)
start_epoch = get_current_epoch(options['result_dir']) + 1
acquire_lock(options['result_dir'])
try:
    for epoch in range(start_epoch, start_epoch + options['epochs']):
        # Apply learning rate decay (currently disabled):
        # for name, optimizer in optimizers.items():
        #     MAX_EPOCH = 100
        #     optimizer.param_groups[0]['lr'] = options['lr'] * (
        #         options['decay'] ** min(epoch, MAX_EPOCH))
        video_filename = train_classifier(networks,
                                          optimizers,
                                          dataloader,
                                          epoch=epoch,
                                          **options)
        save_networks(networks, epoch, options['result_dir'])
finally:
    release_lock(options['result_dir'])
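
acquire_lock and release_lock come from the project's locking module and guard the result directory against concurrent runs. A minimal sketch of a file-based implementation (an assumption; the real module is not shown):

import os
import time

def acquire_lock(result_dir, timeout=60):
    # Hypothetical helper: create a lock file exclusively, retrying until timeout
    lock_path = os.path.join(result_dir, '.lock')
    deadline = time.time() + timeout
    while True:
        try:
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            os.close(fd)
            return
        except FileExistsError:
            if time.time() > deadline:
                raise TimeoutError('Could not acquire lock on ' + lock_path)
            time.sleep(1)

def release_lock(result_dir):
    # Hypothetical helper: remove the lock file if present
    lock_path = os.path.join(result_dir, '.lock')
    if os.path.exists(lock_path):
        os.remove(lock_path)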
Example #8
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=options['batch_size'],
                                           shuffle=True,
                                           num_workers=8,
                                           pin_memory=True,
                                           drop_last=True)

val_dataset = OpenSetImageFolder(valdir,
                                 val_transforms,
                                 seed=options['seed'],
                                 num_classes=options['num_classes'])
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=options['batch_size'],
                                         shuffle=True,
                                         num_workers=8,
                                         pin_memory=True)

start_epoch = get_current_epoch(options['result_dir']) + 1
for epoch in range(start_epoch, start_epoch + options['epochs']):
    train_results = train_classifier(networks,
                                     optimizers,
                                     train_loader,
                                     epoch=epoch,
                                     **options)
    eval_results = evaluate_with_comparison(networks, val_loader, **options)
    print('[Epoch {}] errC {} errOpenSet {} ClosedSetAcc {}'.format(
        epoch, train_results['errC'], train_results['errOpenSet'],
        eval_results['classifier_closed_set_accuracy']))
    save_evaluation(eval_results, options['result_dir'], epoch)
    save_networks(networks, epoch, options['result_dir'])
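
OpenSetImageFolder appears to wrap a standard image-folder dataset and restrict it to a seeded random subset of classes for open-set evaluation. A minimal sketch of that idea built on torchvision's ImageFolder (the original class is not shown; this sketch does not remap the surviving labels):

import random
from torchvision.datasets import ImageFolder

class OpenSetImageFolder(ImageFolder):
    # Hypothetical wrapper: keep samples from a seeded random subset of classes
    def __init__(self, root, transform, seed=0, num_classes=None):
        super().__init__(root, transform=transform)
        if num_classes is not None and num_classes < len(self.classes):
            rng = random.Random(seed)
            kept = set(rng.sample(range(len(self.classes)), num_classes))
            self.samples = [(path, label) for path, label in self.samples
                            if label in kept]
            self.imgs = self.samples
            self.targets = [label for _, label in self.samples]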