def create_single_dataset(database, path, adj_obj):
    """ 
    Creates a pickle file dataset for each motions for all objects with a particular adjective
    """
    adjective = adj_obj.adjective
    #import pdb; pdb.set_trace()
    # Test to see if any phase files already exist and only create the ones that don't, or skip all
    all_phase_list = phases

    phase_list = []
    nobuild = 0
    for phase in all_phase_list:
        phase_obj = PhaseClass()
        phase_obj.phase = phase;
        dataset_file_name = "_".join(("hmm_feature", adjective,phase))+".pkl"
        phase_obj.path_name = os.path.join(path, dataset_file_name)
        
        if os.path.exists(phase_obj.path_name):
            print "File %s already exists, skipping it." % phase_obj.path_name
            phase_obj.build = False
            nobuild = nobuild + 1
        else:
            phase_obj.build = True
            print "Creating adjective %s and phase %s" % (adjective, phase)
        phase_list.append(phase_obj)
    if nobuild == 4:
        print "All phases of adjective %s are already built. Moving on..." % adjective
        return

    # Open database and get train/test split
    database = tables.openFile(database)
    train_objs, test_objs = get_train_test_objects(database, adjective)
    # Select the features from the feature objects 
    
    feature_train_dict_list = create_hmm_feature_set(database, train_objs, adj_obj, phase_list)
    feature_test_dict_list = create_hmm_feature_set(database, test_objs, adj_obj, phase_list)

    for i, phase_object in enumerate(phase_list):
        if phase_object.build == True:
            # Store the train/test in a dataset
            #import pdb; pdb.set_trace()
            dataset = defaultdict(dict)
            dataset['train'] = feature_train_dict_list[i] 
            dataset['test'] = feature_test_dict_list[i]

            if len(dataset) is 0:
                print "Empty dataset for adj %s and phase %s" % (adjective, phase_object.phase)
                continue 

            print "Saving dataset to file"

            #import pdb; pdb.set_trace()
            # Save the results in the folder
            #Saves one file per motion. This needs to be a for loop
            with open(phase_object.path_name, "w") as f:
                print "Saving file: ", phase_object.path_name
                cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
Пример #2
0
    # NOTE(review): leftover debugger breakpoint -- execution stops here and
    # waits for an interactive pdb session.
    import pdb
    pdb.set_trace()
    # Running sums of precision, recall and F1 over the classifiers that were
    # scored successfully; ``total`` counts those classifiers.
    f1s = 0
    precs = 0
    recalls = 0
    total = 0

    # Setup text file to store values to (created in the current working
    # directory, truncating any previous report).
    # NOTE(review): the file is only closed after the loop; an exception other
    # than ValueError would leave it open.
    adjective_report = open("adjective_score_report.txt", "w")
    adjective_report.write("Adjective, precision, recall, f1\n")

    # ``classifiers`` and ``database`` are defined outside the visible
    # fragment.
    for classifier in classifiers:
        try:
            # Pull out the objects that we want (only ``test_objs`` is used
            # below; ``train_objs`` is unused in this fragment).
            train_objs, test_objs = get_train_test_objects(
                database, classifier.adjective)

            # Compute score for each adjective; the report file handle is
            # passed along -- presumably test_adjective writes one row per
            # adjective to it (TODO confirm).
            p, r, f1 = test_adjective(classifier, database, test_objs,
                                      adjective_report)
            precs += p
            recalls += r
            f1s += f1
            total += 1

        except ValueError:
            # A ValueError from either call above skips this classifier; it
            # is then not counted in ``total``.
            print "Skipping values"
            continue

    adjective_report.close()
Пример #3
0
def create_single_dataset(database, path, adj_obj):
    """ 
    Creates a pickle file dataset for each motions for all objects with a particular adjective
    """
    adjective = adj_obj.adjective
    #import pdb; pdb.set_trace()
    # Test to see if any phase files already exist and only create the ones that don't, or skip all
    all_phase_list = phases

    phase_list = []
    nobuild = 0
    for phase in all_phase_list:
        phase_obj = PhaseClass()
        phase_obj.phase = phase
        dataset_file_name = "_".join(
            ("hmm_feature", adjective, phase)) + ".pkl"
        phase_obj.path_name = os.path.join(path, dataset_file_name)

        if os.path.exists(phase_obj.path_name):
            print "File %s already exists, skipping it." % phase_obj.path_name
            phase_obj.build = False
            nobuild = nobuild + 1
        else:
            phase_obj.build = True
            print "Creating adjective %s and phase %s" % (adjective, phase)
        phase_list.append(phase_obj)
    if nobuild == 4:
        print "All phases of adjective %s are already built. Moving on..." % adjective
        return

    # Open database and get train/test split
    database = tables.openFile(database)
    train_objs, test_objs = get_train_test_objects(database, adjective)
    # Select the features from the feature objects

    feature_train_dict_list = create_hmm_feature_set(database, train_objs,
                                                     adj_obj, phase_list)
    feature_test_dict_list = create_hmm_feature_set(database, test_objs,
                                                    adj_obj, phase_list)

    for i, phase_object in enumerate(phase_list):
        if phase_object.build == True:
            # Store the train/test in a dataset
            #import pdb; pdb.set_trace()
            dataset = defaultdict(dict)
            dataset['train'] = feature_train_dict_list[i]
            dataset['test'] = feature_test_dict_list[i]

            if len(dataset) is 0:
                print "Empty dataset for adj %s and phase %s" % (
                    adjective, phase_object.phase)
                continue

            print "Saving dataset to file"

            #import pdb; pdb.set_trace()
            # Save the results in the folder
            #Saves one file per motion. This needs to be a for loop
            with open(phase_object.path_name, "w") as f:
                print "Saving file: ", phase_object.path_name
                cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
    # NOTE(review): leftover debugger breakpoint -- execution stops here and
    # waits for an interactive pdb session.
    import pdb; pdb.set_trace()
    # Running sums of precision, recall and F1 over the classifiers that were
    # scored successfully; ``total`` counts those classifiers.
    f1s= 0
    precs = 0
    recalls = 0
    total = 0
    
    # Setup text file to store values to (created in the current working
    # directory, truncating any previous report).
    # NOTE(review): the file is only closed after the loop; an exception other
    # than ValueError would leave it open.
    adjective_report = open("adjective_score_report.txt", "w")
    adjective_report.write("Adjective, precision, recall, f1\n")

    # ``classifiers`` is not defined anywhere in the visible code above this
    # point.
    for classifier in classifiers:
        try:
            # Pull out the objects that we want (only ``test_objs`` is used
            # below; ``train_objs`` is unused in this fragment).
            train_objs, test_objs = get_train_test_objects(database, classifier.adjective)

            # Compute score for each adjective; the report file handle is
            # passed along -- presumably test_adjective writes one row per
            # adjective to it (TODO confirm).
            p, r, f1 = test_adjective(classifier, database, test_objs, adjective_report)
            precs += p
            recalls += r
            f1s += f1
            total += 1

        except ValueError:
            # A ValueError from either call above skips this classifier; it
            # is then not counted in ``total``.
            print "Skipping values"
            continue

    adjective_report.close()

    # NOTE(review): ``total`` stays 0 when ``classifiers`` is empty or every
    # classifier was skipped, in which case this division raises
    # ZeroDivisionError.
    print "Average f1s: ", f1s / total