Example #1
def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """
    Example function on how to access all of the features
    stored in adjective_phase_set
    """

    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s" % adjective

    train_X = []

    # Collect features from every phase. Labels and object ids are
    # assumed identical across phases, so only the last values are kept.
    for phase in phases:
        train_set = all_features[adjective][phase]['train']
        train_X.append(train_set['features'])
        train_Y = train_set['labels']
        object_ids = train_set['object_ids']

    train_X = np.concatenate(train_X, axis=1)

    print "Training adjective %s" % adjective

    if True:  # flip to False to train the gradient-boosted variant instead
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)
    else:
        trained_clf = utilities.train_gradient_boost(
            train_X=train_X,
            train_Y=train_Y,
            object_ids=object_ids,
        )
        scaler = None  # this path does no scaling; keeps the save below working

    dataset = all_features[adjective]
    dataset['adjective'] = adjective
    dataset['classifier'] = trained_clf
    dataset['scaler'] = scaler

    print "Saving trained_classifier"

    # Save the results in the folder
    with open(path_name, "wb") as f:  # binary mode: HIGHEST_PROTOCOL is a binary pickle format
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
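A note on the grid search: utilities.train_svm_gridsearch is project code that is not part of this listing. The object_ids argument suggests the search validates with folds grouped by object, so recordings of one object never land in both the training and the validation split. A minimal, hypothetical sketch of that idea (modern scikit-learn API; the real helper may differ):

import numpy as np
from sklearn.model_selection import GridSearchCV, GroupKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

def train_svm_gridsearch_sketch(train_X, train_Y, object_ids, n_jobs=6):
    """Hypothetical stand-in for utilities.train_svm_gridsearch: scale,
    then grid-search an SVM with CV folds grouped by object id."""
    scaler = StandardScaler().fit(train_X)
    X = scaler.transform(train_X)
    grid = GridSearchCV(
        SVC(),
        param_grid={"C": np.logspace(-2, 2, 5), "gamma": np.logspace(-4, 0, 5)},
        cv=GroupKFold(n_splits=5),
        n_jobs=n_jobs)
    grid.fit(X, train_Y, groups=object_ids)
    return grid.best_estimator_, scaler

The (best_estimator_, scaler) pair mirrors the trained_clf, scaler tuple unpacked above.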
Example #2
def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """
    Example function on how to access all of the features
    stored in adjective_phase_set
    """

    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives_univ_deep_search")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s" % adjective

    train_X = []

    for phase in phases:
        train_set = all_features[adjective][phase]["train"]
        train_X.append(train_set["features"])
        train_Y = train_set["labels"]
        object_ids = train_set["object_ids"]

    train_X = np.concatenate(train_X, axis=1)

    """ 
    # Scale the data
    scaler = preprocessing.StandardScaler().fit(train_X)
    train_X = scaler.transform(train_X)
    all_features[adjective]['scaler'] = scaler
    all_features[adjective]['train'] = train_X   # store off scaled

    # Remove features!
    all_features[adjective]['tree_features'] = remove_feature_tree_based(train_X,train_Y)

    print np.shape(train_X)

    train_X = all_features[adjective]['tree_features'][1]; # transformed features
    print np.shape(train_X)
   
    print "Training adjective %s" % adjective

    """

    # For 0/1 labels, sum(train_Y) counts the positives: with fewer than
    # 180 positive examples, fall back to univariate feature selection.
    if sum(train_Y) < 180:
        trained_clf, scaler = utilities.train_univariate_selection(
            train_X, train_Y, verbose=True, object_ids=object_ids, n_jobs=6, scale=True
        )
        print trained_clf

    else:
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X, train_Y=train_Y, verbose=True, object_ids=object_ids, n_jobs=6, scale=True
        )

    dataset = all_features[adjective]
    dataset["scaler"] = scaler
    dataset["adjective"] = adjective
    dataset["classifier"] = trained_clf

    print "Saving trained_classifier"

    # Save the results in the folder
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
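utilities.train_univariate_selection is likewise project code that is not shown here. As a rough sketch of the technique the name points to (an assumption, not the project's implementation), univariate selection scores each feature independently against the labels and keeps only the strongest k before fitting the classifier:

from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

def train_univariate_selection_sketch(train_X, train_Y, k=20):
    """Hypothetical stand-in: ANOVA F-test per feature, keep the best k,
    then fit a linear SVM on the reduced matrix."""
    pipe = Pipeline([
        ("select", SelectKBest(f_classif, k=min(k, train_X.shape[1]))),
        ("svm", SVC(kernel="linear")),
    ])
    pipe.fit(train_X, train_Y)
    return pipe

Unlike the helper above, this sketch returns a single pipeline rather than a (classifier, scaler) pair.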
Example #3
def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """
    Example function on how to access all of the features
    stored in adjective_phase_set
    """

    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives_tree")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s" % adjective

    train_X = []

    for phase in phases:
        train_set = all_features[adjective][phase]['train']
        train_X.append(train_set['features'])
        train_Y = train_set['labels']
        object_ids = train_set['object_ids']

    train_X = np.concatenate(train_X, axis=1)

    # Scale the data
    scaler = preprocessing.StandardScaler().fit(train_X)
    train_X = scaler.transform(train_X)
    all_features[adjective]['scaler'] = scaler
    all_features[adjective]['train'] = train_X  # store off scaled

    # Remove features!
    all_features[adjective]['tree_features'] = remove_feature_tree_based(
        train_X, train_Y)

    print np.shape(train_X)

    train_X = all_features[adjective]['tree_features'][1]  # transformed features
    print np.shape(train_X)

    print "Training adjective %s" % adjective
    '''
    trained_clf, scaler = utilities.train_univariate_selection(
        train_X, train_Y,
        verbose=True,
        object_ids=object_ids,
        n_jobs=6,
        scale=True)
    all_features[adjective][phase]['scaler'] = scaler
    all_features[adjective][phase]['univ_select'] = trained_clf

    print trained_clf
    '''
    if True:  # flip to False to train the gradient-boosted variant instead
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=False)  # already scaled by the StandardScaler above
    else:
        trained_clf = utilities.train_gradient_boost(
            train_X=train_X,
            train_Y=train_Y,
            object_ids=object_ids,
        )

    dataset = all_features[adjective]  # already holds 'scaler' and 'tree_features' from above
    dataset['adjective'] = adjective
    dataset['classifier'] = trained_clf

    print "Saving trained_classifier"

    # Save the results in the folder
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
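remove_feature_tree_based is also project code. The usual pattern behind that name, sketched here as an assumption, is to fit an ensemble of randomized trees, rank features by importance, and keep only those above a threshold; the (selector, reduced_X) return shape below matches the tree_features[1] indexing used above.

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

def remove_feature_tree_based_sketch(train_X, train_Y):
    """Hypothetical stand-in: returns (fitted selector, reduced train_X)."""
    forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
    forest.fit(train_X, train_Y)
    # Keeps features whose importance is above the mean importance.
    selector = SelectFromModel(forest, prefit=True)
    return selector, selector.transform(train_X)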
Example #4
def original_train_adjective_phase_classifier(path1, path2, adjective, phase,
                                              all_features1, all_features2,
                                              boost):
    """
    Example function on how to access all of the features
    stored in adjective_phase_set
    """

    # File name
    dataset_file_name = "_".join(("trained", adjective, phase)) + ".pkl"
    newpath = os.path.join(path1, "trained_adjective_phase_merge")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s and phase %s" % (adjective, phase)

    # First set of features
    train_set = all_features1[adjective][phase]['train']
    train_X1 = train_set['features']
    train_Y = train_set['labels']
    object_ids = train_set['object_ids']

    # Test set: concatenate both feature blocks and store the merged
    # array back on all_features1 so it is pickled with the classifier
    test_set1 = all_features1[adjective][phase]['test']
    test_X1 = test_set1['features']
    test_set2 = all_features2[adjective][phase]['test']
    test_X2 = test_set2['features']
    test_X = np.concatenate((test_X1, test_X2), axis=1)
    all_features1[adjective][phase]['test'] = test_X

    # Second set of features
    train_set2 = all_features2[adjective][phase]['train']
    train_X2 = train_set2['features']

    # Merge the two feature sets into a single n x 51 matrix
    train_X = np.concatenate((train_X1, train_X2), axis=1)

    print "Training adjective %s and phase %s" % (adjective, phase)

    if not boost:
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)
    else:
        trained_clf, scaler = utilities.train_gradient_boost(
            train_X=train_X,
            train_Y=train_Y,
            object_ids=object_ids,
            verbose=True,
            n_jobs=6,
            scale=True)

    dataset = all_features1[adjective][phase]
    dataset['adjective'] = adjective
    dataset['phase'] = phase
    dataset['classifier'] = trained_clf
    dataset['scaler'] = scaler

    print "Saving trained_classifier"

    # Save the results in the folder
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
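Because the merged test matrix is written back into the same per-phase dict before pickling, evaluation can run straight from the saved file. A minimal sketch, with a hypothetical adjective/phase pair and the assumption that the grid-search scaler should be applied to test data exactly as in training:

import cPickle

# Hypothetical file name: one classifier per (adjective, phase) pair.
with open("trained_adjective_phase_merge/trained_sticky_SQUEEZE.pkl", "rb") as f:
    dataset = cPickle.load(f)

test_X = dataset['test']  # merged test features stored by the function above
if dataset['scaler'] is not None:
    test_X = dataset['scaler'].transform(test_X)
predictions = dataset['classifier'].predict(test_X)
print "%s / %s: %d positive predictions" % (
    dataset['adjective'], dataset['phase'], predictions.sum())

Note that the merge step replaces all_features1[adjective][phase]['test'] (originally a dict) with the raw merged array, so the test labels are not carried into the pickle; scoring would need them from elsewhere.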