import os
import cPickle

import numpy as np

# 'utilities' and 'phases' come from the surrounding project; where exactly
# 'phases' is defined is an assumption here.
import utilities
from utilities import phases


def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """
    Example function on how to access all of the features
    stored in adjective_phase_set
    """

    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives_univ_deep_search")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s" % adjective

    train_X = []

    for phase in phases:
        train_set = all_features[adjective][phase]["train"]
        train_X.append(train_set["features"])
        # Labels and object ids are assumed identical across phases, so
        # overwriting them each iteration is harmless.
        train_Y = train_set["labels"]
        object_ids = train_set["object_ids"]

    # Stack the per-phase feature blocks side by side into one matrix.
    train_X = np.concatenate(train_X, axis=1)

    """ 
    # Scale the data
    scaler = preprocessing.StandardScaler().fit(train_X)
    train_X = scaler.transform(train_X)
    all_features[adjective]['scaler'] = scaler
    all_features[adjective]['train'] = train_X   # store off scaled

    # Remove features!
    all_features[adjective]['tree_features'] = remove_feature_tree_based(train_X,train_Y)

    print np.shape(train_X)

    train_X = all_features[adjective]['tree_features'][1]; # transformed features
    print np.shape(train_X)
   
    print "Training adjective %s" % adjective

    """

    # Heuristic: with fewer than 180 positive labels, use univariate
    # feature selection; otherwise run the plain SVM grid search.
    if sum(train_Y) < 180:
        trained_clf, scaler = utilities.train_univariate_selection(
            train_X, train_Y, verbose=True, object_ids=object_ids, n_jobs=6, scale=True
        )
        print trained_clf

    else:
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X, train_Y=train_Y, verbose=True, object_ids=object_ids, n_jobs=6, scale=True
        )

    dataset = all_features[adjective]
    dataset["scaler"] = scaler
    dataset["adjective"] = adjective
    dataset["classifier"] = trained_clf

    print "Saving trained_classifier"

    # Save the results in the folder; pickle files must be opened in
    # binary mode, and the target directory may not exist yet.
    if not os.path.isdir(newpath):
        os.makedirs(newpath)
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
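
A hedged sketch of how this function might be driven. The nested layout of
all_features (adjective -> phase -> split -> arrays) is inferred from the
accesses above; the adjective and phase names and the array shapes below are
placeholders, not the project's real values, and the call still needs the
project's real 'utilities' helpers to run.

# --- usage sketch: synthetic all_features with placeholder names ---
demo_adjectives = ["soft", "rough"]  # hypothetical adjective names
demo_phases = ["SQUEEZE", "HOLD"]    # hypothetical phase names

all_features = {}
for adj in demo_adjectives:
    labels = np.random.randint(0, 2, 20)  # one binary label per sample
    ids = np.arange(20)                   # one object id per sample
    all_features[adj] = {}
    for ph in demo_phases:
        all_features[adj][ph] = {
            "train": {
                "features": np.random.rand(20, 5),  # 20 samples x 5 features
                "labels": labels,      # same labels in every phase
                "object_ids": ids,     # same ids in every phase
            }
        }

phases = demo_phases  # the function iterates over the module-level 'phases'
for adj in demo_adjectives:
    orig_train_adjective_phase_classifier("/tmp", adj, all_features)
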
def train_adjective_phase_classifier(path, adjective, phase, all_features, boost):
    """
    Example function on how to access all of the features
    stored in adjective_phase_set
    """

    # File name 
    dataset_file_name = "_".join(("trained", adjective, phase))+".pkl"
    newpath = os.path.join(path, "trained_adjective_phase_univ")
    path_name = os.path.join(newpath, dataset_file_name)
    
    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s and phase %s" % (adjective, phase)

    train_set = all_features[adjective][phase]['train']
    train_X = train_set['features']
    train_Y = train_set['labels']
    object_ids = train_set['object_ids']

    ''' Use for tree selection
    # Scale the data
    scaler = preprocessing.StandardScaler().fit(train_X)
    train_X = scaler.transform(train_X)
    all_features[adjective][phase]['scaler'] = scaler
    all_features[adjective][phase]['train'] = train_X   # store off scaled


    print "Training adjective %s and phase %s" %(adjective, phase)

    # Remove features!
    all_features[adjective][phase]['tree_features'] = remove_feature_tree_based(train_X,train_Y)

    print np.shape(train_X)

    train_X = all_features[adjective][phase]['tree_features'][1]; # transformed features
    print np.shape(train_X)
    '''
    trained_clf, scaler = utilities.train_univariate_selection(
        train_X, train_Y, verbose=True, object_ids=object_ids, n_jobs=6, scale=True
    )
    all_features[adjective][phase]['scaler'] = scaler
    # The trained classifier is stored under the legacy 'tree_features' key.
    all_features[adjective][phase]['tree_features'] = trained_clf
   
    print trained_clf

    # Disabled alternative: choose between an SVM grid search and
    # gradient boosting based on 'boost'.
    '''
    if not boost:
        trained_clf, scaler = utilities.train_svm_gridsearch(train_X = train_X,
                             train_Y = train_Y,
                             verbose=True,
                             object_ids = object_ids,
                             n_jobs = 6,
                             scale = False 
                             )   
    else: 
        trained_clf, scaler = utilities.train_gradient_boost(train_X = train_X,
                                train_Y = train_Y,
                                object_ids = object_ids,
                                verbose = True, 
                                n_jobs = 6,
                                scale = False 
                                )
    '''

    dataset = all_features[adjective][phase]
    dataset['adjective'] = adjective
    dataset['phase'] = phase
    dataset['classifier'] = trained_clf
   
    print "Saving trained_classifier" 

    # Save the results in the folder; pickle files must be opened in
    # binary mode, and the target directory may not exist yet.
    if not os.path.isdir(newpath):
        os.makedirs(newpath)
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
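
utilities.train_univariate_selection itself is not part of this listing, so
the sketch below is purely an assumption about its behavior: the signature
matches the calls above, the data is optionally scaled, features are ranked
by ANOVA F-score, and a linear SVM is grid-searched. The real helper
presumably also uses object_ids for grouped cross-validation; that is
omitted here for brevity.

# --- hedged sketch of a univariate-selection trainer ---
from sklearn import preprocessing
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.grid_search import GridSearchCV  # sklearn.model_selection in newer releases
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC


def sketch_train_univariate_selection(train_X, train_Y, verbose=False,
                                      object_ids=None, n_jobs=1, scale=True):
    # object_ids is accepted only for signature compatibility here.
    scaler = None
    if scale:
        scaler = preprocessing.StandardScaler().fit(train_X)
        train_X = scaler.transform(train_X)

    pipeline = Pipeline([("select", SelectKBest(f_classif)),
                         ("svm", LinearSVC())])
    # Keep every k at or below the feature count, or SelectKBest complains.
    param_grid = {"select__k": [10, 50, "all"],
                  "svm__C": [0.01, 0.1, 1.0, 10.0]}
    grid = GridSearchCV(pipeline, param_grid,
                        n_jobs=n_jobs, verbose=int(verbose))
    grid.fit(train_X, train_Y)
    return grid.best_estimator_, scaler

Because selection lives inside the pipeline, the choice of k is re-fit on
every cross-validation fold rather than leaking information from held-out
data.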