def extract_features_and_generate_model(essays, algorithm=util_functions.AlgorithmTypes.regression):
    """
    Build a feature extractor and a fitted model from an essay set.

    essays - an essay set object; it is handed to the feature extractor and
        its ``_score`` attribute supplies the training targets
    algorithm - kept for backward compatibility only. The value passed in is
        ignored: the algorithm is always re-derived from the scores (more
        than 5 entries returned by util_functions.f7 -> regression, otherwise
        classification)

    returns a 3-tuple: the initialized FeatureExtractor, the fitted model, and
    the cross-validation error results produced by get_cv_error
    """
    f = feature_extractor.FeatureExtractor()
    f.initialize_dictionaries(essays)

    train_feats = f.gen_feats(essays)

    # The builtin int is used instead of numpy.int: numpy.int was only an
    # alias for int, deprecated in NumPy 1.20 and removed in NumPy 1.24.
    set_score = numpy.asarray(essays._score, dtype=int)

    # f7 presumably returns the distinct score points (verify in
    # util_functions); many distinct values are treated as a continuous target.
    if len(util_functions.f7(list(set_score))) > 5:
        algorithm = util_functions.AlgorithmTypes.regression
    else:
        algorithm = util_functions.AlgorithmTypes.classification

    # clf is fitted and returned; clf2 is only used for cross-validation.
    clf, clf2 = get_algorithms(algorithm)

    cv_error_results = get_cv_error(clf2, train_feats, essays._score)

    try:
        clf.fit(train_feats, set_score)
    except ValueError:
        log.exception("Not enough classes (0,1,etc) in sample.")
        # Overwrite the first two targets so that both 1 and 0 are present,
        # then retry the fit.
        # NOTE(review): this assumes at least two essays in the set.
        set_score[0] = 1
        set_score[1] = 0
        clf.fit(train_feats, set_score)

    return f, clf, cv_error_results
예제 #2
0
def extract_features_and_generate_model(essays, type=util_functions.AlgorithmTypes.regression):
    """
    Extract features from an essay set and fit a model on them.

    essays - an essay set object; its ``_score`` attribute holds the targets
    type - retained so existing callers that pass ``type=...`` keep working.
        Its value is not used: the algorithm is chosen from the scores below.

    returns (feature extractor, fitted model, cross-validation error results)
    """
    extractor = feature_extractor.FeatureExtractor()
    extractor.initialize_dictionaries(essays)
    train_feats = extractor.gen_feats(essays)

    # numpy.int (an alias of the builtin int) was deprecated in NumPy 1.20 and
    # removed in 1.24, so the builtin is used directly.
    set_score = numpy.asarray(essays._score, dtype=int)

    # Pick the algorithm from the number of entries f7 returns for the scores
    # (presumably the distinct score points - confirm in util_functions).
    # A local name is used rather than rebinding the ``type`` parameter.
    score_points = util_functions.f7(list(set_score))
    if len(score_points) > 5:
        algorithm = util_functions.AlgorithmTypes.regression
    else:
        algorithm = util_functions.AlgorithmTypes.classification

    # The first estimator is fitted and returned, the second one is handed to
    # the cross-validation routine.
    clf, clf2 = get_algorithms(algorithm)
    cv_error_results = get_cv_error(clf2, train_feats, essays._score)

    try:
        clf.fit(train_feats, set_score)
    except ValueError:
        log.exception("Not enough classes (0,1,etc) in sample.")
        # Make sure both a 1 and a 0 appear among the targets and try again.
        # NOTE(review): requires at least two scores; shorter input raises
        # IndexError here.
        set_score[0] = 1
        set_score[1] = 0
        clf.fit(train_feats, set_score)

    return extractor, clf, cv_error_results
예제 #3
0
def select_algorithm(score_list):
    """
    Decide what algorithm to use (regression or classification).

    score_list - an iterable of score values

    Returns util_functions.AlgorithmTypes.regression when
    util_functions.f7 yields more than 5 entries for the scores (i.e. more
    than 5 unique score points), otherwise
    util_functions.AlgorithmTypes.classification. If the scores cannot be
    counted, regression is returned as the fallback.
    """
    try:
        # Count the number of unique score points in the score list
        unique_score_points = util_functions.f7(list(score_list))
        if len(unique_score_points) > 5:
            algorithm = util_functions.AlgorithmTypes.regression
        else:
            algorithm = util_functions.AlgorithmTypes.classification
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are allowed to propagate instead of being swallowed.
        algorithm = util_functions.AlgorithmTypes.regression

    return algorithm
예제 #4
0
파일: create.py 프로젝트: 23dhananjay/ease
def select_algorithm(score_list):
    """
    Choose between regression and classification for a list of scores.

    score_list - an iterable of score values

    More than 5 unique score points (as counted via util_functions.f7)
    selects regression; 5 or fewer selects classification. Any ordinary error
    while counting also selects regression.
    """
    try:
        # Count the number of unique score points in the score list
        if len(util_functions.f7(list(score_list))) > 5:
            return util_functions.AlgorithmTypes.regression
        return util_functions.AlgorithmTypes.classification
    except Exception:
        # Deliberate best-effort fallback. ``except Exception`` is used so
        # that KeyboardInterrupt / SystemExit are not silently absorbed.
        return util_functions.AlgorithmTypes.regression
예제 #5
0
파일: create.py 프로젝트: Fyre91/ease
def create(text,score,prompt_string):
    """
    Creates a machine learning model from input text, associated scores and a prompt.
    text - A list of strings containing the text of the essays
    score - a list of integers containing score values
    prompt_string - the common prompt for the set of essays

    Returns a results dictionary. 'success' is True only if every step
    succeeded; in that case 'feature_ext', 'classifier', 'algorithm',
    'cv_kappa' and 'cv_mean_absolute_error' are filled in. On failure,
    'errors' lists a message for the step that failed and the remaining
    entries keep their initial values.
    """

    #Initialize a results dictionary to return
    results = {'errors': [],'success' : False, 'cv_kappa' : 0, 'cv_mean_absolute_error': 0,
               'feature_ext' : "", 'classifier' : "", 'algorithm' : util_functions.AlgorithmTypes.classification,
               'score' : score, 'text' : text, 'prompt' : prompt_string}

    if len(text)!=len(score):
        msg = "Target and text lists must be same length."
        results['errors'].append(msg)
        #No exception is active here, so log a plain error rather than a traceback
        log.error(msg)
        return results

    #Decide what algorithm to use (regression or classification)
    try:
        #Count the number of unique score points in the score list
        if len(util_functions.f7(list(score)))>5:
            algorithm = util_functions.AlgorithmTypes.regression
        else:
            algorithm = util_functions.AlgorithmTypes.classification
    except Exception:
        #Best-effort fallback when the scores cannot be counted
        algorithm = util_functions.AlgorithmTypes.regression

    try:
        #Create an essay set object that encapsulates all the essays and alternate representations (tokens, etc)
        e_set = model_creator.create_essay_set(text, score, prompt_string)
    except Exception:
        msg = "essay set creation failed."
        results['errors'].append(msg)
        log.exception(msg)
        #Without an essay set the next step cannot run; stop here instead of
        #failing again on the undefined e_set and reporting a misleading second error
        return results

    try:
        #Gets features from the essay set and computes error
        #The callee names its keyword argument ``type``, so the keyword is kept
        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model(e_set, type=algorithm)
        results['cv_kappa']=cv_error_results['kappa']
        results['cv_mean_absolute_error']=cv_error_results['mae']
        results['feature_ext']=feature_ext
        results['classifier']=classifier
        results['algorithm'] = algorithm
        results['success']=True
    except Exception:
        msg = "feature extraction and model creation failed."
        results['errors'].append(msg)
        log.exception(msg)

    return results
예제 #6
0
파일: create.py 프로젝트: hughdbrown/ease
def create(text, score, prompt_string):
    """
    Creates a machine learning model from input text, associated scores and a prompt.
    text - A list of strings containing the text of the essays
    score - a list of integers containing score values
    prompt_string - the common prompt for the set of essays

    Returns a results dictionary: 'success' tells whether the model was
    built, 'errors' collects the message of the step that failed, and the
    model-related entries ('feature_ext', 'classifier', 'algorithm',
    'cv_kappa', 'cv_mean_absolute_error') are only updated on success.
    """

    # Initialize a results dictionary to return
    results = {
        'errors': [],
        'success': False,
        'cv_kappa': 0,
        'cv_mean_absolute_error': 0,
        'feature_ext': "",
        'classifier': "",
        'algorithm': util_functions.AlgorithmTypes.classification,
        'score': score,
        'text': text,
        'prompt': prompt_string,
    }

    if len(text) != len(score):
        msg = "Target and text lists must be same length."
        results['errors'].append(msg)
        # log.exception is meant for use inside an except block; there is no
        # active exception here, so a plain error record is emitted instead.
        log.error(msg)
        return results

    # Decide what algorithm to use (regression or classification)
    try:
        # Count the number of unique score points in the score list
        unique_score_points = util_functions.f7(list(score))
        if len(unique_score_points) > 5:
            algorithm = util_functions.AlgorithmTypes.regression
        else:
            algorithm = util_functions.AlgorithmTypes.classification
    except Exception:
        # Fall back to regression if the score points cannot be counted
        algorithm = util_functions.AlgorithmTypes.regression

    try:
        # Create an essay set object that encapsulates all the essays and alternate representations (tokens, etc)
        e_set = model_creator.create_essay_set(text, score, prompt_string)
    except Exception:
        msg = "essay set creation failed."
        results['errors'].append(msg)
        log.exception(msg)
        # Nothing further can be done without an essay set. Returning here
        # avoids a follow-up NameError on e_set that would be reported as a
        # second, misleading "model creation failed" error.
        return results

    try:
        # Gets features from the essay set and computes error.
        # ``type`` is the callee's keyword name, so it is kept in the call.
        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model(
            e_set, type=algorithm)
        results['cv_kappa'] = cv_error_results['kappa']
        results['cv_mean_absolute_error'] = cv_error_results['mae']
        results['feature_ext'] = feature_ext
        results['classifier'] = classifier
        results['algorithm'] = algorithm
        results['success'] = True
    except Exception:
        msg = "feature extraction and model creation failed."
        results['errors'].append(msg)
        log.exception(msg)

    return results