Пример #1
0
import scipy
import numpy as np

import icgauge
from icgauge import experiment_frameworks
from icgauge import feature_extractors

# Guard flag: the experiment below only runs while this is True.
run_experiment = True

if run_experiment:
    # `import scipy` alone is not guaranteed to load the stats submodule, so
    # import it explicitly before looking up pearsonr.
    import scipy.stats

    # One iteration of experiment_features_iterated: train_and_dev is the
    # train reader, test is the assess reader, simple_features is the only
    # feature extractor, and Pearson's r is the scoring function.
    corr, alpha, conf_matrix, details = (
        experiment_frameworks.experiment_features_iterated(
            train_reader=icgauge.data_readers.train_and_dev,
            # Alternative reader noted by the author: test_official
            assess_reader=icgauge.data_readers.test,
            train_size=0.7,
            phi_list=[icgauge.feature_extractors.simple_features],
            # Alternative noted by the author: ternary_class_func
            class_func=icgauge.label_transformers.identity_class_func,
            # Alternative noted by the author:
            # fit_logistic_at_with_crossvalidation
            train_func=icgauge.training_functions.fit_logistic_at,
            # Public home of pearsonr; scipy.stats.stats is a deprecated
            # private namespace.
            score_func=scipy.stats.pearsonr,
            verbose=False,
            iterations=1))

    # Print out the results. Single-argument print(...) calls produce the
    # same output under Python 2 and Python 3.
    # NOTE(review): the figure after "+/-" is np.std (one standard
    # deviation) although the labels say "95% CI" -- confirm which statistic
    # is intended.
    mean_corr = np.round(np.mean(corr), 2)
    spread_corr = np.round(np.std(corr), 2)
    mean_alpha = np.round(np.mean(alpha), 2)
    spread_alpha = np.round(np.std(alpha), 2)

    print("\n-- AFTER COMPLETION --")
    print("Averaged correlation (95% CI): ")
    print("%s +/- %s" % (mean_corr, spread_corr))
    print("All correlations:")
    print(corr)
    print("")
    print("Averaged Cronbach's alpha (95% CI): ")
    print("%s +/- %s" % (mean_alpha, spread_alpha))
Пример #2
0
print "  Labels:    original 7-point scale"
print "  Model:     ordinal logistic model, all-threshold variant"
print "             (ordinal classification, recommended based on"
print "             experiments reported in Rennie and Srebro 2005"
print "             and replicated with toy.json)"
print

corr, conf_matrix = experiment_frameworks.experiment_features_iterated(
    train_reader=icgauge.data_readers.toy,
    assess_reader=None,
    train_size=0.7,
    phi_list=[
        icgauge.feature_extractors.manual_content_flags,
        icgauge.feature_extractors.length,
        icgauge.feature_extractors.modal_presence,
        icgauge.feature_extractors.hedge_presence,
        icgauge.feature_extractors.conjunctives_presence,
        icgauge.feature_extractors.punctuation_presence
    ],
    class_func=icgauge.label_transformers.identity_class_func,
    train_func=icgauge.training_functions.fit_logistic_at_with_crossvalidation,
    score_func=scipy.stats.stats.pearsonr,
    verbose=False,
    iterations=10)

print "Iterated result: "
print np.mean(corr)
print corr
print conf_matrix
print np.sum(conf_matrix)
"""
Пример #3
0
import scipy
import numpy as np

import icgauge
from icgauge import experiment_frameworks
from icgauge import feature_extractors

# Guard flag: the experiment below only runs while this is True.
run_experiment = True

if run_experiment:
    # `import scipy` alone is not guaranteed to load the stats submodule, so
    # import it explicitly before looking up pearsonr.
    import scipy.stats

    # Keyword arguments for the single-iteration run, collected first so the
    # call itself stays short.
    experiment_options = dict(
        train_reader=icgauge.data_readers.train_and_dev,
        # Alternative reader noted by the author: test_official
        assess_reader=icgauge.data_readers.test,
        train_size=0.7,
        phi_list=[icgauge.feature_extractors.simple_features],
        # Alternative noted by the author: ternary_class_func
        class_func=icgauge.label_transformers.identity_class_func,
        # Alternative noted by the author:
        # fit_logistic_at_with_crossvalidation
        train_func=icgauge.training_functions.fit_logistic_at,
        # Public home of pearsonr; scipy.stats.stats is a deprecated private
        # namespace.
        score_func=scipy.stats.pearsonr,
        verbose=False,
        iterations=1)

    corr, alpha, conf_matrix, details = (
        experiment_frameworks.experiment_features_iterated(
            **experiment_options))

    # Print out the results. Single-argument print(...) calls produce the
    # same output under Python 2 and Python 3.
    # NOTE(review): the figure after "+/-" is np.std (one standard
    # deviation) although the labels say "95% CI" -- confirm which statistic
    # is intended.
    print("\n-- AFTER COMPLETION --")
    print("Averaged correlation (95% CI): ")
    print("%s +/- %s" % (np.round(np.mean(corr), 2),
                         np.round(np.std(corr), 2)))
    print("All correlations:")
    print(corr)
    print("")
    print("Averaged Cronbach's alpha (95% CI): ")
    print("%s +/- %s" % (np.round(np.mean(alpha), 2),
                         np.round(np.std(alpha), 2)))
Пример #4
0
import icgauge
from icgauge import experiment_frameworks
from icgauge import feature_extractors

# Guard flag: the experiment below only runs while this is True.
run_experiment = True

if run_experiment:
    # Make sure the stats submodule is loaded before pearsonr is looked up;
    # a bare `import scipy` does not guarantee that.
    import scipy.stats

    # One iteration of experiment_features_iterated on the train_and_dev
    # reader with no separate assess reader, using the semcom_pca_features
    # extractor and Pearson's r as the scoring function.
    corr, alpha, conf_matrix, details = (
        experiment_frameworks.experiment_features_iterated(
            train_reader=icgauge.data_readers.train_and_dev,
            # Alternative noted by the author:
            # icgauge.data_readers.test_official
            assess_reader=None,
            train_size=0.7,
            phi_list=[
                # These two features were used to control for the effect of
                # number of words and to get at the effect of the
                # decomposition itself:
                #   icgauge.feature_extractors.number_words_only
                #   icgauge.feature_extractors.dimensional_decomposition
                # The real feature function for PCA, though, is:
                icgauge.feature_extractors.semcom_pca_features
            ],
            # Alternative noted by the author: ternary_class_func
            class_func=icgauge.label_transformers.identity_class_func,
            # Alternative noted by the author:
            # fit_logistic_at_with_crossvalidation
            train_func=icgauge.training_functions.fit_logistic_at,
            # Public home of pearsonr; scipy.stats.stats is a deprecated
            # private namespace.
            score_func=scipy.stats.pearsonr,
            verbose=False,
            iterations=1))

    # Print out the results. Single-argument print(...) calls produce the
    # same output under Python 2 and Python 3.
    # NOTE(review): the figure after "+/-" is np.std (one standard
    # deviation) although the label says "95% CI" -- confirm which statistic
    # is intended.
    mean_corr = np.round(np.mean(corr), 2)
    spread_corr = np.round(np.std(corr), 2)

    print("\n-- AFTER COMPLETION --")
    print("Averaged correlation (95% CI): ")
    print("%s +/- %s" % (mean_corr, spread_corr))
    print("All correlations:")
    print(corr)
Пример #5
0
print "             hedges, conjunctives, punctuation, determiner_usage"
print "  Labels:    original 7-point scale"
print "  Model:     ordinal logistic model, all-threshold variant"
print "             (ordinal classification, recommended based on"
print "             experiments reported in Rennie and Srebro 2005)"
print


corr, conf_matrix = experiment_frameworks.experiment_features_iterated(
    train_reader=icgauge.data_readers.toy, 
    assess_reader=None, 
    train_size=0.7,
    phi_list=[icgauge.feature_extractors.manual_content_flags,
              icgauge.feature_extractors.length,
              icgauge.feature_extractors.modal_presence,
              icgauge.feature_extractors.hedge_presence,
              icgauge.feature_extractors.conjunctives_presence,
              icgauge.feature_extractors.punctuation_presence,
              icgauge.feature_extractors.determiner_usage], 
    class_func=icgauge.label_transformers.identity_class_func,
    train_func=icgauge.training_functions.fit_logistic_at_with_crossvalidation,
    score_func=scipy.stats.stats.pearsonr,
    verbose=False,
    iterations=10)

print "Iterated result: " 
print np.mean(corr)
print corr
print conf_matrix
print np.sum(conf_matrix)

"""
Пример #6
0
from icgauge import experiment_frameworks

# Make sure the stats submodule is loaded before pearsonr is looked up; a
# bare `import scipy` does not guarantee that.
import scipy.stats

# Feature extractors handed to the experiment. Only
# semcom_sentiment_features is active; the author left the following
# icgauge.feature_extractors entries disabled:
#   manual_content_flags, length, word_length_features, modal_presence,
#   get_more_most_counts, hedge_presence, word_intensity,
#   get_morphological_counts, transitional_presence, conjunctives_presence,
#   punctuation_presence, determiner_usage, dimensional_decomposition,
#   syntactic_parse_features, kannan_ambili
active_extractors = [icgauge.feature_extractors.semcom_sentiment_features]

# Run on the train reader with no separate assess reader. `iterations` is
# not passed, so the callee's default applies.
corr, alpha, conf_matrix, details = (
    experiment_frameworks.experiment_features_iterated(
        train_reader=icgauge.data_readers.train,
        assess_reader=None,
        train_size=0.7,
        phi_list=active_extractors,
        # vs. ternary_class_func
        class_func=icgauge.label_transformers.identity_class_func,
        # NOTE(review): the original comment on this argument read "does
        # not have crossvalidation", which contradicts the function name --
        # confirm which is right.
        train_func=(
            icgauge.training_functions.fit_logistic_at_with_crossvalidation),
        # Public home of pearsonr; scipy.stats.stats is a deprecated private
        # namespace.
        score_func=scipy.stats.pearsonr,
        verbose=False))

# Print out the results
Пример #7
0
from icgauge import experiment_frameworks


# Make sure the stats submodule is loaded before pearsonr is looked up; a
# bare `import scipy` does not guarantee that.
import scipy.stats

# Feature extractors handed to the experiment, in the order they are passed.
feature_functions = [
    icgauge.feature_extractors.manual_content_flags,
    icgauge.feature_extractors.length,
    icgauge.feature_extractors.word_length_features,
    icgauge.feature_extractors.modal_presence,
    icgauge.feature_extractors.get_more_most_counts,
    icgauge.feature_extractors.hedge_presence,
    icgauge.feature_extractors.word_intensity,
    icgauge.feature_extractors.get_morphological_counts,
    icgauge.feature_extractors.transitional_presence,
    icgauge.feature_extractors.conjunctives_presence,
    icgauge.feature_extractors.punctuation_presence,
    icgauge.feature_extractors.determiner_usage,
    icgauge.feature_extractors.dimensional_decomposition,
    icgauge.feature_extractors.syntactic_parse_features,
    icgauge.feature_extractors.kannan_ambili,
]

# One iteration on the train reader with no separate assess reader.
# NOTE(review): three values are unpacked here; confirm that this matches
# the return signature of experiment_features_iterated in the icgauge
# version in use.
corr, conf_matrix, details = (
    experiment_frameworks.experiment_features_iterated(
        train_reader=icgauge.data_readers.train,
        assess_reader=None,
        train_size=0.7,
        phi_list=feature_functions,
        # vs. ternary_class_func
        class_func=icgauge.label_transformers.identity_class_func,
        # NOTE(review): the original comment on this argument read "does
        # not have crossvalidation", which contradicts the function name --
        # confirm which is right.
        train_func=icgauge.training_functions.fit_maxent_with_crossvalidation,
        # Public home of pearsonr; scipy.stats.stats is a deprecated private
        # namespace.
        score_func=scipy.stats.pearsonr,
        verbose=False,
        iterations=1))

# Print out the results. A single-argument print(...) call produces the same
# output under Python 2 and Python 3.
print("\n-- AFTER COMPLETION --")
Пример #8
0
# Make sure the stats submodule is loaded before pearsonr is looked up; a
# bare `import scipy` does not guarantee that.
import scipy.stats

# Banner describing the run. Every print(...) below takes a single argument
# so the output is the same under Python 2 and Python 3.
print("Punctuation test framework:")

print("  =================================== ")
print("	 First test: dataset with punctuation")
print("  =================================== ")
print("  Data: practice, test, toy dataset")
print("  Features:  all features")
print("  Labels:    original 7-point scale")
print("  Model:     logistic regression (classification)")
print("")

# Ten iterations on the punctuated_set reader with no separate assess
# reader, using the all_features extractor and Pearson's r as the scoring
# function.
corr, alpha, conf_matrix, details = (
    experiment_frameworks.experiment_features_iterated(
        train_reader=icgauge.data_readers.punctuated_set,
        assess_reader=None,
        train_size=0.7,
        phi_list=[icgauge.feature_extractors.all_features],
        class_func=icgauge.label_transformers.identity_class_func,
        train_func=icgauge.training_functions.fit_logistic_at,
        # Public home of pearsonr; scipy.stats.stats is a deprecated private
        # namespace.
        score_func=scipy.stats.pearsonr,
        verbose=False,
        iterations=10))

# Summarise the scores.
# NOTE(review): the figure after "+/-" is np.std (one standard deviation)
# although the labels say "95% CI" -- confirm which statistic is intended.
mean_corr = np.round(np.mean(corr), 2)
spread_corr = np.round(np.std(corr), 2)
mean_alpha = np.round(np.mean(alpha), 2)
spread_alpha = np.round(np.std(alpha), 2)

print("\n-- AFTER COMPLETION --")
print("Averaged correlation (95% CI): ")
print("%s +/- %s" % (mean_corr, spread_corr))
print("All correlations:")
print(corr)
print("")
print("Averaged Cronbach's alpha (95% CI): ")
print("%s +/- %s" % (mean_alpha, spread_alpha))
print("All alphas:")
print(alpha)
Пример #9
0
# Make sure the stats submodule is loaded before pearsonr is looked up; a
# bare `import scipy` does not guarantee that.
import scipy.stats

# Banner describing the run. Every print(...) below takes a single argument
# so the output is the same under Python 2 and Python 3.
print("Punctuation test framework:")

print("  =================================== ")
print("	 First test: dataset with punctuation")
print("  =================================== ")
print("  Data: practice, test, toy dataset")
print("  Features:  all features")
print("  Labels:    original 7-point scale")
print("  Model:     logistic regression (classification)")
print("")

# Keyword arguments for the ten-iteration run on the punctuated_set reader,
# collected first so the call itself stays short.
run_options = dict(
    train_reader=icgauge.data_readers.punctuated_set,
    assess_reader=None,
    train_size=0.7,
    phi_list=[icgauge.feature_extractors.all_features],
    class_func=icgauge.label_transformers.identity_class_func,
    train_func=icgauge.training_functions.fit_logistic_at,
    # Public home of pearsonr; scipy.stats.stats is a deprecated private
    # namespace.
    score_func=scipy.stats.pearsonr,
    verbose=False,
    iterations=10)

corr, alpha, conf_matrix, details = (
    experiment_frameworks.experiment_features_iterated(**run_options))

# Summarise the scores.
# NOTE(review): the figure after "+/-" is np.std (one standard deviation)
# although the labels say "95% CI" -- confirm which statistic is intended.
print("\n-- AFTER COMPLETION --")
print("Averaged correlation (95% CI): ")
print("%s +/- %s" % (np.round(np.mean(corr), 2), np.round(np.std(corr), 2)))
print("All correlations:")
print(corr)
print("")
print("Averaged Cronbach's alpha (95% CI): ")
print("%s +/- %s" % (np.round(np.mean(alpha), 2), np.round(np.std(alpha), 2)))
print("All alphas:")