示例#1
0
def showFeatures(IDsFilename=REVIEW_IDS_FILENAME):
    """
    Extracts the features of a corpus and shows how often they occur.

    The corpus is created from IDsFilename (REVIEW_IDS_FILENAME by default);
    the extracted features and feature vectors are handed to
    showFeatureOccurrence.
    """
    reviewCorpus = Corpus(IDsFilename)

    # features=None is passed explicitly to extractFeatures.
    extraction = extractFeatures(reviewCorpus.reviewIDs,
                                 reviewCorpus.reviews,
                                 features=None)
    featureList, vectors = extraction

    showFeatureOccurrence(featureList, vectors)
示例#2
0
def applyRules(IDsFilename):
    """
    Classifies the reviews of the given set with the rule based approach.

    A Corpus is created from IDsFilename (with CORPUS_PATH as corpus path),
    its features are extracted and classified, and both the feature
    occurrence and the performance against the corpus' gold standard are
    shown.
    """
    setLocation = "Using the set at '{path}{file}'".format(path=CORPUS_PATH,
                                                           file=IDsFilename)
    print(setLocation)

    print("Creating reviews...(this may take a while)")
    corpus = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    # Disabled alternative: load a pickled corpus instead of rebuilding it,
    # i.e. Corpus.loadCorpus(filename="training_set.pk").

    print("Extracting features...")
    features, featureVectors = extractFeatures(corpus.reviewIDs,
                                               corpus.reviews)

    goldStandard = corpus.goldStandard
    predicted = classify(features, featureVectors)

    showFeatureOccurrence(features, featureVectors, goldStandard, predicted)

    # Pair every gold label with the prediction for the same review ID so
    # both lists stay aligned, in the iteration order of the gold standard.
    labelPairs = [(label, predicted[ID]) for ID, label in goldStandard.items()]
    targets = [label for label, _ in labelPairs]
    cls = [guess for _, guess in labelPairs]

    showPerformance(targets, cls)
示例#3
0
def applyRules(IDsFilename):
    """
    Runs the rule based classifier on the reviews from the given set.

    Prints progress messages, creates the Corpus for IDsFilename, extracts
    and classifies its features, then shows the feature occurrence and the
    performance measured against the gold standard of the corpus.
    """
    usedSet = "Using the set at '{path}{file}'".format(path=CORPUS_PATH,
                                                       file=IDsFilename)
    print(usedSet)
    print("Creating reviews...(this may take a while)")
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    # A pickled corpus could be loaded here instead (disabled):
    # Corpus.loadCorpus(filename="training_set.pk")

    print("Extracting features...")
    extraction = extractFeatures(dataSet.reviewIDs, dataSet.reviews)
    features, featureVectors = extraction

    gold = dataSet.goldStandard
    classification = classify(features, featureVectors)
    showFeatureOccurrence(features, featureVectors, gold, classification)

    # Take one snapshot of the gold standard so that targets and
    # classifications are listed in the same review order.
    goldItems = list(gold.items())
    targets = [goldLabel for _, goldLabel in goldItems]
    cls = [classification[reviewID] for reviewID, _ in goldItems]

    showPerformance(targets, cls)
示例#4
0
def testRules():
    """
    Classifies the test reviews with the rule based approach.

    The gold standard maps every test review ID to the `ironic` attribute of
    its review; feature occurrence and performance are shown afterwards.
    """
    ironicIDs, regularIDs, reviews = createTestReviews()
    features, featureVectors = extractFeatures(ironicIDs + regularIDs, reviews)

    # Built from a fresh concatenation, independent of the sequence that was
    # handed to extractFeatures above.
    gold = {}
    for reviewID in ironicIDs + regularIDs:
        gold[reviewID] = reviews[reviewID].ironic

    predicted = ruleClassify(features, featureVectors)

    showFeatureOccurrence(features, featureVectors, gold, predicted)
    showPerformance(gold, predicted)
示例#5
0
def testRules():
    """
    Applies the rule based classifier to the reviews from createTestReviews.

    Shows the feature occurrence and the performance of the classification
    against a gold standard read from each review's `ironic` attribute.
    """
    ironicIDs, regularIDs, reviews = createTestReviews()

    extraction = extractFeatures(ironicIDs + regularIDs, reviews)
    features, featureVectors = extraction

    # Ironic IDs first, then regular ones; concatenated anew after extraction.
    testIDs = ironicIDs + regularIDs
    gold = {testID: reviews[testID].ironic for testID in testIDs}

    classification = ruleClassify(features, featureVectors)

    showFeatureOccurrence(features, featureVectors, gold, classification)
    showPerformance(gold, classification)
示例#6
0
def applySingleRules(IDsFilename):
    """
    Classifies the given corpus once per feature, using that feature alone.

    Should originally just apply one rule. Is now used to apply every feature
    on its own to the given corpus, so it basically shows how often each
    feature occurs in ironic and regular reviews, followed by the performance
    of each feature when it is the only rule used for classification.

    IDsFilename -- name of the IDs file the Corpus is created from; it is
                   passed to Corpus together with CORPUS_PATH as corpus path.
    """
    print("Using the set at '{path}{file}'".format(path=CORPUS_PATH,
                                                   file=IDsFilename))

    print("Creating reviews...(this may take a while)")
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    # The corpus is always created from scratch. Loading a pickled corpus
    # (Corpus.loadCorpus with "training_set.pk" or
    # "training_and_validation_set.pk") is disabled, so no "Loading reviews..."
    # message is printed any more: nothing is loaded at this point.

    print("Extracting features...")
    features, featureVectors = extractFeatures(dataSet.reviewIDs,
                                               dataSet.reviews)

    showFeatureOccurrence(features, featureVectors)

    gold = dataSet.goldStandard

    # The gold labels are the same for every rule, so the review order and
    # the target list are fixed once instead of being rebuilt per feature.
    goldItems = list(gold.items())
    reviewIDs = [ID for ID, _ in goldItems]
    targets = [g for _, g in goldItems]

    # Every extracted feature is tried as a rule. An earlier version used a
    # hand-picked list of feature names here instead.
    decisiveFeatureNames = [f.name for f in features]

    for d in decisiveFeatureNames:
        # Restrict the classifier to the single feature named d.
        classification = classify(features, featureVectors, [d])

        # Same review order as targets, so both lists line up pairwise.
        cls = [classification[ID] for ID in reviewIDs]

        print("\nClassifying by rule: ", d)

        showPerformance(targets, cls)
示例#7
0
def applySingleRules(IDsFilename):
    """
    Applies each feature on its own as a classification rule to the corpus.

    Should originally just apply one rule. Is now used to apply one feature
    at a time to the given corpus. So it basically shows how often each
    feature occurs in ironic and regular reviews, and then prints the
    performance reached when classifying by that single feature.

    IDsFilename -- name of the IDs file the Corpus is created from; it is
                   passed to Corpus together with CORPUS_PATH as corpus path.
    """
    print("Using the set at '{path}{file}'".format(path=CORPUS_PATH,
                                                   file=IDsFilename))

    print("Creating reviews...(this may take a while)")
    dataSet = Corpus(IDsFilename, corpusPath=CORPUS_PATH)

    # No "Loading reviews..." message is printed here: the calls that would
    # load a pickled corpus (Corpus.loadCorpus for "training_set.pk" or
    # "training_and_validation_set.pk") are disabled, so announcing a load
    # would be misleading.

    print("Extracting features...")
    features, featureVectors = extractFeatures(dataSet.reviewIDs,
                                               dataSet.reviews)

    showFeatureOccurrence(features, featureVectors)

    gold = dataSet.goldStandard

    # Targets do not depend on the feature being tested; compute them and the
    # matching review order a single time, outside the loop.
    goldItems = list(gold.items())
    targets = [label for _, label in goldItems]
    orderedIDs = [reviewID for reviewID, _ in goldItems]

    # All extracted features are used as candidate rules; a hand-picked list
    # of feature names was used here in an earlier version.
    decisiveFeatureNames = [f.name for f in features]

    for featureName in decisiveFeatureNames:
        # Classify with only this one feature enabled.
        classification = classify(features, featureVectors, [featureName])

        # Predictions in the same order as targets.
        cls = [classification[reviewID] for reviewID in orderedIDs]

        print("\nClassifying by rule: ", featureName)

        showPerformance(targets, cls)
示例#8
0
def showFeatures(IDsFilename=REVIEW_IDS_FILENAME):
    """
    Shows the feature occurrence for the corpus created from IDsFilename.

    IDsFilename defaults to REVIEW_IDS_FILENAME.
    """
    dataSet = Corpus(IDsFilename)

    # No feature list is supplied to extractFeatures (features=None).
    features, featureVectors = extractFeatures(
        dataSet.reviewIDs,
        dataSet.reviews,
        features=None,
    )

    showFeatureOccurrence(features, featureVectors)
示例#9
0
def testFeatures():
    """Extracts the features of the test reviews and shows their occurrence."""
    ironicIDs, regularIDs, reviews = createTestReviews()

    # Ironic review IDs come first, followed by the regular ones.
    extraction = extractFeatures(ironicIDs + regularIDs, reviews)
    featureList, vectors = extraction

    showFeatureOccurrence(featureList, vectors)
示例#10
0
def testFeatures():
    """
    Checks the features against the reviews from createTestReviews.

    The features are extracted for the ironic IDs followed by the regular
    IDs, and their occurrence is shown.
    """
    ironicIDs, regularIDs, reviews = createTestReviews()
    testIDs = ironicIDs + regularIDs

    features, featureVectors = extractFeatures(testIDs, reviews)
    showFeatureOccurrence(features, featureVectors)