Пример #1
0
def dramatize(s):
    """ Returns a string with stronger adjectives:
        dramatize("This code is nice") => "This code is legendary"
    """

    out = []

    # parsetree() splits the string into sentences; each sentence is a
    # list of Word objects carrying attributes such as Word.tag.
    for sentence in parsetree(s):
        for word in sentence:
            substitute = None
            if word.tag == "JJ":

                # Sentiment polarity of this adjective (first element of the pair).
                polarity = sentiment(word.string)[0]

                # Neutral adjectives ("last", "political", ...) are left alone.
                if polarity != 0.0:

                    # Look for a dictionary adjective whose polarity is
                    # at least 0.2 more extreme, in random order
                    # (shuffled() returns a new, randomly ordered list).
                    for candidate, p in shuffled(adjectives.items()):
                        more_extreme = (p > polarity + 0.2) if polarity >= 0 \
                                  else (p < polarity - 0.2)
                        if more_extreme:
                            substitute = candidate.lower()
                            break
            out.append(substitute if substitute is not None else word.string)

    return " ".join(out)
Пример #2
0
def _dramatized_word(word):
    # Return a stronger replacement for an adjective, or the word unchanged.
    if word.tag != "JJ":
        return word.string
    # Sentiment polarity of this adjective (first element of the pair).
    polarity = sentiment(word.string)[0]
    # Neutral adjectives ("last", "political", ...) are left alone.
    if polarity == 0.0:
        return word.string
    # Scan the adjective dictionary in random order for one whose
    # polarity is at least 0.2 more extreme in the same direction
    # (shuffled() returns a new, randomly ordered list).
    for adjective, p in shuffled(adjectives.items()):
        if polarity >= 0 and p > polarity + 0.2 \
        or polarity <  0 and p < polarity - 0.2:
            return adjective.lower()
    return word.string


def dramatize(s):
    """ Returns a string with stronger adjectives:
        dramatize("This code is nice") => "This code is legendary"
    """
    # parsetree() splits the string into sentences; each sentence is a
    # list of Word objects carrying attributes such as Word.tag.
    words = []
    for sentence in parsetree(s):
        for word in sentence:
            words.append(_dramatized_word(word))
    return " ".join(words)
Пример #3
0
 def test_shuffled(self):
     # shuffled() must return a new permutation: different order,
     # same elements (so sorting it recovers the original list).
     original = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     permuted = vector.shuffled(original)
     self.assertTrue(original != permuted and original == sorted(permuted))
     print("pattern.vector.shuffled()")
Пример #4
0
 def test_shuffled(self):
     # Assert shuffled() <=> sorted(): a new order, but the same elements.
     v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     v2 = vector.shuffled(v1)
     self.assertTrue(v1 != v2 and v1 == sorted(v2))
     # print() call form works identically on Python 2 and 3 for a single
     # argument; the Python-2-only print statement broke Python 3.
     print("pattern.vector.shuffled()")
Пример #5
0
# it will check if it could correctly predict this example.
# If not, it will adjust its weights.
# So the accuracy of the perceptron can be improved significantly
# by training in multiple iterations, averaging out all weights.

# This will take several minutes.
# If you want it to run faster for experimentation,
# use less iterations or less data in the code below:

print("training model...")

seed(0)  # Lock random list shuffling so we can compare.

# Train a single-layer averaged perceptron tagger over 5 passes of the
# (shuffled) first 20,000 sentences. Each token is trained with its
# previous and following (word, tag) pair as context.
m = Model(known=known, unknown=unknown, classifier=SLP())
for iteration in range(5):
    for s in shuffled(data[:20000]):
        prev = None       # (word, tag) of the preceding token, None at start.
        succ = None       # following token, if any (renamed: `next` shadows the builtin).
        for i, (w, tag) in enumerate(s):
            if i < len(s) - 1:
                succ = s[i + 1]
            m.train(w, tag, prev, succ)
            prev = (w, tag)
            succ = None

# Persist the trained model next to this script.
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)
Пример #6
0
# so you can still follow the rest of the example.

classifier = SVM()

# We'll build a classifier to predict sentiment in Dutch movie reviews.
# For example, "geweldige film!" (great movie) indicates a positive sentiment.
# The CSV file at pattern/test/corpora/polarity-nl-bol.com.csv
# contains 1,500 positive and 1,500 negative reviews.

# The pattern.vector module has a shuffled() function
# which we use to randomly arrange the reviews in the list:

print("loading data...")
corpus_path = os.path.join(os.path.dirname(__file__), "..", "..", "test", "corpora", "polarity-nl-bol.com.csv")
data = shuffled(Datasheet.load(corpus_path))

# We do not necessarily need Document objects as in the previous examples.
# We can train any classifier on simple Python dictionaries too.
# This is sometimes easier if you want full control over the data.
# The instance() function below returns a train/test instance for a given review:
# 1) parse the review for part-of-speech tags,
# 2) keep adjectives, adverbs and exclamation marks (these mainly carry sentiment),
# 3) lemmatize the Dutch adjectives, e.g., "goede" => "goed" (good).
# 4) count the distinct words in the list, map it to a dictionary.


def instance(review):                     # "Great book!"
    v = tag(review)                       # [("Great", "JJ"), ("book", "NN"), ("!", "!")]
    v = [word for (word, pos) in v if pos in ("JJ", "RB") or word in ("!")]
    v = [predicative(word) for word in v] # ["great", "!", "!"]
Пример #7
0
# so you can still follow the rest of the example.

classifier = SVM()

# We'll build a classifier to predict sentiment in Dutch movie reviews.
# For example, "geweldige film!" (great movie) indicates a positive sentiment.
# The CSV file at pattern/test/corpora/polarity-nl-bol.com.csv
# contains 1,500 positive and 1,500 negative reviews.

# The pattern.vector module has a shuffled() function
# which we use to randomly arrange the reviews in the list:

print "loading data..."
data = Datasheet.load(
    os.path.join("..", "..", "test", "corpora", "polarity-nl-bol.com.csv"))
data = shuffled(data)

# We do not necessarily need Document objects as in the previous examples.
# We can train any classifier on simple Python dictionaries too.
# This is sometimes easier if you want full control over the data.
# The instance() function below returns a train/test instance for a given review:
# 1) parse the review for part-of-speech tags,
# 2) keep adjectives, adverbs and exclamation marks (these mainly carry sentiment),
# 3) lemmatize the Dutch adjectives, e.g., "goede" => "goed" (good).
# 4) count the distinct words in the list, map it to a dictionary.


def instance(review):  # "Great book!"
    v = tag(review)  # [("Great", "JJ"), ("book", "NN"), ("!", "!")]
    v = [word for (word, pos) in v if pos in ("JJ", "RB") or word in ("!")]
    v = [predicative(word) for word in v]  # ["great", "!", "!"]
Пример #8
0
# it will check if it could correctly predict this example.
# If not, it will adjust its weights.
# So the accuracy of the perceptron can be improved significantly
# by training in multiple iterations, averaging out all weights.

# This will take several minutes.
# If you want it to run faster for experimentation,
# use less iterations or less data in the code below:

print("training model...")

seed(0)  # Lock random list shuffling so we can compare.

# Train a single-layer averaged perceptron tagger over 5 passes of the
# (shuffled) first 20,000 sentences. Each token is trained with its
# previous and following (word, tag) pair as context.
m = Model(known=known, unknown=unknown, classifier=SLP())
for iteration in range(5):
    for s in shuffled(data[:20000]):
        prev = None       # (word, tag) of the preceding token, None at start.
        succ = None       # following token, if any (renamed: `next` shadows the builtin).
        for i, (w, tag) in enumerate(s):
            if i < len(s) - 1:
                succ = s[i + 1]
            m.train(w, tag, prev, succ)
            prev = (w, tag)
            succ = None

# Persist the trained model next to this script.
f = os.path.join(os.path.dirname(__file__), "en-model.slp")
m.save(f, final=True)

# Each parser in Pattern (pattern.en, pattern.es, pattern.it, ...)
# assumes that a lexicon of known words and their most frequent tag is available,
# along with some rules for morphology (suffixes, e.g., -ly = adverb)