Example no. 1
0
def main(clean_text):
    """Tag *clean_text* and return the passive-voice sentences found in it.

    Side effect: (re)binds the module-level TAGGER used by the tagging
    helpers before scanning the text.

    :param clean_text: pre-cleaned input text to scan (a string).
    :return: whatever findpassives() yields — presumably a list of
             passive-voice sentences; TODO confirm against findpassives.
    """
    global TAGGER
    TAGGER = postagger.get_tagger()
    # NOTE(review): the original ended with unreachable Python 2 print
    # statements after this return, plus a large block of commented-out
    # experiments; both removed as dead code.
    return findpassives(clean_text)
Example no. 2
0
def main():
    """Entry point: run findpassives() over each file named on the
    command line, or drop into the interactive REPL when none are given.
    Rebinds the module-level TAGGER before doing any work."""
    global TAGGER
    TAGGER = postagger.get_tagger()

    filenames = sys.argv[1:]
    if not filenames:
        # No file arguments: interactive mode.
        repl()
        return
    for filename in filenames:
        findpassives(filename)
Example no. 3
0
def main():
    """Scan every file given on the command line for passive voice;
    with no arguments, start the interactive REPL instead.
    Initialises the shared module-level TAGGER first."""
    global TAGGER
    TAGGER = postagger.get_tagger()

    if len(sys.argv) < 2:
        repl()
    else:
        for path in sys.argv[1:]:
            findpassives(path)
Example no. 4
0
# -*- coding: utf-8 -*-


from __future__ import print_function

import os
import sys
import nltk
import re

from nltk import pos_tag, word_tokenize
from itertools import dropwhile
import postagger

# NOTE(review): `global` at module level is a no-op; kept from the original.
global TAGGER
# Module-wide POS tagger shared by tag_sentence() and the main() entry points.
TAGGER = postagger.get_tagger()

# The code for the detection of passive voice has been done with help from https://github.com/j-c-h-e-n-g/nltk-passive-voice

def tag_sentence(sent):
	"""Take a sentence as a string and return a list of (word, tag) tuples."""
	# Python 2 type guard: accepts both str and unicode.
	assert isinstance(sent, basestring)
	words = word_tokenize(sent)
	return TAGGER.tag(words)

def passivep(tags):
	postToBe = list(dropwhile(lambda(tag): not tag.startswith("BE"), tags))
	nongerund = lambda(tag): tag.startswith("V") and not tag.startswith("VBG")

	filtered = filter(nongerund, postToBe)