Пример #1
0
 def __init__(self):
     """Create the splitter, the POS tagger and the dictionary tagger."""
     # Dictionary files handed to DictionaryTagger, in exactly this order.
     dictionary_files = [
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
     ]

     self.splitter = Splitter()
     self.postagger = POSTagger()
     self.dicttagger = DictionaryTagger(dictionary_files)
Пример #2
0
def processQuestion(gloveModel,
                    question,
                    minLen=1,
                    maxLen=3,
                    useAPI=False,
                    useSynonyms=False):
    """Break a question into candidate parts and embed every part.

    The question is POS-tagged as given, then one trailing '?' or '.' is
    removed and the result is generalized with ``Splitter.generalize``.
    Candidate parts come either from ``Splitter.split`` on the generalized
    question or, when ``useAPI`` is true, from the predicates returned by
    ``api.getBinaryRelations``. If the API path yields no parts, the splitter
    is used as a fallback.

    Args:
        gloveModel: Object exposing ``getVector(part)``; for synonym expansion
            it must also expose ``gloveModel.most_similar(word)``.
        question: The question text. An empty question no longer raises
            ``IndexError`` on the trailing-punctuation check.
        minLen: Passed to ``Splitter.split`` as ``min``.
        maxLen: Passed to ``Splitter.split`` as ``max``.
        useAPI: When true, take parts from ``api.getBinaryRelations`` instead
            of the splitter.
        useSynonyms: Only honoured together with ``useAPI``. Adds variants of
            each part in which its longest word is replaced by the words
            returned from ``most_similar``.

    Returns:
        A tuple ``(vectors, parts, pos, gen_question, labels, resultsExists)``:
        ``vectors`` holds ``gloveModel.getVector(part)`` for every part,
        ``labels`` holds camelCase joins of the multi-word API predicates, and
        ``resultsExists`` is True only when the API produced at least one part.
    """
    tagger = POSTagger()
    # Tagging happens on the question as passed in, before punctuation removal.
    pos = tagger.parse(question)

    splitter = Splitter()
    # Guard against an empty question: indexing [-1] would raise IndexError.
    if question and question[-1] in ('?', '.'):
        question = question[:-1]
    gen_question = splitter.generalize(question, pos)

    labels = []
    resultsExists = False
    parts = []

    if useAPI:
        apiResult, _ = api.getBinaryRelations(question)
        parts = [
            rel.predicate for rel in apiResult
            if len(rel.predicate_positions_) > 1
        ]

        # Multi-word predicates get a camelCase label: first word unchanged,
        # every following word capitalized.
        for part in parts:
            words = part.split()
            if len(words) > 1:
                labels.append(words[0] + ''.join(
                    w[0].upper() + w[1:].lower() for w in words[1:]))

        if useSynonyms:
            # The longest word of each API predicate is the one to substitute.
            predicates = [max(part.split(), key=len) for part in parts]
            for predicate in predicates:
                # Looked up once per predicate; every predicate is a word of
                # at least one part, so the lookup always happened anyway.
                synonyms = [
                    syn[0] for syn in gloveModel.gloveModel.most_similar(
                        predicate.lower())
                ]
                # Iterate over a snapshot because parts grows inside the loop.
                for part in list(parts):
                    if predicate in part:
                        parts.extend(
                            part.replace(predicate, synonym)
                            for synonym in synonyms)

        resultsExists = bool(parts)

    # Splitter path: either the API was not requested or it returned nothing.
    if not parts:
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))

    # create embedder part
    vectors = [gloveModel.getVector(part) for part in parts]
    return vectors, parts, pos, gen_question, labels, resultsExists
Пример #3
0
def extract_text(arg, punkt):
    """
    Read the file at ``arg``, tokenize its text into sentences with ``punkt``
    and hand each sentence to ``find_passives``, printing a dashed separator
    line after every sentence.
    """
    separator = "-" * 60

    with open(arg) as source:
        logger.info("Reading Text file")
        sentences = punkt.tokenize(source.read())

        logger.info(f"{len(sentences)} sentences detected")

        for sentence in sentences:
            find_passives(sentence)
            print(separator)
            print("-" * 60)


if __name__ == "__main__":

    # Module-level tagger built once at start-up.
    # NOTE(review): presumably read by find_passives - confirm against its definition.
    TAGGER = POSTagger().get()

    if len(sys.argv) <= 1:
        # No file paths were given on the command line.
        print("No sentences")
    else:
        # One sentence tokenizer shared by all input files.
        # NOTE(review): constructed without training text - confirm this is the intended model.
        punkt = nltk.tokenize.punkt.PunktSentenceTokenizer()

        # Every command-line argument is treated as a path to a text file.
        for arg in sys.argv[1:]:
            extract_text(arg, punkt)