Пример #1
0
def main():
    """Show the TNWST text menu and run the action the user selects.

    Clears the terminal, prints the numbered menu, reads one integer from
    standard input and calls the matching handler: 1 -> ``contagem``,
    2 -> ``concord``, 3 -> ``compare``, 4 -> ``tag``, 5 -> ``exit``.
    Any other integer does nothing.

    Raises:
        ValueError: if the typed text cannot be converted by ``int``.
    """
    system("clear")
    print("\
                 ####TNWST####\n\
            \n\
            \n\
            1. Contador de palavras e Gráfico:\n\
            2. Concordancia e Dispersão:\n\
            3. Comparação:\n\
            4. Tagger:(Inglês)\n\n\
            5. Sair\n\n")

    choice = int(input('Digite a opção desejada: '))

    # Compare by value. `is` tests object identity, which only happens to
    # work for small ints on CPython and raises a SyntaxWarning on 3.8+.
    if choice == 1:
        contagem()

    elif choice == 2:
        concord()

    elif choice == 3:
        compare()

    elif choice == 4:
        tag()

    elif choice == 5:
        exit()
Пример #2
0
def main():
    """Display the TNWST menu and dispatch to the chosen tool.

    The terminal is cleared, the menu is printed, and a single integer is
    read from standard input. Options 1-4 call ``contagem``, ``concord``,
    ``compare`` and ``tag`` respectively; option 5 calls ``exit``. Any
    other integer falls through without doing anything.

    Raises:
        ValueError: if the typed text cannot be converted by ``int``.
    """
    system("clear")
    print("\
                 ####TNWST####\n\
            \n\
            \n\
            1. Contador de palavras e Gráfico:\n\
            2. Concordancia e Dispersão:\n\
            3. Comparação:\n\
            4. Tagger:(Inglês)\n\n\
            5. Sair\n\n")

    choice = int(input('Digite a opção desejada: '))

    # Equality, not identity: `choice is 1` depends on CPython's small-int
    # cache and is flagged with a SyntaxWarning since Python 3.8.
    if choice == 1:
        contagem()

    elif choice == 2:
        concord()

    elif choice == 3:
        compare()

    elif choice == 4:
        tag()

    elif choice == 5:
        exit()
def process():
    """Run the submitted text through the whole processing pipeline.

    Reads the ``text`` field of the current request's form data (an empty
    string when the field is absent) and applies ``preprocess``, ``tag``
    (called with ``"http://localhost:7000"`` as its second argument),
    ``chunk`` and ``normalize``, in that order.

    Returns:
        The result of ``jsonify`` for a success envelope whose
        ``data.text`` entry holds the processed text.
    """
    # Read the form data from the HTTP request.
    submitted = request.form.get("text", "")

    # Each stage consumes the output of the previous one.
    preprocessed = preprocess(submitted)
    tagged = tag(preprocessed, "http://localhost:7000")
    chunked = chunk(tagged)
    normalized = normalize(chunked)

    # Build the JSON HTTP response carrying the processed text.
    payload = {
        "status": "success",
        "message": "Request successful",
        "data": {"text": normalized},
    }
    return jsonify(payload)
def tag_tropes(fp):
    """Tag the description of every trope loaded from ``fp``.

    ``read_json(fp)`` is iterated; each entry is indexed as ``entry[0]``
    (the trope) and ``entry[1]`` (its description). The description is
    passed to ``tagger.tag``.

    Returns:
        A list of ``(trope, tagged, by_tag)`` tuples, where ``tagged`` and
        ``by_tag`` are the values stored under those keys in the tagger's
        result.
    """
    def _tag_entry(entry):
        # Index rather than unpack, so entries with extra fields still work.
        trope = entry[0]
        description = entry[1]
        tagging = tagger.tag(description)
        return trope, tagging['tagged'], tagging['by_tag']

    return [_tag_entry(entry) for entry in read_json(fp)]
Пример #5
0
    def check(self, instance):
        """Report one gauge per tagger lookup, covering both lookup APIs.

        Every gauge has value 1 and carries the tags returned by the
        corresponding lookup. ``instance`` is not used.
        """
        entity = "test_entity"
        unknown_entity = "404"

        # Boolean-flag API: tagger.get_tags(entity, flag).
        self.gauge("old_method.low_card", 1,
                   tags=tagger.get_tags(entity, False))
        self.gauge("old_method.high_card", 1,
                   tags=tagger.get_tags(entity, True))
        self.gauge("old_method.unknown", 1,
                   tags=tagger.get_tags(unknown_entity, True))

        # Constant-based API: tagger.tag(entity, <tagger constant>).
        self.gauge("new_method.low_card", 1,
                   tags=tagger.tag(entity, tagger.LOW))
        self.gauge("new_method.orch_card", 1,
                   tags=tagger.tag(entity, tagger.ORCHESTRATOR))
        self.gauge("new_method.high_card", 1,
                   tags=tagger.tag(entity, tagger.HIGH))
        self.gauge("new_method.unknown", 1,
                   tags=tagger.tag(unknown_entity, tagger.LOW))
Пример #6
0
 def parse(self, tokens, tagger=None):
     """Parse ``tokens``, first adding productions for unknown words.

     The tokens are POS-tagged with ``nltk.pos_tag`` when ``tagger`` is
     ``None``, otherwise with ``tagger.tag(tokens)``. For every tagged
     token whose lookup in ``self._grammar._lexical_index`` is falsy, a
     ``ProbabilisticProduction`` ``Nonterminal(pos) -> [tok]`` with
     probability 0.000001 is appended to the grammar's productions. If
     anything was appended, the grammar indexes are recalculated. Parsing
     is then delegated to the parent class.

     Args:
         tokens: the tokens to parse.
         tagger: optional object with a ``tag(tokens)`` method that yields
             ``(token, pos)`` pairs.

     Returns:
         Whatever the parent class's ``parse(tokens)`` returns.
     """
     # Identity test: `== None` would invoke a custom __eq__ on the tagger.
     if tagger is None:
         tagged = nltk.pos_tag(tokens)
     else:
         tagged = tagger.tag(tokens)

     missing = False
     for tok, pos in tagged:
         if not self._grammar._lexical_index.get(tok):
             missing = True
             # Tiny probability so the unknown word becomes parseable.
             self._grammar._productions.append(
                 ProbabilisticProduction(Nonterminal(pos), [tok], prob=0.000001))
     if missing:
         # The new productions are only visible after the indexes are rebuilt.
         self._grammar._calculate_indexes()
     return super(PCFGViterbiParser, self).parse(tokens)
Пример #7
0
def capitalize_named_entities(text):
    """Restore the proper capitalization of named entities in ``text``.

    The text is tagged with ``tag``. A token whose tag starts with ``B``
    opens a new entity; following tokens whose tag starts with ``I`` extend
    it (appended directly when the token text is found in
    ``string.punctuation``, otherwise preceded by a space). The whole text
    is lowercased, and every collected entity that is a key of
    ``entity_capitalizations.json`` (read from this module's directory) is
    replaced by the ``"text"`` value stored for it.

    The tagged tokens are also printed to standard output.

    Args:
        text: the text to process.

    Returns:
        The lowercased text with known entities re-capitalized.

    Raises:
        ValueError: if an ``I`` token is seen before any ``B`` token.
    """
    # 04: Rewrite named entities in the text using their correct capitalization
    current_dir_path = os.path.dirname(os.path.realpath(__file__))
    capitalizations_path = os.path.join(current_dir_path,
                                        "entity_capitalizations.json")
    # The context manager closes the file, which was previously left open.
    with open(capitalizations_path) as capitalizations_file:
        entity_capitalizations = json.load(capitalizations_file)

    tagged_text = tag(text)
    result = text.lower()

    tagged_entities = []

    container = ""
    is_combining = False

    # NOTE(review): tokens with any other tag do not close the current
    # entity, so an "I" token after such a gap still extends it. Confirm
    # this is intended.
    for token in tagged_text:
        token_text = token[0].lower()
        token_code = token[1][0]

        if token_code == "B":
            if is_combining:
                # A new entity begins: store the one collected so far.
                tagged_entities.append(container)
            is_combining = True
            container = token_text
        elif token_code == "I":
            if not is_combining:
                raise ValueError(
                    "inside-entity tag {0!r} on token {1!r} has no preceding "
                    "begin tag".format(token[1], token[0]))
            if token_text in string.punctuation:
                container += token_text
            else:
                container += " " + token_text

    # Store the last entity. Skip the empty container left when no entity
    # was detected, so "" is never used as a replacement pattern below.
    if container:
        tagged_entities.append(container)

    print("Hasil deteksi entitas bernama: ")
    print(tagged_text, end="\n\n")

    for entity in tagged_entities:
        lowercased_entity = entity.lower()
        if lowercased_entity in entity_capitalizations:
            result = result.replace(
                lowercased_entity,
                entity_capitalizations[lowercased_entity]["text"])

    return result
def extract_noun_phrases(text):
    """Return the noun phrases found in ``text`` as a list of strings.

    The text is tokenized with ``sentence_re``, tagged by ``tagger`` and
    parsed by ``chunker``; ``get_terms`` then extracts the terms from the
    resulting tree. The words of each term are concatenated with a space
    after each word and the result is stripped. Terms that yield no words
    are dropped.
    """
    tokens = nltk.regexp_tokenize(text, sentence_re)

    # Build the POS tree from the tagged tokens.
    parse_tree = chunker.parse(tagger.tag(tokens))

    phrases = []
    for term in get_terms(parse_tree):
        # Same "word + space" concatenation as a running +=, so an empty
        # string here means the term contained no words at all.
        joined = "".join(word + " " for word in term)
        if joined:
            phrases.append(joined.strip())
    return phrases
from preprocessor import preprocess
from tagger import tag
from chunker import chunk
from normalizer import normalize

# Run the four pipeline stages on input.txt, writing each stage's output to
# its own file. All files are opened up front, so every output file is
# truncated before processing starts. The context manager guarantees each
# handle is flushed and closed, even when a stage raises.
with open("input.txt") as input_file, \
        open("preprocessed.txt", "w") as preprocessed_file, \
        open("tagged.txt", "w") as tagged_file, \
        open("chunked.txt", "w") as chunked_file, \
        open("normalized.txt", "w") as normalized_file:

    # NOTE(review): lines read from the file keep their trailing "\n", so
    # joining with "\n" doubles every line break. Kept as-is; confirm that
    # this is what preprocess() expects.
    input_text = "\n".join(input_file)

    # Preprocessing
    temp = preprocess(input_text)
    preprocessed_file.write(temp)

    # Tagging
    temp = tag(temp, "http://localhost:7000")
    tagged_file.write(temp)

    # Chunking
    temp = chunk(temp)
    chunked_file.write(temp)

    # Normalization
    temp = normalize(temp)
    normalized_file.write(temp)


Пример #10
0
    """Generate one or more HTML tags"""
    if cls is not None:
        attrs['class'] = cls
    if attrs:
        attr_str = ''.join(' %s="%s"' % (attr, value)
                           for attr, value in sorted(attrs.items()))
    else:
        attr_str = ''
    if content:
        return '\n'.join('<%s%s>%s</%s>' % (name, attr_str, c, name)
                         for c in content)
    else:
        return '<%s%s />' % (name, attr_str)


# Demonstration calls of tag(); each result is printed followed by a blank
# line.

# No content and no attributes.
tag_br = tag('br')
print('tag_br =', tag_br, end='\n\n')

# One content item.
tag_p_hello = tag('p', 'hello')
print('tag_p_hello =', tag_p_hello, end='\n\n')

# Two content items; the result is printed starting on its own line.
tag_p_hello_world = tag('p', 'hello', 'world')
print('tag_p_hello_world = ', tag_p_hello_world, sep='\n', end='\n\n')

# One content item plus an ``id`` keyword argument.
tag_p_hello_id = tag('p', 'hello', id=33)
print('tag_p_hello_id =', tag_p_hello_id, end='\n\n')
Пример #11
0
     for tag in tagger.tags:
         p = tagger.precision(matrix, tag)
         r = tagger.recall(matrix, tag)
         LOG("tag %s: precision = %.4f, recall = %.4f\n" % (tag, p, r))
     LOG("accuracy = %.4f\n" % tagger.accuracy(matrix))
     
 # "tag" mode: load a trained model and tag new data (in CoNLL format).
 # Usage: python3 lab2.py tag MODEL_FILE IN_DATA_FILE OUT_DATA_FILE
 #
 # Every input row is padded with "_" to at least six columns, columns 4
 # and 5 (indices 3 and 4) are set to the predicted tag, and the row is
 # written tab-separated. A blank line follows each sentence.
 if sys.argv[1] == "tag":
     tagger = MyTagger(TAGS)
     LOG("Loading the model from %s ..." % sys.argv[2])
     tagger.load(sys.argv[2])
     LOG(" done\n")
     with open(sys.argv[4], 'w') as fp:
         for sentence in load_data(sys.argv[3]):
             # Column index 1 of each row is what the tagger receives
             # (presumably the word form; confirm against load_data).
             tagged_tokens = tagger.tag([token[1] for token in sentence])
             for token, tagged_token in zip(sentence, tagged_tokens):
                 # A non-positive count yields an empty list, so rows that
                 # already have six or more columns are left unchanged.
                 token.extend(["_"] * (6 - len(token)))
                 token[3] = token[4] = tagged_token[1]
                 fp.write("\t".join(token) + "\n")
             fp.write("\n")
 
 # Load a trained model from a file and evaluate it on test data.
 # This will print the confusion matrix.
 # Usage: python3 lab2.py matrix MODEL_FILE TEST_DATA_FILE
 if sys.argv[1] == "matrix":
     tagger = MyTagger(TAGS)
     LOG("Loading the model from %s ..." % sys.argv[2])