Exemplo n.º 1
0
def main():
    """Print the base form of every morpheme tagged as a verb.

    Opens the file named by ``sys.argv[1]``, hands it to
    ``takayuki.make_morphdicts`` and then walks the result, which is used
    here as an iterable of sentences, each an iterable of dict-like
    morphemes.  For every morpheme whose ``"pos"`` value is "動詞" (verb),
    its ``"base"`` value is printed on its own line.
    """
    # The context manager closes the file even if parsing raises.
    # NOTE(review): presumably the input is MeCab output -- confirm.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    for one_sentence in all_sentences:
        for morphdict in one_sentence:
            if morphdict["pos"] == "動詞":
                # Single-argument print(...) works on Python 2 and 3 alike.
                print(morphdict["base"])
Exemplo n.º 2
0
def main():
    """Print the surface form of every morpheme tagged as a verb.

    Opens the file named by ``sys.argv[1]``, parses it with
    ``takayuki.make_morphdicts`` and prints the ``"surface"`` value of each
    morpheme whose ``"pos"`` value is "動詞" (verb), one per line.
    """
    # ``with`` guarantees the handle is released even when parsing fails.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    for one_sentence in all_sentences:
        for morphdict in one_sentence:
            if morphdict["pos"] != "動詞":
                continue
            # Single-argument print(...) works on Python 2 and 3 alike.
            print(morphdict["surface"])
Exemplo n.º 3
0
def main():
    """Print the surface form of every noun sub-tagged "サ変可能".

    Opens the file named by ``sys.argv[1]``, parses it with
    ``takayuki.make_morphdicts`` and prints the ``"surface"`` value of each
    morpheme whose ``"pos"`` is "名詞" (noun) and whose ``"pos1"`` is
    "サ変可能" (literally "sa-hen capable"; presumably nouns that can form
    suru-verbs -- confirm against the tag set in use).
    """
    # ``with`` guarantees the handle is released even when parsing fails.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    for one_sentence in all_sentences:
        for morphdict in one_sentence:
            is_noun = morphdict["pos"] == "名詞"
            # ``pos1`` is only consulted for nouns, as in the original
            # short-circuiting condition.
            if is_noun and morphdict["pos1"] == "サ変可能":
                print(morphdict["surface"])
Exemplo n.º 4
0
def main():
    """Print every "noun + の + noun" sequence found in the input.

    Opens the file named by ``sys.argv[1]``, parses it with
    ``takayuki.make_morphdicts`` and, for each morpheme whose ``"surface"``
    is "の" with a "名詞" (noun) immediately before and after it, prints
    the three surface forms concatenated on one line.
    """
    # ``with`` guarantees the handle is released even when parsing fails.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    for one_sentence in all_sentences:
        # Only interior positions can have both neighbours.  Stopping at
        # len - 1 keeps ``index + 1`` in range; the previous upper bound of
        # len raised IndexError when a sentence ended in noun + "の".
        for index in range(1, len(one_sentence) - 1):
            current = one_sentence[index]
            if current["surface"] != "の":
                continue

            before = one_sentence[index - 1]
            after = one_sentence[index + 1]
            if before["pos"] == "名詞" and after["pos"] == "名詞":
                print(before["surface"]
                      + current["surface"]
                      + after["surface"])
Exemplo n.º 5
0
def main():
    """Print a frequency table of morpheme base forms, most frequent first.

    Opens the file named by ``sys.argv[1]``, parses it with
    ``takayuki.make_morphdicts``, counts how often each ``"base"`` value
    occurs across all sentences and prints one ``word<TAB>count`` line per
    distinct base form, ordered by descending count.
    """
    # ``with`` guarantees the handle is released even when parsing fails.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    # ``int()`` returns 0, so unseen words start counting from zero.
    count_word = defaultdict(int)

    for one_sentence in all_sentences:
        for morphdict in one_sentence:
            count_word[morphdict["base"]] += 1

    # Negating the count sorts in descending order; the sort is stable, so
    # words with equal counts keep the dictionary's iteration order.
    for word, count in sorted(count_word.items(), key=lambda item: -item[1]):
        print(word + "\t" + str(count))
Exemplo n.º 6
0
def main():
    """Print each occurrence of a noun, the particle "の", and another noun.

    The file named by ``sys.argv[1]`` is parsed with
    ``takayuki.make_morphdicts``.  Within every sentence, each morpheme
    whose ``"surface"`` is "の" and whose direct neighbours both have
    ``"pos"`` equal to "名詞" (noun) produces one output line made of the
    three surface forms joined together.
    """
    # The context manager closes the file even if parsing raises.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    for one_sentence in all_sentences:
        last_index = len(one_sentence) - 1
        # The final position is excluded: it has no right-hand neighbour,
        # and the original loop raised IndexError there whenever a
        # sentence ended with noun + "の".
        for index in range(1, last_index):
            if one_sentence[index]["surface"] == "の":
                left = one_sentence[index - 1]
                right = one_sentence[index + 1]
                if left["pos"] == "名詞" and right["pos"] == "名詞":
                    print(left["surface"]
                          + one_sentence[index]["surface"]
                          + right["surface"])
Exemplo n.º 7
0
def main():
    """Count morpheme base forms and print them by descending frequency.

    The file named by ``sys.argv[1]`` is parsed with
    ``takayuki.make_morphdicts``; every morpheme's ``"base"`` value is
    tallied, and the tallies are printed as ``word<TAB>count`` lines with
    the highest count first.
    """
    # The context manager closes the file even if parsing raises.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    # Missing keys default to ``int()`` == 0.
    count_word = defaultdict(int)

    for one_sentence in all_sentences:
        for morphdict in one_sentence:
            count_word[morphdict["base"]] += 1

    # ``reverse=True`` keeps the sort stable, so equal counts appear in the
    # same relative order as with the previous ``-count`` key.
    ranking = sorted(count_word.items(), key=lambda item: item[1],
                     reverse=True)
    for word, count in ranking:
        print(word + "\t" + str(count))
Exemplo n.º 8
0
def main():
    """Print the longest run of consecutive noun morphemes.

    Opens the file named by ``sys.argv[1]``, parses it with
    ``takayuki.make_morphdicts`` and scans every sentence for maximal runs
    of morphemes whose ``"pos"`` is "名詞" (noun).  The concatenated
    ``"surface"`` forms of the run containing the most morphemes are
    printed; when several runs tie, the last one encountered wins.  If no
    noun is found at all (including empty input) an empty line is printed.
    """
    # ``with`` guarantees the handle is released even when parsing fails.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    longest_string = ""
    longest_count = 0

    for one_sentence in all_sentences:
        # Start afresh for each sentence so a run never spans a sentence
        # boundary.
        nounstring = ""
        noun_count = 0

        for morphdict in one_sentence:
            if morphdict["pos"] == "名詞":
                nounstring += morphdict["surface"]
                noun_count += 1
                # Record the best run while it grows, so a run that ends
                # the sentence (no following non-noun) is not lost.
                # ``>=`` keeps the "last tie wins" behaviour.
                if noun_count >= longest_count:
                    longest_string = nounstring
                    longest_count = noun_count
            else:
                nounstring = ""
                noun_count = 0

    print(longest_string)
Exemplo n.º 9
0
def main():
    """Find and print the longest sequence of adjacent nouns.

    The file named by ``sys.argv[1]`` is parsed with
    ``takayuki.make_morphdicts``.  Each sentence is scanned for unbroken
    sequences of morphemes whose ``"pos"`` is "名詞" (noun); length is
    measured in morphemes.  The ``"surface"`` forms of the longest
    sequence are joined and printed.  On a tie the sequence seen last is
    chosen, and an empty line is printed when there are no nouns or no
    input at all.
    """
    # The context manager closes the file even if parsing raises.
    with open(sys.argv[1], "r") as mecab_file:
        all_sentences = takayuki.make_morphdicts(mecab_file)

    best_surfaces = []

    for one_sentence in all_sentences:
        # A new list per sentence keeps sequences from leaking across
        # sentence boundaries.
        current_surfaces = []

        for morphdict in one_sentence:
            if morphdict["pos"] != "名詞":
                current_surfaces = []
                continue

            current_surfaces.append(morphdict["surface"])
            # Checking on every noun (rather than only when a non-noun
            # ends the sequence) also captures a sequence that runs to
            # the end of the sentence.  ``>=`` lets the latest tie win.
            if len(current_surfaces) >= len(best_surfaces):
                best_surfaces = list(current_surfaces)

    print("".join(best_surfaces))