Пример #1
0
def preprocess(text):
    """Pick a seed morpheme from the first sentence of *text*.

    Provisional strategy (TODO: refine): use the first morpheme of the
    first sentence whose ``pos`` contains u'名詞' (noun). When that
    sentence contains no noun, fall back to a leading morpheme.

    Args:
        text: Raw input text; it is split with ``api.sentences``.

    Returns:
        A morpheme entry as returned by ``api.morph`` (it has at least a
        ``'pos'`` key), or ``None`` when the text yields no sentences or
        the first sentence yields no morphemes.
    """
    # Split once; only the first sentence is needed for the seed.
    sentences = api.sentences(text)['sentences']
    if not sentences:
        return None

    morphs = api.morph(sentences[0])['morphs']
    if not morphs:
        return None

    for morph in morphs:
        if u'名詞' in morph['pos']:
            return morph

    # NOTE(review): the fallback has always used index 1 rather than 0,
    # although the original note said "first morpheme" -- presumably index 0
    # is not a usable token; confirm against the API. Index 0 is used only
    # when the sentence has a single morpheme, to avoid an IndexError.
    return morphs[1] if len(morphs) > 1 else morphs[0]
Пример #2
0
def twitter_based(text):
    """Build a response from a tweet found by searching for a seed word.

    A seed morpheme is chosen with ``generate_seed``, its
    ``'norm_surface'`` is used as the tweet search query, and the first
    sentence of the first hit is passed through ``postprocess``.

    Args:
        text: Raw input text the response should relate to.

    Returns:
        The result of ``postprocess`` on the chosen sentence, or ``None``
        when no seed is available, the search has no hits, or the first
        hit cannot be split into sentences.
    """
    seed = generate_seed(text)
    if seed is None:
        # Nothing to search for.
        return None

    # TODO: for now only the first tweet of the search result is used.
    tweet_example = api.search_tweets(seed['norm_surface'], limit=1)
    # No hits: report "no response" to the caller.
    if tweet_example['count'] == 0:
        return None

    # TODO: for now only the leading sentence of that tweet is used.
    sentences = api.sentences(tweet_example['texts'][0])['sentences']
    if not sentences:
        # The tweet produced no sentences; avoid indexing an empty list.
        return None

    # postprocess presumably rewrites the morpheme sequence of the sentence
    # (per the original note) -- its implementation is not visible here.
    return postprocess(sentences[0])
Пример #3
0
def generate_seed(text):
    """Choose a seed morpheme for *text*.

    Each sentence of *text* is analysed in order. Within a sentence the
    first morpheme whose ``pos`` contains u'固有' or u'名詞' is taken;
    if there is none, a leading morpheme is taken instead. Later sentences
    override earlier ones, so the seed comes from the last sentence that
    has any morphemes.

    Args:
        text: Raw input text; it is split with ``api.sentences``.

    Returns:
        A morpheme entry as returned by ``api.morph``, or ``None`` when no
        sentence yields any morpheme.
    """
    seed = None
    for sentence in api.sentences(text)['sentences']:
        morphs = api.morph(sentence)['morphs']
        if not morphs:
            # Nothing to choose from in this sentence; keep the previous seed.
            continue
        for morph in morphs:
            pos = morph['pos']
            if u'固有' in pos or u'名詞' in pos:
                seed = morph
                break
        else:
            # No noun-like morpheme in this sentence. Index 1 is the
            # long-standing fallback; index 0 is used only for a
            # single-morpheme sentence, where index 1 would raise IndexError.
            seed = morphs[1] if len(morphs) > 1 else morphs[0]
    return seed
Пример #4
0
def scenario_based(text):
    """Pick a random scenario-triggered response for *text*.

    Every sentence of *text* is converted to a query of
    ``u'surface:pos'`` strings (one per morpheme) and passed to
    ``api.trigger`` together with ``scenario_file``, a module-level name
    defined outside this function. The texts returned for all sentences
    are pooled and one of them is chosen at random.

    Args:
        text: Raw input text; it is split with ``api.sentences``.

    Returns:
        One of the triggered texts, or ``None`` when nothing was triggered
        (including when the text yields no sentences).
    """
    candidates = []
    for sentence in api.sentences(text)['sentences']:
        morphs = api.morph(sentence)['morphs']
        query = [
            u'{}:{}'.format(morph['surface'], morph['pos'])
            for morph in morphs
        ]
        # Trigger once per sentence so every sentence contributes
        # candidates, not just the last one.
        candidates.extend(api.trigger(scenario_file, query)['texts'])

    if not candidates:
        return None
    return random.choice(candidates)
Пример #5
0
# -*- coding: utf-8 -*-

import api

api = api.API('https://52.68.75.108', 'secret', 'js2015cps')

print
print '文分割'
s = '日本語文字列を文単位で分割する。複数文を渡すと、文ごとに区切ってくれる。'
for sentence in  api.sentences(s)['sentences']:
    print sentence


print
print 'ツイート検索'
print '=' * 20
for text in api.search_tweets('検索')['texts']:
    print text

print
print 'リプライ検索'
print '=' * 20
for text in api.search_reply('検索')['texts']:
    print text

print
print 'マルコフ連鎖'
print '=' * 20
seed = {'norm_surface': "今日", 'pos': "名詞"}
for morph in  api.markov_chain(seed)['morphs']:
    print morph,
Пример #6
0
if __name__ == "__main__":
    # Manual debugging driver: fetches replies through the API and dumps
    # every intermediate result (sentences, morphemes, query, triggered
    # texts) to stdout as pretty-printed JSON.
    api = get_api()

    reps = api.get_reply()
    print json.dumps(reps, ensure_ascii=False, indent=4)

    for rep in reps["replies"]:
        print "======================================================"
        # NOTE(review): all three grade branches select the same scenario
        # file, and the grade is read from `reps` (not `rep`), so it does not
        # vary per reply -- presumably a placeholder for per-grade scenario
        # files; confirm.
        if reps["grade"] == 0:
            scenario_file = "scenario_c09.txt"
        elif reps["grade"] == 1:
            scenario_file = "scenario_c09.txt"
        else:
            scenario_file = "scenario_c09.txt"
        sent = api.sentences(rep["text"])
        # Pools the triggered texts of every sentence in this reply; it is
        # not consumed within the lines visible here.
        text = []
        for s in sent["sentences"]:
            print "-------------------------------------------------------"
            print s
            morphs = api.morph(s)
            print json.dumps(morphs, ensure_ascii=False, indent=4)
            # Build the trigger query: one "surface:pos" string per morpheme.
            query = list()
            for morph in morphs["morphs"]:
                query.append(u"{}:{}".format(morph["surface"], morph["pos"]))
            print json.dumps(query, ensure_ascii=False, indent=4)
            # Triggered once per sentence with that sentence's query.
            texts = api.trigger(scenario_file, query)
            print json.dumps(texts, ensure_ascii=False, indent=4)
            for t in texts["texts"]:
                text.append(t)
                print t