示例#1
0
def draw_title():
    """Run ``draw_random_title_sql`` and return its first row with the schema.

    Returns:
        A ``(row, schema)`` tuple: the first row produced by the query and
        the ``schema`` attribute of the ``PsqlQuery`` object, read right
        after the query is issued.

    Raises:
        IndexError: If the query produces no rows.
    """
    db = PsqlQuery()
    result = db.query(draw_random_title_sql)
    schema = db.schema
    # The whole result is drained before the first row is picked.
    rows = list(result)
    return rows[0], schema
示例#2
0
    def query_comment_by_id(self, comment_id):
        """Run ``self.query_comment_by_id_sql`` for the given comment ids.

        Args:
            comment_id: Iterable of ids; converted to a tuple and bound to
                the ``id_`` query parameter. ``self.tokenizer_tag`` is bound
                to the ``tok`` parameter.

        Returns:
            A ``(result, schema)`` pair: whatever ``PsqlQuery.query``
            returns, and the ``schema`` attribute read after the call.
        """
        db = PsqlQuery()
        sql = self.query_comment_by_id_sql
        params = {'id_': tuple(comment_id), 'tok': self.tokenizer_tag}
        result = db.query(sql, params)
        return result, db.schema
示例#3
0
    def query_title_by_id(self, title_id):
        """Run ``self.query_title_by_id_sql`` for the given title ids.

        Args:
            title_id: Iterable of ids; converted to a tuple and bound to the
                ``id_`` query parameter. ``self.tokenizer_tag`` is bound to
                the ``tok`` parameter.

        Returns:
            A ``(result, schema)`` pair: whatever ``PsqlQuery.query``
            returns, and the ``schema`` attribute read after the call.
        """
        db = PsqlQuery()
        sql = self.query_title_by_id_sql
        params = {'id_': tuple(title_id), 'tok': self.tokenizer_tag}
        result = db.query(sql, params)
        return result, db.schema
示例#4
0
def query_freq_sum():
    """Fetch corpus-wide frequency totals from the database.

    Runs two aggregate queries: one summing ``postfreq`` and ``commentfreq``
    over ``pttcorpus_vocabulary``, and one summing ``pxy`` over
    ``pttcorpus_association``. The totals are logged at INFO level.

    Returns:
        A ``(postfreq_sum, commentfreq_sum, vocab_pairfreq_sum)`` tuple.

    Raises:
        IndexError: If either query produces no rows.
    """
    vocab_totals_sql = '''
        SELECT SUM(postfreq) AS postfreq_sum,
               SUM(commentfreq) AS commentfreq_sum
        FROM pttcorpus_vocabulary;
    '''

    pair_total_sql = '''
        SELECT SUM(pxy) AS sum
        FROM pttcorpus_association;
    '''

    db = PsqlQuery()

    # The first row of the first query carries both vocabulary totals.
    vocab_rows = list(db.query(vocab_totals_sql))
    postfreq_sum, commentfreq_sum = vocab_rows[0]
    logger.info('postfreq_sum:{}, commentfreq_sum:{}'.format(
        postfreq_sum, commentfreq_sum))

    # The first column of the first row of the second query is the pair total.
    pair_rows = list(db.query(pair_total_sql))
    vocab_pairfreq_sum = pair_rows[0][0]
    logger.info('vocab_pairfreq_sum:{}'.format(vocab_pairfreq_sum))

    return postfreq_sum, commentfreq_sum, vocab_pairfreq_sum
示例#5
0
def query_vocab_id(batch_size=1000):
    """Yield the ``id`` column of ``pttcorpus_vocabulary`` in batches.

    Args:
        batch_size: Maximum number of ids per yielded batch. Values below 1
            behave like 1, because a batch is flushed as soon as it holds
            at least ``batch_size`` ids.

    Yields:
        Lists of ids (the first column of each row). Every batch except
        possibly the last holds exactly ``batch_size`` ids; an empty batch
        is never yielded.
    """
    sql = 'SELECT id FROM pttcorpus_vocabulary;'
    psql = PsqlQuery()
    batch = []
    for row in psql.query(sql):
        batch.append(row[0])
        # `>=` keeps each batch at exactly batch_size ids; comparing with
        # `>` would let one extra id into every batch.
        if len(batch) >= batch_size:
            yield batch
            batch = []
    # Only flush a non-empty remainder, so consumers never receive an empty
    # id list (e.g. when the table is empty or the row count divides evenly).
    if batch:
        yield batch
示例#6
0
def generate_random_post(ref):
    """Return first-column values from ``query_random_post_sql``.

    Args:
        ref: Sized object; only its length is used, as the maximum number
            of values returned.

    Returns:
        A list with at most ``len(ref)`` entries, each the first column of
        a row produced by the query.
    """
    db = PsqlQuery()
    rows = db.query(query_random_post_sql)
    # Collect the first column of every row, then trim to the wanted size.
    first_columns = [row[0] for row in rows]
    limit = len(ref)
    return first_columns[:limit]
示例#7
0
def extract_words(comments):
    """Collect the ``vocabs`` of each comment into its own list.

    Args:
        comments: Iterable of objects exposing an iterable ``vocabs``
            attribute. Any falsy value (``None``, empty sequence) is
            accepted.

    Returns:
        A list with one inner list per comment, holding the items of that
        comment's ``vocabs``; ``[]`` when ``comments`` is falsy.
    """
    if not comments:
        return []

    word_lists = []
    for comment in comments:
        word_lists.append(list(comment.vocabs))
    return word_lists


if __name__ == '__main__':
    with open('eval0829.csv', 'w') as f:
        f.write('random, base, pweight\n')
    psql = PsqlQuery()
    posts = psql.query(query_post_sql)
    pschema = psql.schema

    valid_post = 0

    for idx, p in enumerate(posts):
        titles, tschema = psql.query_all(
            query_title_sql, dict(pid=p[pschema['id']], tok='jieba'))

        basic_retriever = RetrievalEvaluate(
            'jieba',
            excluded_post_ids=[p[pschema['id']]],
            logger_name='retrieve')

        pweight_retriever = RetrievalEvaluate(
            'jieba',
示例#8
0
    def query_post_by_id(self, post_id):
        """Run ``self.query_post_by_id_sql`` for the given post ids.

        Args:
            post_id: Iterable of ids; converted to a tuple and passed as the
                single positional query parameter.

        Returns:
            A ``(result, schema)`` pair: whatever ``PsqlQuery.query``
            returns, and the ``schema`` attribute read after the call.
        """
        db = PsqlQuery()
        sql = self.query_post_by_id_sql
        # One-element parameter sequence whose only item is the id tuple.
        params = (tuple(post_id),)
        result = db.query(sql, params)
        return result, db.schema