Пример #1
0
            lvl_data[i] += 1
        else:
            lvl_data[i] = 1
    return sorted(lvl_data.items(), key=lambda d: d[0])


def plot_stat(stat, total, name):
    """Draw a line chart of an aggregated distribution and display it.

    Args:
        stat: Re-iterable collection of indexable pairs; element 0 of each
            pair is plotted on the x axis and element 1 on the y axis.
        total: Value interpolated into the chart title as the shop count.
        name: Name of the measured quantity; used as the x-axis label and
            as the title prefix.
    """
    x_values = [pair[0] for pair in stat]
    y_values = [pair[1] for pair in stat]
    chart_title = '{} distribution of {} shops'.format(name, total)

    # NOTE(review): assuming plt is matplotlib.pyplot, '-*' means a solid
    # line with a star marker on every data point.
    plt.plot(x_values, y_values, '-*')
    plt.title(chart_title)
    plt.xlabel(name)
    plt.ylabel('number')
    plt.show()


def shop_rev(session):
    """Plot how review counts are distributed across shops.

    Reads the ``count`` attribute of every ``ShopReviewCnt`` row, aggregates
    the values with ``aggressive`` and hands the result to ``plot_stat``.

    Args:
        session: Database session used to query ``ShopReviewCnt``.
    """
    rows = session.query(ShopReviewCnt).all()

    counts = []
    for row in rows:
        counts.append(row.count)

    distribution = aggressive(counts)
    plot_stat(distribution, len(counts), 'shop reviews count')


if __name__ == '__main__':
    # Script entry point: open a session on the profile database, plot the
    # shop review-count distribution, then release the session.
    db_pf = 'sqlite:///cache/db_profile.sqlite3'
    Session = install(db_pf)
    session = Session()

    try:
        shop_rev(session)
    finally:
        # Release the session even when querying or plotting raises.
        session.close()
Пример #2
0
    id = Column(Integer, Sequence('shop_review'), primary_key=True)
    rev_id = Column(String(20))
    sid = Column(String(20))
    uid = Column(String(20))
    star = Column(Integer)
    entry = Column(String(5000))
    recommend = Column(String(5000))
    rev_time = Column(String(50))


class CntShopReview(HisCount):
    """Model bound to the ``shop_review_cnt`` table.

    Only the table name is declared here; everything else comes from
    ``HisCount``.
    """

    __tablename__ = 'shop_review_cnt'


class ShopTags(BaseModel):
    """A single tag attached to a shop, stored in the ``shop_tags`` table."""

    __tablename__ = 'shop_tags'

    # Integer primary key associated with the 'shop_tags' sequence.
    id = Column(Integer, Sequence('shop_tags'), primary_key=True)
    # Identifier of the shop the tag belongs to (up to 20 characters).
    sid = Column(String(20))
    # Tag text (up to 5000 characters).
    tag = Column(String(5000))

    def __init__(self, sid, tag):
        """Create a row pairing shop ``sid`` with ``tag``."""
        self.sid, self.tag = sid, tag


if __name__ == '__main__':
    # Smoke run: build a session factory for the test database, open one
    # session and close it again without issuing any queries.
    test_db_url = 'sqlite:///cache/test.sqlite3'
    Session = install(test_db_url)
    session = Session()
    session.close()
Пример #3
0
    session.commit()


def save_shop_cate(session, shop_prof_dir):
    """Extract category tags from shop profile files and store them.

    The ``sid`` values already present in ``ShopTags`` are collected and
    passed to ``read_file`` (presumably so those shops are skipped - confirm
    against ``read_file``). For every (sid, content) pair it yields, the
    category section is extracted with ``parse`` and one ``ShopTags`` row is
    created per distinct tag found in it. All rows are added and committed
    in a single transaction at the end.

    Args:
        session: Database session used to query existing tags and to
            persist the new ones.
        shop_prof_dir: Directory holding the shop profile files.
    """
    parsed = {row.sid for row in session.query(ShopTags).distinct().all()}
    # Single-argument parenthesised form prints the same text on Python 2
    # and Python 3.
    print('{} shop category parsed'.format(len(parsed)))

    data = []

    # The lambda drops the last five characters of a file name; presumably
    # this strips an extension to recover the sid - TODO confirm.
    for sid, c in read_file(shop_prof_dir, parsed, lambda fn: fn[:-5]):
        # Pass the current shop id. The previous code passed the builtin
        # ``id`` function here, which cannot identify the shop.
        # NOTE(review): assumes parse() takes the item id as its third
        # argument - confirm against parse().
        text = parse(_cate_progs, c, sid, 'shop cate')
        # The '»' separator is matched by the field pattern but is not a tag.
        tags = set(_cate_field_progs.findall(text)) - {'»'}
        data.extend(ShopTags(sid, tag) for tag in tags)

    session.add_all(data)
    session.commit()


if __name__ == '__main__':
    # Script entry point: parse the cached shop profile files and store the
    # basic info, reviews and category tags in the dianping database.
    BASE_DIR = os.path.dirname(__file__)
    shop_prof_dir = os.path.join(BASE_DIR, 'cache/shop_prof')

    Session = install('sqlite:///cache/dianping.sqlite3')
    session = Session()

    try:
        save_shop_basic(session, shop_prof_dir)
        save_shop_review(session, shop_prof_dir)
        save_shop_cate(session, shop_prof_dir)
    finally:
        # Release the session even when one of the save steps raises.
        session.close()
Пример #4
0
        else:
            lvl_data[i] = 1
    return sorted(lvl_data.items(), key=lambda d: d[0])


def plot_stat(stat, total, name):
    """Show a line plot of ``stat`` titled with ``name`` and ``total``.

    Args:
        stat: Re-iterable collection of indexable pairs. The first element
            of each pair becomes an x coordinate, the second a y coordinate.
        total: Number reported in the title as the amount of shops.
        name: Description of the plotted quantity; labels the x axis and
            starts the title.
    """
    keys = [entry[0] for entry in stat]
    counts = [entry[1] for entry in stat]
    heading = '{} distribution of {} shops'.format(name, total)

    # NOTE(review): assuming plt is matplotlib.pyplot, '-*' selects a solid
    # line with star markers.
    plt.plot(keys, counts, '-*')
    plt.title(heading)
    plt.xlabel(name)
    plt.ylabel('number')
    plt.show()


def shop_rev(session):
    """Chart the distribution of per-shop review counts.

    Collects ``count`` from every ``ShopReviewCnt`` row, aggregates the
    values via ``aggressive`` and plots the result with ``plot_stat``.

    Args:
        session: Database session used to load the ``ShopReviewCnt`` rows.
    """
    records = session.query(ShopReviewCnt).all()

    review_counts = []
    for record in records:
        review_counts.append(record.count)

    aggregated = aggressive(review_counts)
    plot_stat(aggregated, len(review_counts), 'shop reviews count')


if __name__ == '__main__':
    # Script entry point: open a session on the profile database, plot the
    # shop review-count distribution, then release the session.
    db_pf = 'sqlite:///cache/db_profile.sqlite3'
    Session = install(db_pf)
    session = Session()

    try:
        shop_rev(session)
    finally:
        # Release the session even when querying or plotting raises.
        session.close()