lvl_data[i] += 1 else: lvl_data[i] = 1 return sorted(lvl_data.items(), key=lambda d: d[0]) def plot_stat(stat, total, name): k = [d[0] for d in stat] v = [d[1] for d in stat] plt.plot(k, v, '-*') plt.title('{} distribution of {} shops'.format(name, total)) plt.xlabel(name) plt.ylabel('number') plt.show() def shop_rev(session): rev_cnt = [item.count for item in session.query(ShopReviewCnt).all()] plot_stat(aggressive(rev_cnt), len(rev_cnt), 'shop reviews count') if __name__ == '__main__': db_pf = 'sqlite:///cache/db_profile.sqlite3' Session = install(db_pf) session = Session() shop_rev(session) session.close()
id = Column(Integer, Sequence('shop_review'), primary_key=True) rev_id = Column(String(20)) sid = Column(String(20)) uid = Column(String(20)) star = Column(Integer) entry = Column(String(5000)) recommend = Column(String(5000)) rev_time = Column(String(50)) class CntShopReview(HisCount): __tablename__ = 'shop_review_cnt' class ShopTags(BaseModel): __tablename__ = 'shop_tags' id = Column(Integer, Sequence('shop_tags'), primary_key=True) sid = Column(String(20)) tag = Column(String(5000)) def __init__(self, sid, tag): self.sid = sid self.tag = tag if __name__ == '__main__': Session = install('sqlite:///cache/test.sqlite3') session = Session() session.close()
# NOTE(review): the next statement is presumably the tail of a function that
# starts before this chunk; it is kept verbatim.
session.commit()


def save_shop_cate(session, shop_prof_dir):
    """Extract category tags from shop profile files and persist them.

    Args:
        session: Database session used to query existing ``ShopTags`` rows
            and to add/commit the new ones.
        shop_prof_dir: Directory handed to ``read_file`` as the source of
            shop profile files.
    """
    # Shop ids that already have at least one stored tag.
    parsed = {row.sid for row in session.query(ShopTags).distinct().all()}
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement.
    print('{} shop category parsed'.format(len(parsed)))

    data = []
    # NOTE(review): `read_file` is defined outside this chunk; it presumably
    # skips ids found in `parsed` and derives each id from the file name
    # minus its last five characters -- confirm.
    for sid, content in read_file(shop_prof_dir, parsed, lambda fn: fn[:-5]):
        # Pass the shop id. The original passed the builtin `id` function
        # here, which is never bound to a shop in this scope.
        text = parse(_cate_progs, content, sid, 'shop cate')
        # Drop the '»' entry that the field pattern also matches.
        tags = set(_cate_field_progs.findall(text)) - {'»'}
        data.extend(ShopTags(sid, tag) for tag in tags)

    session.add_all(data)
    session.commit()


if __name__ == '__main__':
    BASE_DIR = os.path.dirname(__file__)
    shop_prof_dir = os.path.join(BASE_DIR, 'cache/shop_prof')
    Session = install('sqlite:///cache/dianping.sqlite3')
    session = Session()
    try:
        save_shop_basic(session, shop_prof_dir)
        save_shop_review(session, shop_prof_dir)
        save_shop_cate(session, shop_prof_dir)
    finally:
        # Release the session even when one of the save steps raises.
        session.close()
else: lvl_data[i] = 1 return sorted(lvl_data.items(), key=lambda d: d[0]) def plot_stat(stat, total, name): k = [d[0] for d in stat] v = [d[1] for d in stat] plt.plot(k, v, '-*') plt.title('{} distribution of {} shops'.format(name, total)) plt.xlabel(name) plt.ylabel('number') plt.show() def shop_rev(session): rev_cnt = [item.count for item in session.query(ShopReviewCnt).all()] plot_stat(aggressive(rev_cnt), len(rev_cnt), 'shop reviews count') if __name__ == '__main__': db_pf = 'sqlite:///cache/db_profile.sqlite3' Session = install(db_pf) session = Session() shop_rev(session) session.close()