def get_douban_site(): with open('data/intro') as i: intro = json.loads(i.read()) with open('data/info') as i: info = json.loads(i.read()) with open('data/meta') as i: meta = json.loads(i.read()) #zs = ZsiteLink.where(link='http://site.douban.com/110633/(1号厅的光影传奇)') #if zs: # zs = zs[0] # zs.link = 'http://site.douban.com/110633/' # zs.save() for zl in ZsiteLink.where(name='豆瓣小站').order_by('id desc').col_list(col='zsite_id'): if not Rss.where(user_id=zl): zs = ZsiteLink.raw_sql('select link from zsite_link where link like %s and zsite_id=%s and cid=2', 'http://site.douban.com%', zl).fetchone() if zs: id = zs[0].split('/')[-1] or zs[0].split('/')[-2] if info.get(id): like, link, img, name = info.get(id) if meta.get(id): motto = meta.get(id)[0][0] motto = motto.split('<br />')[0] motto = motto.split('<a')[0] img_src = meta.get(id)[0][1] rss_new(zl, 'http://rss-tidy.42qu.com/douban/site/%s'%id, name, link, auto=1) print zl, 'http://site.douban.com/%s'%id else: print id, 'no motto data' else: print id, '数据未录入', zl
def check():
    # Audit Rss rows against the id list in makeexcel.txt.
    # Collect the trailing path component of every known Rss url
    # (for trailing-slash urls the last component is empty, use [-2]).
    s = set()
    for r in Rss.where():
        s.add(r.url.split('/')[-1] or r.url.split('/')[-2])
    with open('makeexcel.txt') as me:
        for i in me:
            i = i.strip()
            if i not in s:
                # NOTE(review): `r` here is whatever row the loop above
                # ended on, not a row matching `i` — looks like a latent
                # bug (probably meant to log `i`); confirm before relying
                # on this output.
                print r.user_id
                get_in(i)
                time.sleep(1)
            else:
                # Feed already registered: if the zsite has no notes,
                # hide it and empty its admins.
                count = po_cid_count_by_zsite_id(r.user_id, CID_NOTE)
                if count == 0:
                    zsite_show_rm(Zsite.mc_get(r.user_id))
                    #zsite_fav_rm_all_by_zsite_id(r.user_id)
                    zsite_admin_empty(r.user_id)
                    print r.user_id, '!!'
                print i
def get_douban_site(): with open('data/intro') as i: intro = json.loads(i.read()) with open('data/info') as i: info = json.loads(i.read()) with open('data/meta') as i: meta = json.loads(i.read()) #zs = ZsiteLink.where(link='http://site.douban.com/110633/(1号厅的光影传奇)') #if zs: # zs = zs[0] # zs.link = 'http://site.douban.com/110633/' # zs.save() for zl in ZsiteLink.where(name='豆瓣小站').order_by('id desc').col_list( col='zsite_id'): if not Rss.where(user_id=zl): zs = ZsiteLink.raw_sql( 'select link from zsite_link where link like %s and zsite_id=%s and cid=2', 'http://site.douban.com%', zl).fetchone() if zs: id = zs[0].split('/')[-1] or zs[0].split('/')[-2] if info.get(id): like, link, img, name = info.get(id) if meta.get(id): motto = meta.get(id)[0][0] motto = motto.split('<br />')[0] motto = motto.split('<a')[0] img_src = meta.get(id)[0][1] rss_new(zl, 'http://rss-tidy.42qu.com/douban/site/%s' % id, name, link, auto=1) print zl, 'http://site.douban.com/%s' % id else: print id, 'no motto data' else: print id, '数据未录入', zl
def rss_subscribe(greader=None):
    # Sync Rss rows with the google reader account:
    #   gid == 0 : pending rows — fill missing url/link/name via feed
    #              autodiscovery, subscribe, pull first entries;
    #   gid <  0 : rows marked for removal — unsubscribe and delete.
    # greader: authenticated Reader; built lazily from
    # GREADER_USERNAME / GREADER_PASSWORD when None.
    from zkit.google.findrss import get_rss_link_title_by_url
    rss_list = []
    for i in Rss.where(gid=0):
        url = i.url.strip()
        #print url
        if not all((i.link, i.url, i.name)):
            rss, link, name = get_rss_link_title_by_url(url)
            #print link, name
            if rss:
                i.url = rss
            if link:
                i.link = link
            if not name:
                # NOTE(review): if autodiscovery returned no link this
                # raises AttributeError on None.split — confirm link is
                # always set when name is missing.
                name = link.split('://', 1)[-1]
            if name:
                i.name = name
            i.save()
        rss_list.append(i)
    if rss_list:
        if greader is None:
            greader = Reader(GREADER_USERNAME, GREADER_PASSWORD)
        for i in rss_list:
            #print i.url
            url = quote(i.url)
            try:
                greader.subscribe(url)
                i.gid = 1
                i.save()
            # NOTE(review): bare except also swallows KeyboardInterrupt /
            # SystemExit; `except Exception:` would be safer.
            except:
                traceback.print_exc()
                print i.url, i.user_id
                i.delete()
            try:
                #print i.url
                feed = 'feed/%s' % url
                user_id = i.user_id
                duplicator_set_by_user_id(user_id)
                rss_feed_update(greader.feed(feed), i.id, user_id, 1024)
                # greader.mark_as_read(feed)
            except:
                traceback.print_exc()
    for i in Rss.where('gid<0'):
        if greader is None:
            greader = Reader(GREADER_USERNAME, GREADER_PASSWORD)
        try:
            greader.unsubscribe('feed/' + quote(i.url))
        except:
            traceback.print_exc()
            print i.url, i.user_id
        # row was marked for removal; delete regardless of unsubscribe
        # outcome (placement reconstructed from mangled source — verify)
        i.delete()
def rss_subscribe(greader=None): from zkit.google.findrss import get_rss_link_title_by_url rss_list = [] for i in Rss.where(gid=0): url = i.url.strip() #print url if not all((i.link, i.url, i.name)): rss, link, name = get_rss_link_title_by_url(url) #print link, name if rss: i.url = rss if link: i.link = link if not name: name = link.split('://', 1)[-1] if name: i.name = name i.save() rss_list.append(i) if rss_list: if greader is None: greader = Reader(GREADER_USERNAME, GREADER_PASSWORD) for i in rss_list: #print i.url url = quote(i.url) try: greader.subscribe(url) i.gid = 1 i.save() except: traceback.print_exc() print i.url, i.user_id i.delete() try: #print i.url feed = 'feed/%s'%url user_id = i.user_id duplicator_set_by_user_id(user_id) rss_feed_update(greader.feed(feed), i.id, user_id, 1024) # greader.mark_as_read(feed) except: traceback.print_exc() for i in Rss.where('gid<0'): if greader is None: greader = Reader(GREADER_USERNAME, GREADER_PASSWORD) try: greader.unsubscribe('feed/'+quote(i.url)) except: traceback.print_exc() print i.url, i.user_id i.delete()
txt = get_rss_link_title_by_rss(j)[-1] except: continue else: output.write("""<outline text="%s" title="%s" type="rss" xmlUrl="%s" htmlUrl="%s"/> """ % (txt, txt, j, j)) output.write("""</body></opml>""") def print_uri(): links = get_uri() with open('rss2user_id.txt', 'w') as f: for i in links: f.write('%s %s\n' % (i[0], i[1])) if __name__ == '__main__': #print_uri() #print_rss() #print_xml() from model.rss import Rss from model.zsite import Zsite for i in Rss.where(): zsite = Zsite.mc_get( i.user_id ) if zsite.cid == CID_SITE: print 'http:%s'%zsite.link, '\t\t\t', zsite.name