def serve(self):
    """Refresh the proxy pool every HEART_BEAT seconds until 23:30."""
    self.logger.warning('Service started')
    while True:
        now = datetime.now()  # re-read the clock each cycle so the loop can actually stop
        if now.hour == 23 and now.minute >= 30:
            break
        session = DBSession()
        valid_ip_count = self._check_and_rank_ip(session)
        self.logger.warning('Current number of usable IPs: {}'.format(valid_ip_count))
        if valid_ip_count < POOL_SIZE:
            self._update_ip_pool(session)
        session.close()
        time.sleep(HEART_BEAT)
    self.logger.warning('Service finished')
async def save_cid_aid_relation(cid_aid: MutableMapping[int, int],
                                cid_info: MutableMapping[int, AvDanmakuCid]):
    """Persist the av-cid relations that are not in the database yet."""
    if len(cid_aid) < 1:
        return
    objs: List[AVCidsDO] = []
    sql: str = 'select cid from av_cids where cid in (%s)' % ','.join(
        '%s' % item for item in cid_aid.keys())
    cids: ResultProxy = await execute_sql(sql)
    exist_cids: Set[int] = set()
    for item in cids.fetchall():
        # remember the relations that already exist
        exist_cids.add(int(item[0]))
    if len(exist_cids) != len(cid_aid):
        session = DBSession()
        for cid, aid in cid_aid.items():
            if cid in exist_cids:
                continue
            obj: AVCidsDO = AVCidsDO()
            obj.cid = cid
            obj.aid = aid
            objs.append(obj)
        for cid in exist_cids:
            cid_info.pop(cid, None)
        try:
            if cid_info:
                for item in cid_info.values():
                    await execute_sql(
                        "update av_cids set page = %s, page_name = '%s' where cid = %s;"
                        % (item.page, item.pagename, item.cid))
            session.bulk_save_objects(objs)
            session.commit()
        except BaseException as e:
            session.rollback()
            raise e
        else:
            log.info('[Saved] av-cid relation. len: %s' % len(objs))
        finally:
            session.close()
    else:
        log.info('All av-cid relations exist')
def __main__(mids: Set[int]):
    session = DBSession()
    for i in mids:
        mid = {'mid': i}
        res: HTTPResponse = selfusepy.get('https://api.bilibili.com/x/space/acc/info', **mid)
        isUpdated: bool = False
        try:
            resData: UserProfile = selfusepy.parse_json(res.data, UserProfile())
            dbData: UserProfileDO = session.query(UserProfileDO).filter(
                UserProfileDO.mid == i).first()
            if dbData:  # the profile already exists
                resDO: UserProfileDO = UserProfileDO(resData)
                # Compare the fetched profile against the DB row and update changed fields.
                for key, old_value in vars(dbData).items():
                    if key.startswith('_') or key == "fans":
                        # dbData is a SQLAlchemy-mapped object, so it carries internal
                        # attributes we do not need; skip them. Also skip `fans`, which
                        # is maintained by the dedicated fans-update routine.
                        continue
                    newValue = getattr(resDO, key)
                    if newValue != old_value:
                        isUpdated = True
                        log.info('[UPDATE] mid: %s, key: %s, new: %s, old: %s'
                                 % (i, key, newValue, old_value))
                        setattr(dbData, key, newValue)
                if not isUpdated:
                    log.info('[EQUAL] mid: %s' % i)
            else:
                log.info('[INSERT] mid: %s' % i)
                session.add(UserProfileDO(resData))
            session.commit()
        except BaseException as e:
            log.error('mid: %s, data: %s' % (i, res.data))
            raise e
        finally:
            log.info('[SLEEP] 2s')
            time.sleep(2)
    session.close()
def Query():
    Session1 = DBSession()
    # Fetch the orders that are ready for download (Print_Status == 1).
    All_order = Session1.query(Order).filter(Order.Print_Status == 1).all()
    if All_order:
        # Walk through every matching order.
        for one in range(len(All_order)):
            if All_order[one].Born_Date_Day == datetime.date.today():
                # if All_order[one].Print_Status == 1:
                try:
                    print('Datetime :' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                          + '\nId:' + str(All_order[one].Id))
                    # Download and save the file from the upload host
                    # (configure the address yourself).
                    print_file = requests.get('http://XXX.XXX.XXX.XXX/static/Upload_Files/{}'.format(
                        All_order[one].File_Dir))
                    if print_file.status_code != 200:  # anything but 200 is treated as a failure
                        print('No 200!')
                        raise IOError('{} {} {}'.format(
                            print_file.status_code, print_file.reason, print_file.url))
                    else:
                        with open('./static/go_print/' + All_order[one].File_Dir, 'wb') as f:
                            f.write(print_file.content)  # save the downloaded file
                except Exception as e:
                    print('no download!')
                    with open('./log/download_error_log', 'a') as f:
                        f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' '
                                + str(All_order[one].Id) + ' ' + str(e) + '\n')
                else:
                    with open('./log/download_log', 'a') as f:
                        f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' '
                                + str(All_order[one].Id) + ' successfully!' + '\n')
                    All_order[one].Print_Status = 2  # mark the file as downloaded
                finally:
                    Session1.commit()
                    print('>>>>>>>>>>>>>>><<<<<<<<<<<<<<<')
    else:
        pass
async def query_all_cid_of_av(avInfo: AVInfoDO):
    global i_for_queryAllCidOfAv, Last_Request_Time
    log.info('[START] i: %s' % i_for_queryAllCidOfAv)
    # Throttle: wait until REQUEST_TIME_DELTA nanoseconds have passed since the last request.
    delta = (Last_Request_Time + REQUEST_TIME_DELTA - time.time_ns()) / 1_000_000_000
    time.sleep(delta if delta > 0 else 0)
    i_for_queryAllCidOfAv += 1
    log.info("[REQUEST] av's cids, aid: %s" % avInfo.aid)
    Last_Request_Time = time.time_ns()
    res: HTTPResponse = await selfusepy.get_async(
        'https://www.bilibili.com/widget/getPageList?aid=' + str(avInfo.aid))
    cid_map: MutableMapping[int, AvDanmakuCid] = {}
    session = DBSession()
    try:
        j = json.loads(res.data)
        if isinstance(j, list):
            for item in j:
                cid_map[item['cid']] = selfusepy.parse_json(json.dumps(item), AvDanmakuCid())
        log.info('[REQUEST] Done')
        log.info('[DATA] aid: %s, cid len: %s' % (avInfo.aid, len(cid_map)))
        # Skip aid-cid pairs that have already been saved.
        sql: str = 'select cid from av_cids where aid = %s and cid in (%s)' % (
            avInfo.aid, ','.join('%s' % item for item in cid_map.keys()))
        r: ResultProxy = await execute_sql(sql)
        exist: Set[int] = set()
        for item in r.fetchall():
            exist.add(item[0])
        for cid, info in cid_map.items():
            if cid not in exist:
                session.add(AVCidsDO(avInfo.aid, info))
        session.commit()
        await filter_cid_which_isexist(avInfo.aid, cid_map)
    except BaseException as e:
        log.error('aid: %s' % avInfo.aid)
        raise e
    finally:
        session.close()
def destruct_danmaku(cid: int, danmakus: List[CustomTag]):
    danmakuMap: MutableMapping[int, DanmakuDO] = {}
    relationMap: MutableMapping[int, DanmakuRealationDO] = {}
    print('[FORMER] cid: %s, danmakus: %s' % (cid, len(danmakus)))
    for danmaku in danmakus:
        # Comma-separated tag fields: appearance time, mode, font size, color,
        # send timestamp, danmaku pool, sender hash, database id.
        obj: DanmakuDO = DanmakuDO()
        obj.content = danmaku.content
        l: list = danmaku.tag_content.split(',')
        obj.danmaku_epoch = float(l[0])
        obj.mode = int(l[1])
        obj.font_size = int(l[2])
        obj.font_color = int(l[3])
        obj.send_time = datetime.fromtimestamp(int(l[4]), timezone(timedelta(hours=8)))
        obj.danmaku_pool = int(l[5])
        obj.user_hash = int(l[6], 16)
        # Resolving the sender id via the crc32 table is too slow, so skip it for now:
        # value = crc32.get_value(l[6])
        # if value[0] > 0:
        #     obj.user_id = value[1]
        obj.id = int(l[7])

        relation: DanmakuRealationDO = DanmakuRealationDO()
        relation.cid = cid
        relation.danmaku_id = obj.id

        danmakuMap[obj.id] = obj
        relationMap[relation.danmaku_id] = relation
    session = DBSession()
    try:
        removeExist(cid, danmakuMap, relationMap)
        if len(danmakuMap) == len(relationMap) == 0:
            print('cid: %s, has saved all danmaku' % cid)
            return
        if danmakuMap:
            session.bulk_save_objects(danmakuMap.values())
        if relationMap:
            session.bulk_save_objects(relationMap.values())
        session.commit()
    except BaseException as e:
        session.rollback()
        print(e)
        print('cid: %s, has error. ' % cid)
    else:
        print('cid: %s, Saved into DB.' % cid)
    finally:
        session.close()
        print('[SAVED] danmakuMap.len: %s' % len(danmakuMap))
        print('[SAVED] relationMap.len: %s' % len(relationMap))
        danmakuMap.clear()
        relationMap.clear()
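# For reference: in Bilibili's danmaku XML each comment is a <d> element whose
# p attribute carries the eight comma-separated fields parsed above, e.g.
#   <d p="12.50000,1,25,16777215,1577808000,0,1a2b3c4d,123456789">text</d>
# A minimal sanity check of that layout (the sample values are made up):
_sample_fields = "12.50000,1,25,16777215,1577808000,0,1a2b3c4d,123456789".split(',')
assert float(_sample_fields[0]) == 12.5          # appearance time in seconds
assert int(_sample_fields[6], 16) == 0x1a2b3c4d  # sender hash is hexadecimal
assert int(_sample_fields[7]) == 123456789       # database id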
def Print():
    Session2 = DBSession()
    cmd = 'ls -t ./static/go_print > ./log/goprint_log'  # dump queued file names into a log file
    subprocess.call(cmd, shell=True)
    # Read the directory listing back in.
    Goprint = open('./log/goprint_log', 'r+')
    for line in Goprint:
        # Look up the order that owns this file.
        print_order = Session2.query(Order).filter(Order.File_Dir == line[:-1]).first()
        try:
            # Start printing: show the order's file name.
            print('----------------' + print_order.File_Dir + '----------------')
            # pdf(Session2.query(User).filter(User.Id == print_order.User_Id).first().Tel_Number,
            #     print_order.Trade_Number)
            # print_cmd1 = 'lp -o fitplot ./static/html/1.pdf'
            # go_mac = subprocess.call(print_cmd1, shell=True)
            # if go_mac != 0:
            #     error = subprocess.getoutput(print_cmd1)
            #     raise IOError(error)
            # Print the user's file; 'peg' matches the last three letters of 'jpeg'.
            if print_order.Print_Direction == '4':
                if print_order.File_Dir[-3:] in ['pdf', 'jpg', 'png', 'peg', 'psd', 'pdd', 'svg']:
                    print('try to print >< 1 ><' + print_order.File_Dir[-3:])
                    # copies, scale-to-page, landscape, single/double-sided, color model
                    print_cmd2 = 'lp -n {} -o fitplot -o landscape -o sides={} -o ColorModel={} ./static/go_print/{}'.format(
                        print_order.Print_Copies, print_order.Print_way, print_order.Print_Colour, line[:-1])
                else:
                    print('try to print >< 2 ><' + print_order.File_Dir[-3:])
                    print_cmd2 = 'lp -n {} -o landscape -o sides={} -o ColorModel={} ./static/go_print/{}'.format(
                        print_order.Print_Copies, print_order.Print_way, print_order.Print_Colour, line[:-1])
            else:
                if print_order.File_Dir[-3:] in ['pdf', 'jpg', 'png', 'peg', 'psd', 'pdd', 'svg']:
                    print('try to print >< 3 ><' + print_order.File_Dir[-3:])
                    print_cmd2 = 'lp -n {} -o fitplot -o sides={} -o ColorModel={} ./static/go_print/{}'.format(
                        print_order.Print_Copies, print_order.Print_way, print_order.Print_Colour, line[:-1])
                else:
                    print('try to print >< 4 ><' + print_order.File_Dir[-3:])
                    print_cmd2 = 'lp -n {} -o sides={} -o ColorModel={} ./static/go_print/{}'.format(
                        print_order.Print_Copies, print_order.Print_way, print_order.Print_Colour, line[:-1])
            go_lp = subprocess.call(print_cmd2, shell=True)
            print('>>>>>>>>>>>>>>>>>>>>>>one<<<<<<<<<<<<<<<<<<<<<<')
            if go_lp != 0:
                error = subprocess.getoutput(print_cmd2)
                raise IOError(error)
            print('----------------lp----------------')
        except Exception as e:
            print('----------------error----------------')
            with open('./log/print_error_log', 'a') as f:
                f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + " "
                        + line[:-1] + " " + str(e) + "\n")
        else:
            print('----------------ok----------------')
            # Delete the file once it has been printed.
            subprocess.call('rm ./static/go_print/{}'.format(line[:-1]), shell=True)
            print_order.Print_Status = 3
            with open('./log/print_success_log', 'a') as f:
                f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' '
                        + line[:-1] + ' ' + 'Successfully!' + "\n")
        finally:
            Session2.commit()
            print('>>>>>>>>>>>>>>><<<<<<<<<<<<<<<')
__author__ = 'lenovo'

import sys

sys.path.append('/home/csc/getdata')

import datetime
import time

from function import get_search_engine_result
from config import DBSession
from database.project import Project
from database.status import Status

session = DBSession()
time_now = time.strftime("%Y-%m-%d %H:%M:%S")
print('Current time:', time_now)

# Load the projects that currently need to be crawled.
projects = session.query(Project.id, Project.pname, Project.keywords, Project.create_time,
                         Project.period, Project.status).all()
session.commit()
session.close()

duplicate_num = 0
new_num = 0
for project in projects:
    # print('type:', type(project))
    pid = project.id
    # print('name:', project.pname)
    # print('keywords:', project.keywords)
    ptitle = project.pname.strip().replace(' ', '%20')
    status = project.status
def main():
    """
    Testing requires adjusting the database, deleting from S3, and the archive directory.
    :return:
    """
    temp_file_dir = 'data-temp/'
    # download data
    log.info("Getting objects' keys")
    keys: Set[str] = _s3.get_all_objects_key()
    if len(keys) < 1:
        log.info("No file in COS!")
        exit(0)
    else:
        local_processing.multi_download(temp_file_dir, keys)
    if not _s3.archive_object(keys):
        log.error("Archive objects failed")
        exit(0)
    log.info("Download files, DONE.")

    # reading data
    all_data: MutableMapping[str, AV] = read_file(temp_file_dir)

    log.info("Analyze")
    # split the data into one chunk per worker and analyze the chunks in parallel
    pool = Pool(processes=cpu_use_number)
    q = multiprocessing.Manager().Queue()
    size = int(math.ceil(len(all_data) / float(cpu_use_number)))
    map_temp: MutableMapping[str, AV] = {}
    res: List[ApplyResult] = list()
    for key, value in all_data.items():
        map_temp[key] = value
        if len(map_temp) % size == 0:
            res.append(pool.apply_async(func=analyze, args=(q, map_temp,)))
            map_temp = {}
    if map_temp:  # submit the remainder chunk, if any
        res.append(pool.apply_async(func=analyze, args=(q, map_temp,)))
    pool.close()
    pool.join()
    if q.qsize() > 0:
        # A non-empty queue means a worker raised; re-raise here to abort the run.
        log.error('analyze occurs error')
        raise Exception(q)

    # saving
    all_avinfos: List[AVInfoDO] = []
    all_avstats: List[AVStatDO] = []
    for item in res:
        v = item.get()
        all_avinfos.extend(v[0])
        all_avstats.extend(v[1])

    # remove avinfos that already exist in the DB or are duplicated within this run
    log.info("Remove duplicated avinfo")
    temp: Set[int] = set()  # db
    for item in all_avinfos:
        temp.add(item.aid)
    session = DBSession()
    sql: str = "select aid from av_info where aid in (%s)" % ",".join("%s" % item for item in temp)
    aids: ResultProxy = session.execute(sql)
    temp.clear()
    for item in aids.fetchall():
        temp.add(int(item[0]))
    temp2: List[AVInfoDO] = []  # program
    for item in all_avinfos:
        if item.aid not in temp:
            temp2.append(item)
            temp.add(item.aid)
    all_avinfos = temp2

    # db
    log.info("Save infos(%s) and stats(%s)" % (len(all_avinfos), len(all_avstats)))
    session.bulk_save_objects(all_avinfos)
    session.bulk_save_objects(all_avstats)
    session.commit()

    # archive
    log.info("Archive")
    for item in all_data.keys():
        index: int = item.find("/online")
        shutil.move(item[:index], "D:/spider archive")
    log.info('[Done]')
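# The chunking above hands each worker at most ceil(len(all_data) / cpu_use_number)
# items. A standalone sanity check of that arithmetic (the numbers are made up):
import math

_items, _workers = 10, 4
_size = int(math.ceil(_items / float(_workers)))
assert _size == 3
# 10 items are dispatched as chunks of 3, 3, 3 plus a remainder chunk of 1.
assert [min(_size, _items - i) for i in range(0, _items, _size)] == [3, 3, 3, 1]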
def gen_proxies(self):
    session = DBSession()
    # Pick a random proxy from the top-ranked usable IPs.
    valid_ip = (session.query(IP).filter(IP.rank.isnot(None))
                .order_by(IP.rank).limit(self._batch_size).all())
    proxies = None if not valid_ip else choice(valid_ip).to_proxy()
    session.close()
    return proxies
def get():
    session = DBSession()
    valid_ip = session.query(IP).all()
    proxies = None if not valid_ip else choice(valid_ip).to_proxy()
    session.close()
    return json.dumps(proxies) if proxies is not None else 'None'
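# A caller could pass the result straight to an HTTP client. A minimal usage
# sketch, assuming to_proxy() returns a requests-style mapping such as
# {'http': 'http://1.2.3.4:8080'} (the target URL is only an example):
import json

import requests

raw = get()
if raw != 'None':
    proxies = json.loads(raw)
    # Route a request through the proxy drawn from the pool.
    resp = requests.get('https://httpbin.org/ip', proxies=proxies, timeout=10)
    print(resp.text)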
def save_danmaku_to_db(q: Queue, danmakuMap: MutableMapping[int, DanmakuDO],
                       relationMap: MutableMapping[int, DanmakuRealationDO],
                       cid_danmakuIdSet: MutableMapping[int, Set[int]]):
    session = DBSession()
    try:
        remove_db_exist_ids(danmakuMap, relationMap, cid_danmakuIdSet.keys())
        print('[After Removed DB ids] danmaku len: %s, relation len: %s'
              % (len(danmakuMap), len(relationMap)))
        if len(danmakuMap) != len(relationMap):
            raise Exception("danmaku's len is not eq relation's len")
        if danmakuMap.values():
            session.bulk_save_objects(danmakuMap.values())
        if relationMap.values():
            session.bulk_save_objects(relationMap.values())
        session.commit()
    except BaseException:
        session.rollback()
        # Hand the traceback back to the parent process through the queue.
        name = multiprocessing.current_process().name
        _map: MutableMapping[str, str] = {name: traceback.format_exc()}
        q.put(_map)
        print('Oops: ', name)
    else:
        print('Save to DB success, len: %s' % len(danmakuMap))
        for cid, value in cid_danmakuIdSet.items():
            try:
                red.sadd(cid, *value)  # cache the saved danmaku ids per cid in Redis
            except BaseException:
                traceback.print_exc()
                print('[ERROR] redis. cid: %s' % cid)
        print('[DONE] save danmaku ids to redis')
    finally:
        session.close()
        del danmakuMap
        del relationMap
        gc.collect()
# coding=utf-8
import time

from config import DBSession
from database.status import Status

session2 = DBSession()
today_date = time.strftime("%Y-%m-%d")
statuss = Status(today=today_date, new_data=1, duplicate_data=2)
session2.add(statuss)
session2.commit()
session2.close()
def processing_data(j: str, get_data_time: datetime):
    obj: AV = selfusepy.parse_json(j, AV())
    log.info("[Saving] top avs data: %s" % get_data_time.isoformat())
    session = DBSession()
    for i, item in enumerate(obj.onlineList):
        avInfoDO = AVInfoDO(item)
        avStatDO = AVStatDO(item, i + 1, get_data_time)
        exist: AVInfoDO = session.query(AVInfoDO).filter(
            AVInfoDO.aid == avInfoDO.aid).first()
        # If the av already exists, only add its statistics.
        try:
            if not exist:
                session.add(avInfoDO)
                session.add(avStatDO)
                log.info('[INSERT] aid: %s' % avInfoDO.aid)
            else:
                session.add(avStatDO)
                log.info('[UPDATE] av statistics, aid: %s' % avInfoDO.aid)
            session.commit()
        except BaseException as e:
            session.rollback()
            raise e
        else:
            log.info("[Update or Insert] success")
    session.close()
    log.info('[DONE] save top AVs')
# coding=utf-8
__author__ = 'lenovo'

import sys

sys.path.append('/home/csc/getdata')

from config import DBSession
from database.domain import Domain
from function import get_ip_pv
import time

time_now = time.strftime("%Y-%m-%d %H:%M:%S")
print('Current time:', time_now)

session = DBSession()
websites = session.query(Domain.id, Domain.domain, Domain.ip, Domain.pv).all()
session.commit()
session.close()

index = 0
for website in websites:
    index += 1
    time.sleep(60)
    print(index, website.domain, 'Start')
    lists = get_ip_pv.decorate_get_ip_pv(website.domain, 0)
    if not lists:
        continue
    # if lists[0] == '-' or lists[1] == '-':
    #     print(index, 'No data, Finished')
    #     continue
    try:
        ip = int(lists[0])
def update_user_fans():
    log.info("--------update fans running--------")
    last_timestamp: int = 0
    update_delta: int = 24 * 60 * 60  # refresh once a day
    file: List[dict] = list()
    try:
        while True:
            timestamp: int = int(time.time())
            if timestamp - last_timestamp >= update_delta:
                log.info("----------update fans----------")
                session = DBSession()
                mids: Set[int] = set()
                sql: str = 'select mid from "user"'
                res: ResultProxy = session.execute(sql)
                for item in res.fetchall():
                    mids.add(int(item[0]))
                log.info("mids: %s" % len(mids))
                for i, v in enumerate(mids):
                    try:
                        mid = {'mid': v}
                        res: HTTPResponse = selfusepy.get(
                            'http://api.bilibili.com/x/web-interface/card', **mid)
                        j: dict = json.loads(res.data)
                        follower = j["data"]["follower"]
                        if follower is None:
                            raise Exception("mid: %s, fans can not be none" % v)
                        fans: int = int(follower)
                        user: UserProfileDO = session.query(UserProfileDO).filter(
                            UserProfileDO.mid == v).first()
                        # Capture the old value before overwriting, so the record
                        # actually keeps the former fan count.
                        former_fans = user.fans
                        log.info("i: %s, mid: %s, former fans: %s, fans: %s, delta: %s"
                                 % (i, v, former_fans, fans,
                                    fans - former_fans if former_fans is not None else fans))
                        user.fans = fans
                        session.commit()
                        file.append({"mid": v, "former_fans": former_fans, "fans": fans})
                        time.sleep(2)
                    except BaseException as e:
                        log.info("mid: %s, user: %s" % (v, user))
                        raise e
                session.close()
                last_timestamp = timestamp
                file_name = "%s.json" % ("%s-%s" % ("fans", timestamp))
                file_path = "data-temp/%s" % file_name
                _file.save(json.dumps(file), file_path)
                _s3.put({file_name: file_path})
                log.info("----------update fans end----------")
            else:
                time.sleep(10)
    except BaseException as e:
        log.exception(e)
        import traceback
        if platform.system() != "Windows":
            _email.send(email_to_addr, traceback.format_exc())
def func(cid: int):
    print(multiprocessing.current_process().name + str(cid))


def fig(n: int):
    # Iterative Fibonacci with a bottom-up DP table.
    dp: dict = {k: 0 for k in range(1, n + 2)}
    dp[1] = dp[2] = 1
    for i in range(3, n + 1):
        dp[i] = dp[i - 1] + dp[i - 2]
    return dp[n]


if __name__ == '__main__':
    session = DBSession()
    print(len(session.query(UserProfileDO).all()))
    # for item in session.query(UserProfileDO).all():
    #     print(item)
    exit(0)
    # import asyncio
    # loop = asyncio.get_event_loop()
    # start_time: int = int(time.time())
    # tasks = list()
    # tasks.append(hello(start_time))
    # tasks.append(hello(start_time))
    # loop.run_until_complete(asyncio.wait(tasks))
    # loop.close()
    # exit(0)