Example #1
 def serve(self):
     self.logger.warning('Service started')
     # Re-read the clock on every pass and stop the loop at 23:30.
     while True:
         now = datetime.now()
         if now.hour == 23 and now.minute >= 30:
             break
         session = DBSession()
         valid_ip_count = self._check_and_rank_ip(session)
         self.logger.warning('Currently usable IPs: {}'.format(valid_ip_count))
         if valid_ip_count < POOL_SIZE:
             self._update_ip_pool(session)
         session.close()
         time.sleep(HEART_BEAT)
     self.logger.warning('Service finished')
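Every snippet in this listing obtains its session from a DBSession() factory (see the config imports in Examples #8, #13 and #15). A minimal sketch of how such a factory is commonly wired up with SQLAlchemy is shown below; the connection URL is a placeholder assumption, not the projects' real configuration.

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Placeholder connection URL; the real projects configure their own engines.
engine = create_engine('postgresql://user:password@localhost:5432/example')
DBSession = sessionmaker(bind=engine)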
Example #2
async def save_cid_aid_relation(cid_aid: MutableMapping[int, int],
                                cid_info: MutableMapping[int, AvDanmakuCid]):
    """Save the av-cid relations."""
    if not cid_aid:
        return
    objs: List[AVCidsDO] = []

    sql: str = 'select cid from av_cids where cid in (%s)' % ','.join(
        '%s' % item for item in cid_aid.keys())

    cids: ResultProxy = await execute_sql(sql)
    # Collect the cids whose relation already exists in the database.
    exist_cids: Set[int] = set()
    for item in cids.fetchall():
        exist_cids.add(int(item[0]))

    if len(exist_cids) != len(cid_aid):
        session = DBSession()
        for cid, aid in cid_aid.items():
            if cid in exist_cids:
                continue
            obj: AVCidsDO = AVCidsDO()
            obj.cid = cid
            obj.aid = aid
            objs.append(obj)
        for cid in exist_cids:
            cid_info.pop(cid, None)

        try:
            if cid_info:
                for item in cid_info.values():
                    await execute_sql(
                        "update av_cids set page = %s, page_name = '%s' where cid = %s;"
                        % (item.page, item.pagename, item.cid))
            session.bulk_save_objects(objs)
            session.commit()
        except BaseException as e:
            session.rollback()
            raise e
        else:
            log.info('[Saved] av-cid relation. len: %s' % len(objs))
        finally:
            session.close()
    else:
        log.info('All av-cid relations already exist')
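The queries above are built with % string interpolation, which works for integer cids but is fragile in general. Below is a hedged sketch of the same existence check using SQLAlchemy bound parameters; it assumes a synchronous Session is at hand, whereas the original goes through an async execute_sql helper.

from sqlalchemy import text, bindparam

def fetch_existing_cids(session, cids):
    """Return the subset of `cids` already present in av_cids (sketch)."""
    if not cids:
        return set()
    stmt = text('SELECT cid FROM av_cids WHERE cid IN :cids').bindparams(
        bindparam('cids', expanding=True))
    rows = session.execute(stmt, {'cids': list(cids)})
    return {int(row[0]) for row in rows}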
Example #3
def __main__(mids: Set[int]):
    session = DBSession()
    for i in mids:
        mid = {'mid': i}
        res: HTTPResponse = selfusepy.get(
            'https://api.bilibili.com/x/space/acc/info', **mid)
        isUpdated: bool = False

        try:
            resData: UserProfile = selfusepy.parse_json(
                res.data, UserProfile())
            dbData: UserProfileDO = session.query(UserProfileDO).filter(
                UserProfileDO.mid == i).first()
            if dbData:  # the profile already exists in the DB
                resDO: UserProfileDO = UserProfileDO(resData)
                # Compare the fetched profile with the DB record and update
                # any field whose value has changed.
                for item in vars(dbData).items():
                    if item[0].startswith('_') or item[0] == "fans":
                        # Skip SQLAlchemy's internal attributes on the mapped DO
                        # class, and skip `fans`, which is maintained by the
                        # dedicated fans-update job.
                        continue
                    try:
                        newValue = getattr(resDO, item[0])
                        if newValue != item[1]:
                            isUpdated = True
                            log.info(
                                '[UPDATE] mid: %s, key: %s, new: %s, old: %s' %
                                (i, item[0], newValue, item[1]))
                            setattr(dbData, item[0], newValue)
                    except BaseException as e:
                        raise e
                if not isUpdated:
                    log.info('[EQUAL] mid: %s' % i)
            else:
                log.info('[INSERT] mid: %s' % i)
                session.add(UserProfileDO(resData))

            session.commit()
        except BaseException as e:
            log.error('mid: %s, data: %s' % (i, res.data))
            raise e
        finally:
            log.info('[SLEEP] 2s')
            time.sleep(2)

    session.close()
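The field-by-field comparison above can be factored into a small helper. The sketch below is a generalized version of that loop, not the project's actual code; it assumes plain attribute access and keeps the same convention of skipping underscore-prefixed SQLAlchemy internals and any explicitly excluded fields.

def apply_updates(db_obj, new_obj, skip=('fans',)):
    """Copy changed public attributes from new_obj onto db_obj (sketch)."""
    changed = False
    for key, old_value in vars(db_obj).items():
        if key.startswith('_') or key in skip:
            continue
        new_value = getattr(new_obj, key, old_value)
        if new_value != old_value:
            setattr(db_obj, key, new_value)
            changed = True
    return changed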
Example #4
def Query():
    Session1 = DBSession()
    # Fetch the Order rows that are marked for printing.
    All_order = Session1.query(Order).filter(Order.Print_Status == 1).all()
    # Proceed only if any matching orders were found.
    if All_order:
        # Loop over every matching order.
        for one in range(len(All_order)):
            if All_order[one].Born_Date_Day == datetime.date.today():
                # if All_order[one].Print_Status == 1:
                try:
                    print('Datetime :' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '\nId:' + str(
                        All_order[one].Id))  # current timestamp

                    # Download and save the file from the upload URL.
                    # Configure the host yourself.
                    print_file = requests.get('http://XXX.XXX.XXX.XXX/static/Upload_Files/{}'.format(
                        All_order[one].File_Dir))  # request the link
                    if print_file.status_code != 200:  # request did not return 200
                        print('No 200!')
                        raise IOError(
                            '{} {} {}'.format(print_file.status_code, print_file.reason,
                                              print_file.url))  # raise with the response details
                    else:
                        with open('./static/go_print/' + All_order[one].File_Dir, 'wb') as f:
                            f.write(print_file.content)  # write the downloaded file

                    # Handle download errors.
                except Exception as e:
                    print('no download!')
                    with open('./log/download_error_log', 'a') as f:
                        f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + str(
                            All_order[one].Id) + ' ' + str(e) + '\n')
                else:
                    with open('./log/download_log', 'a') as f:
                        f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + str(
                            All_order[one].Id) + ' successfully!' + '\n')
                    All_order[one].Print_Status = 2  # mark the file as downloaded
                finally:
                    Session1.commit()
                    print('>>>>>>>>>>>>>>><<<<<<<<<<<<<<<')
    else:
        pass
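A possible tightening of the download step, sketched below, lets requests raise on non-2xx responses and streams the body to disk instead of holding it in memory. It reuses the placeholder host from the example; the helper name, timeout and chunk size are assumptions.

import requests

def download_order_file(file_dir, dest_dir='./static/go_print/'):
    """Download one uploaded file and save it locally (sketch)."""
    url = 'http://XXX.XXX.XXX.XXX/static/Upload_Files/{}'.format(file_dir)
    with requests.get(url, stream=True, timeout=30) as resp:
        resp.raise_for_status()  # raises HTTPError for any non-2xx status
        with open(dest_dir + file_dir, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)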
Example #5
async def query_all_cid_of_av(avInfo: AVInfoDO):
    global i_for_queryAllCidOfAv, Last_Request_Time
    log.info('[START] i: %s' % i_for_queryAllCidOfAv)

    # Throttle: wait until REQUEST_TIME_DELTA nanoseconds have passed since the
    # previous request.
    delta = (Last_Request_Time + REQUEST_TIME_DELTA -
             time.time_ns()) / 1_000_000_000
    time.sleep(delta if delta > 0 else 0)
    i_for_queryAllCidOfAv += 1

    log.info('[REQUEST] av\'s cids, aid: %s' % avInfo.aid)

    Last_Request_Time = time.time_ns()
    res: HTTPResponse = await selfusepy.get_async(
        'https://www.bilibili.com/widget/getPageList?aid=' + str(avInfo.aid))
    cid_map: MutableMapping[int, AvDanmakuCid] = {}
    session = DBSession()
    try:
        j = json.loads(res.data)
        if isinstance(j, list):
            for item in j:
                cid_map[item['cid']] = selfusepy.parse_json(
                    json.dumps(item), AvDanmakuCid())
        log.info('[REQUEST] Done')
        log.info('[DATA] aid: %s, cid len: %s' % (avInfo.aid, len(cid_map)))

        # Filter out aid-cid relations that are already saved.
        sql: str = 'select cid from av_cids where aid = %s and cid in (%s)' % (
            avInfo.aid, ','.join('%s' % item for item in cid_map.keys()))
        r: ResultProxy = await execute_sql(sql)
        exist: Set[int] = set()
        for item in r.fetchall():
            exist.add(item[0])

        for cid, info in cid_map.items():
            if cid not in exist:
                session.add(AVCidsDO(avInfo.aid, info))
        session.commit()
        await filter_cid_which_isexist(avInfo.aid, cid_map)
    except BaseException as e:
        log.error('aid: %s' % avInfo.aid)
        raise e
    finally:
        session.close()
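The throttle at the top of this coroutine keeps a minimum gap between requests by comparing against a module-level timestamp. A small standalone sketch of that idea follows; the function name is an assumption, and it works in nanoseconds like the original globals.

import time

def wait_for_next_request(last_request_time_ns, min_delta_ns):
    """Sleep until min_delta_ns has elapsed since last_request_time_ns, then
    return the new request timestamp in nanoseconds (sketch)."""
    remaining = (last_request_time_ns + min_delta_ns - time.time_ns()) / 1_000_000_000
    if remaining > 0:
        time.sleep(remaining)
    return time.time_ns()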
Example #6
def destruct_danmaku(cid: int, danmakus: List[CustomTag]):
    danmakuMap: MutableMapping[int, DanmakuDO] = {}
    relationMap: MutableMapping[int, DanmakuRealationDO] = {}
    print('[FORMER] cid: %s, danmakus: %s' % (cid, len(danmakus)))
    for danmaku in danmakus:
        # Attribute order: appearance time, mode, font size, color,
        # send timestamp, danmaku pool, user hash, database id.
        obj: DanmakuDO = DanmakuDO()
        obj.content = danmaku.content
        l: list = danmaku.tag_content.split(',')
        obj.danmaku_epoch = float(l[0])
        obj.mode = int(l[1])
        obj.font_size = int(l[2])
        obj.font_color = int(l[3])
        obj.send_time = datetime.fromtimestamp(int(l[4]),
                                               timezone(timedelta(hours=8)))
        obj.danmaku_pool = int(l[5])
        obj.user_hash = int(l[6], 16)
        # Do not resolve the crc32 user id from the database here; it is too slow.
        # value = crc32.get_value(l[6])
        # if value[0] > 0:
        #   obj.user_id = value[1]
        obj.id = int(l[7])

        relation: DanmakuRealationDO = DanmakuRealationDO()
        relation.cid = cid
        relation.danmaku_id = obj.id

        danmakuMap[obj.id] = obj
        relationMap[relation.danmaku_id] = relation

    session = DBSession()
    try:
        removeExist(cid, danmakuMap, relationMap)

        if not danmakuMap and not relationMap:
            print('cid: %s, has saved all danmaku' % cid)
            return

        if danmakuMap:
            session.bulk_save_objects(danmakuMap.values())
        if relationMap:
            session.bulk_save_objects(relationMap.values())
        session.commit()
    except BaseException as e:
        session.rollback()
        print(e)
        print('cid: %s, has error. ' % cid)
    else:
        print('cid: %s, Saved into DB.' % cid)
    finally:
        session.close()
        print('[SAVED] danmakuMap.len: %s' % len(danmakuMap))
        print('[SAVED] relationMap.len: %s' % len(relationMap))
        danmakuMap.clear()
        relationMap.clear()
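The comma-separated attribute string split above follows the eight-field Bilibili danmaku layout noted in the comment. As an illustration only, here is that parse rewritten around a dataclass; ParsedDanmaku and parse_attr are hypothetical names, not part of the project.

from dataclasses import dataclass
from datetime import datetime, timezone, timedelta

@dataclass
class ParsedDanmaku:
    offset: float
    mode: int
    font_size: int
    font_color: int
    send_time: datetime
    pool: int
    user_hash: int
    row_id: int

def parse_attr(attr: str) -> ParsedDanmaku:
    """Parse one danmaku attribute string (sketch of the split in Example #6)."""
    parts = attr.split(',')
    return ParsedDanmaku(
        offset=float(parts[0]),
        mode=int(parts[1]),
        font_size=int(parts[2]),
        font_color=int(parts[3]),
        send_time=datetime.fromtimestamp(int(parts[4]), timezone(timedelta(hours=8))),
        pool=int(parts[5]),
        user_hash=int(parts[6], 16),
        row_id=int(parts[7]),
    )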
Example #7
def Print():
    Session2 = DBSession()
    cmd = 'ls -t ./static/go_print > ./log/goprint_log'  # dump the file names to be printed into a log file
    subprocess.call(cmd, shell=True)
    # Read the directory listing back from that log file.
    Goprint = open('./log/goprint_log', 'r+')
    for line in Goprint:
        print_order = Session2.query(Order).filter(Order.File_Dir == line[:-1]).first()  # look up the order

        try:  # start printing
            print('----------------' + print_order.File_Dir + '----------------')
            # Print the order's info slip.

            # pdf(Session2.query(User).filter(User.Id == print_order.User_Id).first().Tel_Number,
            #     print_order.Trade_Number)
            # print_cmd1 = 'lp -o fitplot ./static/html/1.pdf'
            # go_mac = subprocess.call(print_cmd1, shell=True)
            # if go_mac != 0:
            #     error = subprocess.getoutput(print_cmd1)
            #     raise IOError(error)

            # Print the user's file.
            if print_order.Print_Direction == '4':
                if print_order.File_Dir[-3:] in ['pdf', 'jpg', 'png', 'peg', 'psd', 'pdd', 'svg']:
                    print('try to print >< 1 ><' + print_order.File_Dir[-3:])
                    # copies, fit to page, landscape, single/double-sided, color model
                    print_cmd2 = 'lp -n {} -o fitplot -o landscape -o sides={} -o ColorModel={} ./static/go_print/{}'.format(
                        print_order.Print_Copies,
                        print_order.Print_way,
                        print_order.Print_Colour,
                        line[:-1])
                else:
                    print('try to print >< 2 ><' + print_order.File_Dir[-3:])
                    print_cmd2 = 'lp -n {} -o landscape -o sides={} -o ColorModel={} ./static/go_print/{}'.format(
                        print_order.Print_Copies,
                        print_order.Print_way,
                        print_order.Print_Colour,
                        line[:-1])
            else:
                if print_order.File_Dir[-3:] in ['pdf', 'jpg', 'png', 'peg', 'psd', 'pdd', 'svg']:
                    print('try to print >< 3 ><' + print_order.File_Dir[-3:])
                    print_cmd2 = 'lp -n {} -o fitplot -o sides={} -o ColorModel={}  ./static/go_print/{}'.format(
                        print_order.Print_Copies,
                        print_order.Print_way,
                        print_order.Print_Colour,
                        line[:-1])
                else:
                    print('try to print >< 4 ><' + print_order.File_Dir[-3:])
                    print_cmd2 = 'lp -n {} -o sides={} -o ColorModel={}  ./static/go_print/{}'.format(
                        print_order.Print_Copies,
                        print_order.Print_way,
                        print_order.Print_Colour,
                        line[:-1])
            go_lp = subprocess.call(print_cmd2, shell=True)
            print('>>>>>>>>>>>>>>>>>>>>>>one<<<<<<<<<<<<<<<<<<<<<<')
            if go_lp != 0:
                error = subprocess.getoutput(print_cmd2)
                raise IOError(error)
            print('----------------lp----------------')
        except Exception as e:
            print('----------------error----------------')
            with open('./log/print_error_log', 'a') as f:
                f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + " " + line[:-1] + " " + str(e) + "\n")
        else:
            print('----------------ok----------------')
            # Delete the file once it has been printed.
            subprocess.call('rm ./static/go_print/{}'.format(line[:-1]), shell=True)
            print_order.Print_Status = 3
            with open('./log/print_success_log', 'a') as f:
                f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + line[:-1] + ' ' + 'Successfully!' + "\n")
        finally:
            Session2.commit()
            print('>>>>>>>>>>>>>>><<<<<<<<<<<<<<<')
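The four lp command variants above differ only in whether fitplot and landscape are included. A hedged sketch of the same invocation built as an argument list (avoiding shell=True and quoting issues with unusual file names) follows; build_lp_command is a hypothetical helper and assumes the same Order fields.

import subprocess

def build_lp_command(order, file_name, fit, landscape):
    """Assemble the lp argument list for one order (sketch)."""
    cmd = ['lp', '-n', str(order.Print_Copies)]
    if fit:
        cmd += ['-o', 'fitplot']
    if landscape:
        cmd += ['-o', 'landscape']
    cmd += ['-o', 'sides={}'.format(order.Print_way),
            '-o', 'ColorModel={}'.format(order.Print_Colour),
            './static/go_print/{}'.format(file_name)]
    return cmd

# usage: subprocess.call(build_lp_command(order, name, fit=True, landscape=False))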
Example #8
__author__ = 'lenovo'
import sys
sys.path.append('/home/csc/getdata')



import datetime
import time

from function import get_search_engine_result
from config import DBSession
from database.project import Project
from database.status import Status


session = DBSession()
time_now = time.strftime("%Y-%m-%d %H:%M:%S")
print 'Current time:', time_now
# Fetch the projects that currently need crawling from the project table.
projects = session.query(Project.id, Project.pname, Project.keywords,Project.create_time,Project.period,Project.status).filter().all()
session.commit()
session.close()
duplicate_num = 0
new_num = 0
for project in projects:
    #print 'type:', type(project)
    pid  = project.id
    #print 'name:', project.pname.encode("utf8")
    #print 'keywords:', project.keywords.encode("utf8")
    ptitle = project.pname.encode("utf8").strip().replace(' ', '%20')
    status  = project.status
Example #9
def main():
  """
  Testing requires adjusting the database, S3 deletion, and the archive directory.
  :return:
  """
  temp_file_dir = 'data-temp/'

  # download data
  log.info("Getting objects' keys")
  keys: Set[str] = _s3.get_all_objects_key()

  if not keys:
    log.info("No file in COS!")
    exit(0)
  else:
    local_processing.multi_download(temp_file_dir, keys)
    if not _s3.archive_object(keys):
      log.error("Archive objects failed")
      exit(0)
    log.info("Download files, DONE.")

  # reading data
  all_data: MutableMapping[str, AV] = read_file(temp_file_dir)

  log.info("Analyze")
  # analyze in parallel worker processes
  pool = Pool(processes=cpu_use_number)
  q = multiprocessing.Manager().Queue()

  size = int(math.ceil(len(all_data) / float(cpu_use_number)))
  map_temp: MutableMapping[str, AV] = {}

  res: List[ApplyResult] = list()
  for key, value in all_data.items():
    map_temp[key] = value
    if len(map_temp) % size == 0:
      res.append(pool.apply_async(func=analyze, args=(q, map_temp,)))
      map_temp = {}
  res.append(pool.apply_async(func=analyze, args=(q, map_temp,)))
  pool.close()
  pool.join()
  if q.qsize() > 0:  # anything in the queue means a worker hit an error: raise and stop
    log.error('analyze occurs error')
    raise Exception(q)

  # saving
  all_avinfos: List[AVInfoDO] = []
  all_avstats: List[AVStatDO] = []
  for item in res:
    v = item.get()
    all_avinfos.extend(v[0])
    all_avstats.extend(v[1])

  # remove avinfos which exist in db already and same in program
  log.info("Remove duplicated avinfo")
  temp: Set[int] = set()  # aids already in the db
  for item in all_avinfos:
    temp.add(item.aid)
  session = DBSession()
  sql: str = "select aid from av_info where aid in (%s)" % ",".join("%s" % item for item in temp)
  aids: ResultProxy = session.execute(sql)
  temp.clear()
  for item in aids.fetchall():
    temp.add(int(item[0]))

  temp2: List[AVInfoDO] = []  # deduplicated within this batch
  for item in all_avinfos:
    if item.aid not in temp:
      temp2.append(item)
      temp.add(item.aid)
  all_avinfos = temp2

  # db
  log.info("Save infos(%s) and stats(%s)" % (len(all_avinfos), len(all_avstats)))
  session.bulk_save_objects(all_avinfos)
  session.bulk_save_objects(all_avstats)
  session.commit()

  # archive
  log.info("Archive")
  for item in all_data.keys():
    index: int = item.find("/online")
    shutil.move(item[:index], "D:/spider archive")

  log.info('[Done]')
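The deduplication in the middle of main() runs in two stages: drop aids already in the database, then drop in-batch repeats. A compact sketch of that idea as a standalone helper (dedupe_avinfos is a hypothetical name; it assumes objects carrying an `aid` attribute) is:

def dedupe_avinfos(avinfos, existing_aids):
    """Keep only avinfos whose aid is new, both against the DB and within the batch (sketch)."""
    seen = set(existing_aids)
    unique = []
    for info in avinfos:
        if info.aid in seen:
            continue
        seen.add(info.aid)
        unique.append(info)
    return unique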
Example #10
 def gen_proxies(self):
     session = DBSession()
     valid_ip = session.query(IP).filter(IP.rank != None).order_by(IP.rank).limit(self._batch_size).all()
     proxies = None if not valid_ip else choice(valid_ip).to_proxy()
     session.close()
     return proxies
Example #11
def get():
    session = DBSession()
    valid_ip = session.query(IP).all()
    proxies = None if not valid_ip else choice(valid_ip).to_proxy()
    session.close()
    return json.dumps(proxies) if proxies is not None else 'None'
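A brief usage sketch for the two proxy helpers above, assuming to_proxy() returns a requests-style mapping such as {'http': 'http://1.2.3.4:8080'}; the target URL is only an example.

import json
import requests

raw = get()
proxies = None if raw == 'None' else json.loads(raw)
resp = requests.get('https://httpbin.org/ip', proxies=proxies, timeout=10)
print(resp.text)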
Example #12
def save_danmaku_to_db(q: Queue, danmakuMap: MutableMapping[int, DanmakuDO],
                       relationMap: MutableMapping[int, DanmakuRealationDO],
                       cid_danmakuIdSet: MutableMapping[int, Set[int]]):
    session = DBSession()
    try:
        remove_db_exist_ids(danmakuMap, relationMap, cid_danmakuIdSet.keys())
        print('[After Removed DB ids] danmaku len: %s, relation len: %s' %
              (len(danmakuMap), len(relationMap)))

        if len(danmakuMap) != len(relationMap):
            raise Exception("danmaku's len is not eq relation's len")

        if danmakuMap.values():
            session.bulk_save_objects(danmakuMap.values())
        if relationMap.values():
            session.bulk_save_objects(relationMap.values())
        session.commit()
    except BaseException:
        session.rollback()
        name = multiprocessing.current_process().name
        _map: MutableMapping[str, str] = {name: traceback.format_exc()}
        q.put(_map)
        print('Oops: ', name)
    else:
        print('Save to DB success, len: %s' % len(danmakuMap))
        for cid, value in cid_danmakuIdSet.items():
            try:
                red.sadd(cid, *value)
            except BaseException:
                traceback.print_exc()
                print('[ERROR] redis. cid: %s' % cid)
        print('[DONE] save danmaku ids to redis')
    finally:
        session.close()
        del danmakuMap
        del relationMap
        gc.collect()
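The success path above records saved danmaku ids into per-cid Redis sets with red.sadd. A hedged sketch of the complementary read side is shown below; filter_new_ids is a hypothetical helper, and it assumes the same redis-py client and sets of integer ids.

def filter_new_ids(red, cid, danmaku_ids):
    """Return only the ids not yet recorded in the Redis set for this cid (sketch)."""
    existing = {int(x) for x in red.smembers(cid)}
    return [i for i in danmaku_ids if i not in existing]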
Example #13
#coding=utf8

import time

from config import DBSession
from database.status import Status

session2 = DBSession()
today_date = time.strftime("%Y-%m-%d")
statuss = Status(today = today_date, new_data = 1, duplicate_data = 2)
session2.add(statuss)
session2.commit()
session2.close()
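The same insert, sketched with explicit rollback/close handling so a failed commit does not leave the session open; this mirrors the try/except/finally pattern used in the larger examples above and reuses the same Status model and DBSession factory.

import time

from config import DBSession
from database.status import Status

session = DBSession()
try:
    session.add(Status(today=time.strftime("%Y-%m-%d"), new_data=1, duplicate_data=2))
    session.commit()
except Exception:
    session.rollback()
    raise
finally:
    session.close()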
Example #14
def processing_data(j: str, get_data_time: datetime):
    obj: AV = selfusepy.parse_json(j, AV())
    log.info("[Saving] top avs data: %s" % get_data_time.isoformat())
    session = DBSession()
    for i, item in enumerate(obj.onlineList):
        avInfoDO = AVInfoDO(item)
        avStatDO = AVStatDO(item, i + 1, get_data_time)

        exist: AVInfoDO = session.query(AVInfoDO).filter(
            AVInfoDO.aid == avInfoDO.aid).first()
        """
    存在则只添加关于av的statistic
    """
        try:
            if not exist:
                session.add(avInfoDO)
                session.add(avStatDO)
                log.info('[INSERT] aid: %s' % avInfoDO.aid)
            else:
                session.add(avStatDO)
                log.info('[UPDATE] av statistics, aid: %s' % avInfoDO.aid)

            session.commit()
        except BaseException as e:
            session.rollback()
            raise e
        else:
            log.info("[Update or Insert] success")

    session.close()
    log.info('[DONE] save top AVs')
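The exist-then-insert branch above is essentially a get-or-create. A minimal sketch of that pattern, assuming the same SQLAlchemy session and AVInfoDO mapping (get_or_create_avinfo is a hypothetical name), is:

def get_or_create_avinfo(session, av_info):
    """Return (row, created) for this aid (sketch)."""
    existing = session.query(AVInfoDO).filter(AVInfoDO.aid == av_info.aid).first()
    if existing is None:
        session.add(av_info)
        return av_info, True
    return existing, False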
Example #15
# coding=utf-8

__author__ = 'lenovo'
import sys
sys.path.append('/home/csc/getdata')

from config import DBSession
from database.domain import Domain
from function import get_ip_pv
import time


time_now = time.strftime("%Y-%m-%d %H:%M:%S")
print 'Current time:', time_now
session = DBSession()
websites = session.query(Domain.id, Domain.domain,Domain.ip,Domain.pv).filter().all()
session.commit()
session.close()
index = 0
for website in websites:
    index += 1
    time.sleep(60)
    print index,  website.domain, 'Start'
    lists = get_ip_pv.decorate_get_ip_pv(website.domain, 0)
    if not lists:
        continue
    #if (lists[0] == '-' or lists[1] == '-'):
    #    print index,  'No data, Finished'
    #    continue
    try:
        ip = int(lists[0])
Example #16
def update_user_fans():
    log.info("--------update fans running--------")
    last_timestamp: int = 0
    update_delta: int = 24 * 60 * 60
    file: List[dict] = list()
    try:
        while True:
            timestamp: int = int(time.time())
            if timestamp - last_timestamp >= update_delta:
                log.info("----------update fans----------")
                session = DBSession()

                mids: Set[int] = set()
                sql: str = 'select mid from "user"'
                res: ResultProxy = session.execute(sql)
                for item in res.fetchall():
                    mids.add(int(item[0]))
                log.info("mids: %s" % mids.__len__())

                for i, v in enumerate(mids):
                    try:
                        mid = {'mid': v}
                        res: HTTPResponse = selfusepy.get(
                            'http://api.bilibili.com/x/web-interface/card',
                            **mid)
                        j: dict = json.loads(res.data)
                        fans: int = int(j["data"]["follower"])
                        user: UserProfileDO = session.query(
                            UserProfileDO).filter(
                                UserProfileDO.mid == v).first()
                        if fans is None:
                            raise Exception("mid: %s, fans can not be none" %
                                            v)
                        # fans - user.fans if user.fans is not None else user.fans = 0
                        former_fans = user.fans  # capture before overwriting
                        log.info(
                            "i: %s, mid: %s, former fans: %s, fans: %s, delta: %s"
                            % (i, v, former_fans, fans, fans -
                               former_fans if former_fans is not None else fans))
                        user.fans = fans
                        session.commit()
                        file.append({
                            "mid": v,
                            "former_fans": former_fans,
                            "fans": fans
                        })
                        time.sleep(2)
                    except BaseException as e:
                        log.info("mid: %s, user: %s" % (v, user))
                        raise e

                session.close()
                last_timestamp = timestamp
                file_name = "%s.json" % ("%s-%s" % ("fans", timestamp))
                file_path = "data-temp/%s" % file_name
                _file.save(json.dumps(file), file_path)
                _s3.put({file_name: file_path})
                log.info("----------update fans end----------")
            else:
                time.sleep(10)
    except BaseException as e:
        log.exception(e)
        import traceback
        if platform.system() != "Windows":
            _email.send(email_to_addr, traceback.format_exc())
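The outer loop above polls every 10 seconds until 24 hours have passed since the last run. A small sketch of the same cadence computed directly, rather than by polling, follows; the helper name and its use are assumptions, not the project's code.

import time

def sleep_until_next_update(last_timestamp, interval=24 * 60 * 60):
    """Sleep until `interval` seconds have elapsed since last_timestamp (sketch)."""
    remaining = last_timestamp + interval - time.time()
    if remaining > 0:
        time.sleep(remaining)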
Example #17

def func(cid: int):
    print(multiprocessing.current_process().name + str(cid))


def fig(n: int):
    # Iterative DP table for the n-th Fibonacci number.
    dp: dict = {k: 0 for k in range(1, n + 2)}
    dp[1] = dp[2] = 1
    for i in range(3, n + 1):
        dp[i] = dp[i - 1] + dp[i - 2]
    return dp[n]


if __name__ == '__main__':
    session = DBSession()
    print(len(session.query(UserProfileDO).all()))

    # for item in session.query(UserProfileDO).all():
    #   print(item)

    exit(0)
    # import asyncio
    # loop = asyncio.get_event_loop()
    # start_time: int = int(time.time())
    # tasks = list()
    # tasks.append(hello(start_time))
    # tasks.append(hello(start_time))
    # loop.run_until_complete(asyncio.wait(tasks))
    # loop.close()
    # exit(0)