Example #1
def find_debug_log(request):
    """
    View the test execution log
    :param request:
    :return:
    """
    work_path = os.getcwd()
    try:
        if request.method == 'GET':
            project_name = request.GET.get('project_name')
            spider_name = request.GET.get('spider_name')
            current_line = int(request.GET.get('current_line'))
            project_path = join(PROJECTS_FOLDER, project_name)
            os.chdir(project_path)
            if not os.path.exists("debug_folder"):
                r = Result.success(data='')
                return JsonResponse(r)
            log_path = './debug_folder/logs/{}.log'.format(spider_name)
            with open(log_path, 'r', encoding='utf-8') as input_file:
                lines = input_file.readlines()
            response = []
            for line in lines[(current_line - 1):]:
                data = {'current_line': current_line, 'data': line}
                response.append(data)
                current_line = current_line + 1
            r = Result.success(response)
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
    finally:
        os.chdir(work_path)
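
Every snippet wraps its payload in a `Result` before handing it to Django's `JsonResponse`, but the helper itself is never shown. A minimal sketch consistent with the call sites (the field names here are assumptions, not the project's actual implementation):

class Result:
    # Hypothetical sketch of the helper used throughout these examples.
    # It only mirrors the call sites: Result.success(data) / Result.success(data=...)
    # and Result.fail(exception) each return a plain dict that JsonResponse
    # can serialize directly.
    @staticmethod
    def success(data=None):
        return {'code': 0, 'message': 'ok', 'data': data}

    @staticmethod
    def fail(error=None):
        return {'code': 1, 'message': str(error), 'data': None}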
Example #2
def script_newest_log(request):
    """
    Fetch the latest log for a script
    :param request:
    :return:
    """
    try:
        if request.method == 'POST':
            data = json.loads(request.body.decode('utf-8'))
            script_id = data.get('script_id')
            host_ip = data.get('host_ip')
            script = CrawlScript.objects.get(id=script_id)
            project_name = script.project_name
            spider_name = script.name
            job_id = script.job_id
            if not job_id:
                r = Result.success('No log available yet')
                return JsonResponse(r)

            url = 'http://{}/logs/{}/{}/{}.log'.format(host_ip, project_name, spider_name, job_id)
            response = requests.get(url)
            if response.status_code != 200:
                r = Result.success('No log available yet')
                return JsonResponse(r)
            log_content = response.content.decode('utf-8')
            r = Result.success({'message': log_content})
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
Example #3
def find_debug_result(request):
    """
    View the test execution results
    :param request:
    :return:
    """
    work_path = os.getcwd()
    try:
        if request.method == 'GET':
            project_name = request.GET.get('project_name')
            spider_name = request.GET.get('spider_name')
            project_path = join(PROJECTS_FOLDER, project_name)
            os.chdir(project_path)
            if not os.path.exists("debug_folder"):
                r = Result.success(data='')
                return JsonResponse(r)
            with open('./debug_folder/items/{}.json'.format(spider_name), 'r', encoding='utf-8') as input_file:
                all_text = input_file.read()
            r = Result.success({'content': all_text})
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
    finally:
        os.chdir(work_path)
Example #4
def project_deploy(request, project_name):
    """
    Deploy a spider project
    :param request: request object
    :param project_name: project name
    :return: json of deploy result
    """
    if request.method == 'POST':
        path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
        project_path = join(path, project_name)
        # look up the packaged egg file
        egg = find_egg(project_path)
        if not egg:
            r = Result.success("No packaged egg file found")
            return JsonResponse(r)
        # read the egg once up front: calling read() inside the node loop
        # would hand every node after the first an empty byte string
        with open(join(project_path, egg), 'rb') as egg_file:
            egg_data = egg_file.read()

        data = json.loads(request.body.decode('utf-8'))
        node_ids = data["node_ids"]

        nodes = CrawlNode.objects.filter(id__in=node_ids)
        project = CrawlProject.objects.get(name=project_name)
        for node in nodes:
            engine = get_engine(node)
            engine.add_version(project_name, int(time.time()), egg_data)
            deployed_at = timezone.now()
            # drop any previous deploy record for this node/project pair
            CrawlDeploy.objects.filter(
                node_id=node.id, project_id=project.id).delete()
            deploy, created = CrawlDeploy.objects.update_or_create(
                node_id=node.id,
                project_id=project.id,
                deployed_at=deployed_at,
                description=project.description)
        r = Result.success("")
        return JsonResponse(r)
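
`find_egg` and `get_engine` are external to this snippet. The engine.add_version(project, version, egg_bytes) call matches the signature python-scrapyd-api exposes for Scrapyd's addversion endpoint, which suggests the engine is a Scrapyd client; `find_egg` presumably just locates the packaged egg. A hypothetical sketch of the latter:

import os

def find_egg(project_path):
    # Hypothetical sketch: return the first .egg file name found in the
    # built project directory, or None if the project has not been packaged.
    for name in sorted(os.listdir(project_path)):
        if name.endswith('.egg'):
            return name
    return None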
Example #5
def fetch_user_permissions(request):
    """
    Fetch the user's menu permission list
    :param request:
    :return:
    """
    user_id = request.user_id
    user_roles = CrawlUserRoleRel.objects.filter(user_id=user_id, is_deleted=0)
    if not user_roles:
        return JsonResponse(Result.success(data={}))
    permission_tree = build_permission_tree(user_roles)
    crawl_redis.set('permission#user#{}'.format(user_id), json.dumps(permission_tree))
    r = Result.success(data=permission_tree)
    return JsonResponse(r)
Example #6
def reset_profile_pwd(request, user_id):
    """
    Reset the current user's password
    :param user_id:
    :param request:
    :return:
    """
    try:
        if request.method == 'POST':
            data = json.loads(request.body.decode('utf-8'))
            old_pwd = data.get('old_pwd')
            new_pwd = data.get('new_pwd')
            confirm_pwd = data.get('confirm_pwd')
            user = CrawlUser.objects.get(id=user_id)
            if confirm_pwd != new_pwd:
                raise Exception('The two passwords do not match')
            db_pwd = user.password
            if db_pwd != password2md5(old_pwd):
                raise Exception('Incorrect password')
            user.password = password2md5(new_pwd)
            user.save()
            r = Result.success(None)
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
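
`password2md5` is referenced here and in the login view but never defined; given the name and the direct string comparison against `user.password`, it presumably returns a hex MD5 digest. A sketch under that assumption:

import hashlib

def password2md5(password):
    # Hypothetical sketch: hex MD5 digest of the UTF-8 encoded password,
    # matching the equality checks above. MD5 only mirrors the existing
    # code; it is not a safe password hash for new designs.
    return hashlib.md5(password.encode('utf-8')).hexdigest()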
Example #7
def login(request):
    """
    Log in (TODO: use JWT)
    :param request:
    :return:
    """
    try:
        domain = settings.SESSION_COOKIE_DOMAIN
        if request.method == 'POST':
            data = json.loads(request.body.decode('utf-8'))
            username = data.get('username').strip()
            password = data.get('password').strip()

            # .get() raises DoesNotExist instead of returning None, so a
            # missing-user check after it would never run; fetch with .first()
            user = CrawlUser.objects.filter(username=username).first()
            if not user or password2md5(password) != user.password:
                raise Exception('Incorrect username or password')
            token = jwt_tools.encode_token(user.id, user.username)
            r = Result.success(None)
            response = JsonResponse(r)
            response.set_cookie('dt_token', bytes.decode(token), domain=domain, max_age=60 * 60 * 24 * 30)
            response.set_cookie('dt_user_id', user.id, domain=domain, max_age=60 * 60 * 24 * 30)
            response.set_cookie('dt_username', user.username, domain=domain, max_age=60 * 60 * 24 * 30)
            return response
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
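
`jwt_tools.encode_token` is likewise external. A hypothetical PyJWT-based equivalent (the secret, claim names, and TTL are assumptions; the `bytes.decode(token)` call above hints at PyJWT 1.x, whose encode returned bytes):

import time
import jwt  # PyJWT

JWT_SECRET = 'change-me'  # assumption: the real secret would come from settings

def encode_token(user_id, username, ttl=60 * 60 * 24 * 30):
    # Hypothetical sketch: sign a token whose lifetime mirrors the 30-day
    # cookie max_age set in the login view above.
    payload = {'user_id': user_id, 'username': username,
               'exp': int(time.time()) + ttl}
    return jwt.encode(payload, JWT_SECRET, algorithm='HS256')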
Example #8
def list_role(request):
    """
    Role list
    :param request:
    :return:
    """
    if request.method == 'GET':
        page = request.GET.get('page', 1)
        size = request.GET.get('size', 15)

        response = []

        roles = CrawlRole.objects.filter(is_deleted=0)

        permissions_qs = CrawlPermission.objects.filter(is_deleted=0)

        for role in roles:
            rels = CrawlRolePermission.objects.filter(is_deleted=0, role_id=role.id)
            role_permissions = []
            for rel in rels:
                permission = permissions_qs.get(id=rel.permission_id)
                role_permissions.append(model_to_dict(permission).get('permission_name'))
            # build the role dict once per role, outside the inner loop, so
            # roles without any permission are still serialized
            role_dict = model_to_dict(role)
            role_dict['permission'] = role_permissions
            response.append(role_dict)
        r = Result.success(response)
        return JsonResponse(r)
Example #9
def task_by_script_id(request, script_id):
    """
    Fetch the task for a given script id
    :param script_id:
    :param request:
    :return:
    """
    work_path = os.getcwd()
    try:
        if request.method == 'GET':
            script = CrawlScript.objects.get(id=script_id)
            project = CrawlProject.objects.get(id=script.project_id)
            task = CrawlTask.objects.get(id=project.task_id)
            path = os.path.abspath(join(work_path, PROJECTS_FOLDER))
            vo = model_to_dict(task)
            vo['path'] = path
            vo['script_name'] = script.script_file
            r = Result.success(vo)
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
    finally:
        os.chdir(work_path)
Example #10
def list_proxy_ip(request):
    """
    List all proxy IPs
    :param request:
    :return:
    """
    try:
        if request.method == 'POST':
            data = json.loads(request.body.decode('utf-8'))
            keyword = data.get('keyword')
            page = data.get('page', 1)
            size = data.get('size', 15)
            ip_type = data.get('ip_type')
            # read status from the POST body, like the other filters
            status = data.get('status')
            proxy_ips = CrawlProxyIP.objects.filter(is_deleted=0)
            if keyword is not None:
                proxy_ips = proxy_ips.filter(ip__icontains=keyword)
            if ip_type is not None:
                proxy_ips = proxy_ips.filter(ip_type=ip_type)
            if status is not None:
                proxy_ips = proxy_ips.filter(status=status)
            total = proxy_ips.count()
            r = Result.success(page_helper(total, page, size, proxy_ips))
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
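
`page_helper` appears throughout these examples but is never defined. Its call sites take (total, page, size, queryset) plus an optional dict of extra counters (see Example #19) and return a dict whose 'results' holds serialized rows, so a sketch might look like this; all field names are assumptions:

from django.forms.models import model_to_dict

def page_helper(total, page, size, items, extra=None):
    # Hypothetical sketch: slice the queryset or list for the requested
    # page, serialize model rows to dicts, and attach the totals plus any
    # extra fields the caller wants merged in.
    page, size = int(page), int(size)
    start = (page - 1) * size
    rows = [model_to_dict(item) if hasattr(item, '_meta') else item
            for item in items[start:start + size]]
    pager = {'total': total, 'page': page, 'size': size, 'results': rows}
    if extra:
        pager.update(extra)
    return pager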
Example #11
def edit_user(request, user_id):
    """
    Edit a user
    :param user_id:
    :param request: request object
    :return: json
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        alert_options = data.get('alert_options')

        user = CrawlUser.objects.get(id=user_id)
        user.account = data.get('account')
        user.mobile = data.get('mobile', '')
        user.wx_account = data.get('wx_account')
        user.comment = data.get('comment', '')
        user.alert_enable = data.get('alert_enable', 0)
        user.save()

        role_ids = data.get('role_ids')

        CrawlUserRoleRel.objects.filter(user_id=user_id).update(is_deleted=1)
        for role_id in role_ids:
            CrawlUserRoleRel.objects.create(role_id=role_id,
                                            user_id=user_id)

        # write the permission tree to redis
        user_roles = CrawlUserRoleRel.objects.filter(user_id=user_id)
        crawl_redis.set('permission#user#{}'.format(user_id), json.dumps(build_permission_tree(user_roles)))

        r = Result.success(None)
        return JsonResponse(r)
Example #12
def project_build(request, project_name):
    """
    Build and package a spider project
    :param request: request object
    :param project_name: project name
    :return: json
    """
    path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
    project_path = join(path, project_name)
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        description = data['description']
        build_project(
            project_name,
            include_data=(project_name == 'auto_login'))
        egg = find_egg(project_path)
        if not egg:
            return JsonResponse(Result.fail("编译打包失败"))
        built_at = timezone.now()
        if not CrawlProject.objects.filter(name=project_name).exists():
            CrawlProject(name=project_name,
                         description=description,
                         built_at=built_at,
                         egg=egg).save()
            model = CrawlProject.objects.get(name=project_name)
        else:
            model = CrawlProject.objects.get(name=project_name, is_deleted=0)
            model.built_at = built_at
            model.egg = egg
            model.description = description
            model.save()
        data = model_to_dict(model)
        r = Result.success(data)
        return JsonResponse(r)
Example #13
def list_scripts(request):
    """
    List a project's spider scripts and their distribution across nodes
    :param request:
    :return:
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        size = data.get('size', 15)
        page = data.get('page', 1)
        task_name = data.get("task_name")
        task_id = data.get("task_id")
        script_name = data.get("script_name")

        scripts = CrawlScript.objects.filter(is_deleted=0)
        if task_id:
            scripts = scripts.filter(task_id=task_id)
        if script_name:
            scripts = scripts.filter(name__contains=script_name)
        if task_name:
            scripts = scripts.filter(task_name__contains=task_name)

        scripts = scripts.order_by("-id")
        total = scripts.count()
        response = page_helper(total, page, size, scripts)
        results = response.get('results')
        for result in results:
            result['hosts'] = ','.join(get_hosts_by_script_id(result.get('id')))
        r = Result.success(response)
        return JsonResponse(r)
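
`get_hosts_by_script_id` (used here and in script_stop) resolves the node hosts a script is deployed to. A heavily hedged sketch, assuming the hosts are stored on the CrawlScript row as a JSON-encoded list:

import json

def get_hosts_by_script_id(script_id):
    # Hypothetical sketch: CrawlScript.hosts is assumed to hold a JSON list
    # of "ip:port" strings, consistent with edit_script_cfg persisting
    # data.get('hosts') on the script.
    script = CrawlScript.objects.get(id=script_id)
    return json.loads(script.hosts) if script.hosts else []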
Example #14
def script_stop(request):
    """
    Stop running scripts
    :param request:
    :return:
    """
    try:
        if request.method == 'POST':
            data_scripts = json.loads(request.body.decode('utf-8'))

            if not data_scripts:
                return JsonResponse(Result.fail("没有指定脚本"))

            for data_script in data_scripts:
                crawl_script = CrawlScript.objects.get(id=data_script["id"])
                host_list = get_hosts_by_script_id(crawl_script.id)
                for host in host_list:
                    engine = get_engine_by_ip(host)

                    args = {
                        "redis": '{{"host":"{}","port": {},"db":1,"password":"******"}}'.format(db_conf.redis_host,
                                                                                            str(db_conf.redis_port),
                                                                                            db_conf.redis_pwd),
                        "batch_id": ''
                    }
                    engine.cancel(crawl_script.project_name, crawl_script.name)
            r = Result.success(None)
            return JsonResponse(r)
    except Exception as err:
        r = Result.fail(err)
        return JsonResponse(r)
Example #15
def edit_script_cfg(request):
    """
    Edit spider script configuration
    :param request: request object
    :return:
    """
    try:
        if request.method == 'POST':
            data = json.loads(request.body.decode('utf-8'))
            spider_name = data['spider_name']
            script_name = data['script_name']
            apply_to_all = data['applyToAll']
            task_id = data['project_id']

            script_args = []
            for p in data.get('params'):
                if isinstance(p['args'], str):
                    p['args'] = json.loads(p['args'])
                script_args.append(p)
                if p.get('trigger'):
                    result, message = scheduler_helper.verify_cron(p.get('trigger'))
                    if not result:
                        raise Exception('Invalid parameter: {}'.format(message))

            update_kwargs = {
                "trigger": data.get('trigger'),
                "hosts": data.get('hosts'),
                "args": json.dumps(script_args)}


            # 批量设置当前任务的所有脚本
            if apply_to_all:
                crawl_scripts = CrawlScript.objects.filter(task_id=task_id)
                crawl_scripts.update(**update_kwargs)
            else:
                crawl_scripts = CrawlScript.objects.get(name=spider_name, task_id=task_id)
                crawl_scripts.trigger = data.get('trigger')
                crawl_scripts.hosts = data.get('hosts')
                crawl_scripts.args = json.dumps(script_args)
                crawl_scripts.save()

            if 'params' in data and data['params']:
                args = data['params']
                # store each script's run parameters; args for different scheduling batches are keyed by an md5 digest
                for arg in args:
                    if apply_to_all:
                        for script in crawl_scripts:
                            v_arg = encrypt_kit.md5(json.dumps(arg))
                            crawl_redis.set("args#{}#{}".format(script.name, v_arg), json.dumps(arg['args']))
                    else:
                        v_arg = encrypt_kit.md5(json.dumps(arg))
                        crawl_redis.set("args#{}#{}".format(spider_name, v_arg), json.dumps(arg['args']))

            r = Result.success("")
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
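
The args#<spider>#<md5> keys written above imply a read side where the spider recovers its run parameters by recomputing the same key. A sketch of that consumer, assuming `encrypt_kit.md5` is a plain hex MD5 digest:

import hashlib
import json

def load_script_args(crawl_redis, spider_name, arg):
    # Hypothetical sketch: the md5 of the serialized parameter block
    # identifies the scheduling batch, so the spider can fetch exactly
    # the args it was launched with.
    v_arg = hashlib.md5(json.dumps(arg).encode('utf-8')).hexdigest()
    raw = crawl_redis.get('args#{}#{}'.format(spider_name, v_arg))
    return json.loads(raw) if raw else None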
Example #16
def node_index(request):
    """
    Fetch the node list
    :param request: request object
    :return: client list
    """
    if request.method == 'GET':
        data = CrawlNode.objects.filter(is_deleted=0).order_by('-id')
        r = Result.success(data)
        return JsonResponse(r)
Example #17
def task_create(request):
    """
    Create a task
    :param request: request object
    :return: Bool
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        task = CrawlTask.objects.create(**data)
        r = Result.success(model_to_dict(task))
        return JsonResponse(r)
Example #18
def node_info(request, client_id):
    """
    Fetch spider node info
    :param request: request object
    :param client_id: client id
    :return: json
    """
    if request.method == 'GET':
        data = model_to_dict(CrawlNode.objects.get(id=client_id))
        r = Result.success(data=data)
        return JsonResponse(r)
Example #19
def list_task_progress(request):
    """
    Spider task progress
    :param request:
    :return:
    """
    try:
        if request.method == 'POST':
            data = json.loads(request.body.decode('utf-8'))
            keyword = data.get('keyword')
            script_name = data.get('script_name')
            date = data.get('date')
            status = data.get('status')
            page = data.get('page', 1)
            size = data.get('size', 15)

            task_progress = CrawlScriptProgress.objects.filter(is_deleted=0).exclude(script_name='proxy')

            condition_date = datetime.datetime.today().strftime('%Y-%m-%d') if not date else date
            stat_task_progress = task_progress.filter(start_time__gte='{} 00:00:00'.format(condition_date),
                                                      start_time__lte='{} 23:59:59'.format(condition_date))
            running_cnt = stat_task_progress.filter(status=1).count()
            success_cnt = stat_task_progress.filter(status=2).count()
            fail_cnt = stat_task_progress.filter(status=-1).count()

            if keyword is not None and keyword != '':
                task_progress = task_progress.filter(task_name__icontains=keyword)
            if script_name is not None and script_name != '':
                task_progress = task_progress.filter(script_name__icontains=script_name)
            if date is not None and date != '':
                task_progress = task_progress.filter(start_time__gte='{} 00:00:00'.format(date),
                                                     start_time__lte='{} 23:59:59'.format(date))
            if status is not None:
                task_progress = task_progress.filter(status__in=status)
            task_progress = task_progress.order_by("-id")

            total = task_progress.count()
            pager = page_helper(total, page, size, task_progress, {'fail_cnt': fail_cnt,
                                                                   'running_cnt': running_cnt,
                                                                   'success_cnt': success_cnt})
            convert_task_progress = []
            results = pager.get('results')
            for result in results:
                result['run_time'] = time_kit.convert_ms(result.get('run_time'))
                result['script_id'] = CrawlScript.objects.get(task_name=result.get('task_name'),
                                                              name=result.get('script_name')).id
                convert_task_progress.append(result)
            pager['results'] = convert_task_progress

            r = Result.success(pager)
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
Example #20
def project_file_delete(request):
    """
    Delete a spider project file
    :param request: request object
    :return: result of delete
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        path = join(data['path'], data['label'])
        os.remove(path)
        return JsonResponse(Result.success(""))
Example #21
def project_file_rename(request):
    """
    Rename a spider project file
    :param request: request object
    :return: result of rename
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        pre = join(data['path'], data['pre'])
        new = join(data['path'], data['new'])
        os.rename(pre, new)
        return JsonResponse(Result.success(""))
Example #22
def project_file_create(request):
    """
    Create a spider project file
    :param request: request object
    :return: result of create
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        path = join(data['path'], data['name'])
        open(path, 'w', encoding='utf-8').close()
        r = Result.success("")
        return JsonResponse(r)
Example #23
def get_profile(request, user_id):
    """
    Fetch profile info
    :param user_id:
    :param request:
    :return:
    """
    if request.method == 'GET':
        user = CrawlUser.objects.get(id=user_id)
        user_dict = model_to_dict(user)
        user_dict['created_at'] = user.created_at
        r = Result.success(user_dict)
        return JsonResponse(r)
Example #24
def project_file_update(request):
    """
    Update a spider project file
    :param request: request object
    :return: result of update
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        path = join(data['path'], data['label'])
        code = data['code']
        with open(path, 'w', encoding='utf-8') as f:
            f.write(code)
        r = Result.success("")
        return JsonResponse(r)
Example #25
def edit_profile(request, user_id):
    """
    Edit profile info (TODO: the product prototype omits alert settings here; alert info should be editable as well)
    :param user_id:
    :param request:
    :return:
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        CrawlUser.objects.filter(id=user_id).update(account=data.get('account'),
                                                    mobile=data.get('mobile'),
                                                    wx_account=data.get('wx_account'))
        r = Result.success(None)
        return JsonResponse(r)
Example #26
def node_manager(request):
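    """
    Paginated node list
    :param request:
    :return:
    """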
    if request.method == 'GET':
        page = request.GET.get('page', 1)
        size = request.GET.get('size', 15)
        nodes = list(CrawlNode.objects.filter(is_deleted=0))
        total = len(nodes)
        output_nodes = []
        for client in nodes:
            client_d = to_dict(client)
            output_nodes.append(client_d)
        r = Result.success(page_helper(total, page, size, output_nodes))
        return JsonResponse(r)
Example #27
def node_status(request, node_id):
    """
    Check the status of a spider node
    :param request: request object
    :param node_id: node id
    :return: json
    """
    if request.method == 'GET':
        # get client object
        client = CrawlNode.objects.get(id=node_id)
        try:
            requests.get(engine_url(client.ip, client.port), timeout=3)
            return JsonResponse(Result.success(""))
        # requests raises its own ConnectionError, distinct from the builtin
        except requests.exceptions.ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
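
`engine_url` is assumed to build a node's base HTTP address, in line with the http://{host}/logs/... URL used in Example #2. A one-line sketch:

def engine_url(ip, port):
    # Hypothetical sketch: base URL of the node's engine (Scrapyd-style)
    # HTTP service, probed above with a 3-second timeout.
    return 'http://{}:{}'.format(ip, port)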
Example #28
def get_proxy_ip(request, proxy_ip_id):
    """
    Fetch a single proxy IP
    :param request:
    :param proxy_ip_id:
    :return:
    """
    try:
        if request.method == 'GET':
            proxy_ip = CrawlProxyIP.objects.get(id=proxy_ip_id)
            r = Result.success(model_to_dict(proxy_ip))
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)
Example #29
def project_list(request, node_id):
    """
    Fetch the spider projects deployed on a given node
    :param request: request object
    :param node_id: node_id
    :return: json
    """
    if request.method == 'GET':
        client = CrawlNode.objects.get(id=node_id)
        engine = get_engine(client)
        try:
            projects = engine.list_projects()
            return JsonResponse(Result.success(data=projects))
        except ConnectionError:
            return JsonResponse(Result.fail())
Example #30
def create_proxy_ip(request):
    """
    Create a proxy IP
    :param request:
    :return:
    """
    try:
        if request.method == 'POST':
            data = json.loads(request.body.decode('utf-8'))
            CrawlProxyIP.objects.create(**data)
            r = Result.success(None)
            return JsonResponse(r)
    except Exception as e:
        r = Result.fail(e)
        return JsonResponse(r)