Example #1
def project_mapping(request):
    # Map the projects deployed on this scrapyd node into the record tables
    result = {'status': 1, 'msg': None, 'data': None}
    if request.method == 'POST':
        user = User.objects.get(username=request.session['username'])
        node = Node.objects.get(nid=request.POST.get('nid'))
        projects = request.POST.get('projects').split(',')

        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            # Remove records for projects that are no longer deployed
            for project in set(projects) - set(scrapyd.list_projects()):
                if project:
                    # Delete the project record; filter() will not raise
                    # DoesNotExist if the record is already gone
                    Project.objects.filter(name=project, node=node, user=user).delete()
                    # Delete the associated job records
                    Job.objects.filter(project=project, node=node.nid, user=user).delete()

            # Map the current deployments into the record tables
            for project in scrapyd.list_projects():
                Project.objects.get_or_create(name=project, node=node, user=user)

                for spider in scrapyd.list_spiders(project):
                    # get_or_create() returns an (object, created) tuple
                    job, _ = Job.objects.get_or_create(name=spider, project=project, user=user, node=node.nid)
                    job.status = 1
                    job.save()

    result['msg'] = 'Mapping completed successfully!'

    return Response(result)
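
All of these examples depend on two helpers, uri() and scrapyd_obj(), that the listing never shows. A minimal sketch of what they plausibly look like, assuming the python-scrapyd-api package (ScrapydAPI and the methods called on it are real; the helper bodies themselves are an assumption):

from scrapyd_api import ScrapydAPI

def uri(ip, port):
    # Build the base URL of a scrapyd node, e.g. 'http://127.0.0.1:6800'
    return 'http://%s:%s' % (ip, port)

def scrapyd_obj(target):
    # Return a client for the node, or None if the daemon is unreachable;
    # the views in these examples treat a falsy return value as "node down"
    try:
        scrapyd = ScrapydAPI(target)
        scrapyd.list_projects()  # probe the daemon
        return scrapyd
    except Exception:
        return None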
Example #2
def job_view(request):
    # View the run history of one spider
    result = {}
    if request.method == 'POST':
        data = request.POST.dict()
        nid, project, spider = data['data'].split(',')
        node = Node.objects.get(nid=nid)
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            jobs = scrapyd.list_jobs(project)

            spiders_list = []
            for status, job_group in jobs.items():
                # list_jobs() also returns a 'node_name' key whose value is a
                # string, not a list of jobs
                if status == 'node_name':
                    continue
                for j in job_group:
                    if j['spider'] == spider:
                        j['start_time'] = j['start_time'][:19] if 'start_time' in j else ''
                        j['end_time'] = j['end_time'][:19] if 'end_time' in j else ''
                        spiders_list.append(j)
            result['spiders'] = spiders_list
            result['project'] = project
            result['node'] = node.ip
            result['port'] = node.port
            result['nid'] = node.nid
    # Return a response on every path, not only when the node is reachable
    return Response(result)
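
For reference, scrapyd's listjobs.json endpoint (which list_jobs() wraps) groups jobs by state, which is why the code above skips the 'node_name' key. An illustrative response shape with made-up values:

jobs = {
    'node_name': 'node-1',
    'pending': [{'id': '78391cc0fcaf11e1b0090800272a6d06', 'spider': 'spider1'}],
    'running': [{'id': '422e608f9f28cef127b3d5ef93fe9399', 'spider': 'spider2',
                 'start_time': '2012-09-12 10:14:03.594664'}],
    'finished': [{'id': '2f16646cfcaf11e1b0090800272a6d06', 'spider': 'spider3',
                  'start_time': '2012-09-12 10:14:03.594664',
                  'end_time': '2012-09-12 10:24:03.594664'}],
}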
Example #3
    def get_status(self, pk):
        # Node status: 1 if the scrapyd daemon answers, 0 otherwise
        try:
            node = Node.objects.get(nid=pk)
            resp = requests.get(uri(node.ip, node.port), timeout=0.1)
        except Exception:
            # Unknown node and unreachable daemon both count as down
            return 0

        return 1 if resp.ok else 0
Example #4
def log_view(request):
    result = {'status': 1, 'msg': None, 'data': None}
    data = request.GET.dict()
    node = Node.objects.get(nid=data['nid'])
    url = uri(node.ip, node.port) + '/logs/%s/%s/%s.log' % (data['project'], data['spider'], data['job'])
    head = requests.head(url)
    # Cap the transfer at the last 10 KiB of the log
    length = min(int(head.headers.get('Content-Length', 0)), 10240)
    # 'bytes=-N' is a suffix range: request only the last N bytes
    response = requests.get(url, headers={'Range': 'bytes=-%s' % length})
    # Keep at most the last 100 lines, re-joined with their newlines
    txt = response.content.decode().split('\n')[-100:]
    result['data'] = '\n'.join(txt)
    return Response(result)
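
A server that honors the suffix range replies 206 Partial Content; one that ignores the Range header replies 200 with the full file. A defensive variant (a sketch, not part of the original code) would fall back to slicing:

resp = requests.get(url, headers={'Range': 'bytes=-10240'})
if resp.status_code == 206:
    tail = resp.content            # server returned only the requested tail
else:
    tail = resp.content[-10240:]   # Range ignored: slice the full body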
Example #5
    def get(self, request):
        per = 10
        result = {'status': 1, 'msg': None, 'data': None}
        page = int(request.GET.get('page', 1))
        # All spiders belonging to the current user
        user = User.objects.get(username=request.session['username'])
        spiders_list = []
        projects_list = []
        nodes_cache = {}

        for job in Job.objects.filter(user=user):

            # Resolve each node (and its scrapyd client) only once
            if job.node in nodes_cache:
                node, scrapyd = nodes_cache[job.node]
            else:
                node = Node.objects.get(pk=job.node)
                scrapyd = scrapyd_obj(uri(node.ip, node.port))
                nodes_cache[job.node] = (node, scrapyd)

            if job.status == 1:
                spiders_list.append(
                    {
                        'node': job.node,
                        'project': job.project,
                        'spider': job.name,
                        'jid': job.jid,
                        'ip': node.ip,
                        'alive': job.alive
                    }
                )

            project_dict = {'ip': node.ip, 'project': job.project}
            if project_dict not in projects_list and job.status == 1:
                projects_list.append(project_dict)

        result['data'] = {}
        result['data']['pages'] = get_pages(len(spiders_list), per)
        page = min(page, result['data']['pages'])
        result['data']['spiders'] = spiders_list[(page - 1) * per:page * per]
        result['data']['projects'] = projects_list

        return Response(result)
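
get_pages() is another helper the listing does not define; from the way it is used, it presumably returns the page count via ceiling division. A plausible sketch (the name comes from the examples; the body is an assumption):

import math

def get_pages(total, per):
    # Number of pages needed to show `total` items at `per` per page;
    # returning 1 for an empty list keeps the view's page arithmetic simple
    return math.ceil(total / per) if total else 1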
Example #6
def job_mapping(request):
    # Toggle the status of a project's spiders
    result = {'status': 1, 'msg': None, 'data': None}
    if request.method == 'POST':
        user = User.objects.get(username=request.session['username'])
        node = Node.objects.get(nid=request.POST.get('nid'))
        project = Project.objects.get(pk=request.POST.get('id'))
        status = request.POST.get('status')
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            spiders_list = scrapyd.list_spiders(project.name)
            for spider in spiders_list:
                # get_or_create() returns an (object, created) tuple
                job, _ = Job.objects.get_or_create(name=spider, project=project.name, user=user, node=project.node.nid)
                job.status = status
                job.save()

            project.status = status
            project.save()
            result['msg'] = 'Updated successfully!'
    return Response(result)
Example #7
    def post(self, request):
        result = {'status': 1, 'msg': None, 'data': None}
        # Delete a deployment record
        project = Project.objects.get(pk=request.POST.get('id'))
        node = Node.objects.get(nid=request.POST.get('nid'))
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd and project.name in scrapyd.list_projects():
            # Remove the project deployed on the scrapyd node
            scrapyd.delete_project(project.name)

        if project.file:
            # Delete the uploaded package file
            delete_file(project.file.path)
        else:
            delete_file(settings.MEDIA_ROOT + '/deploy/%s.zip' % project.name)

        # Delete the database record
        project.delete()

        result['msg'] = 'Project %s has been deleted!' % project.name
        return Response(result)
Example #8
def job_start(request):
    # Start a spider
    result = {}
    if request.method == 'POST':
        data = request.POST.dict()
        nid, project, spider = data['data'].split(',')

        node = Node.objects.get(nid=nid)
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            # schedule() returns the scrapyd job id of the new run
            result['jid'] = scrapyd.schedule(project, spider)
            # Mark the job as running in the record table
            job = Job.objects.get(user=User.objects.get(username=request.session['username']), project=project,
                                  name=spider)
            job.jid = result['jid']
            job.alive = 1
            job.save()

        result['node'] = nid
        result['project'] = project
        result['spider'] = spider
        return Response(result)
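
In python-scrapyd-api, schedule() wraps scrapyd's schedule.json and also accepts per-run settings plus arbitrary keyword arguments that are passed to the spider. A hedged usage sketch (the spider argument name category is invented for illustration):

jid = scrapyd.schedule(
    project, spider,
    settings={'DOWNLOAD_DELAY': 2},  # Scrapy setting for this run only
    category='books',                # hypothetical spider argument
)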
Example #9
def job_stop(request):
    # Stop a spider
    def _is_alive(scrapyd, project, jid):
        # True if the job id is still in scrapyd's 'running' list
        for job in scrapyd.list_jobs(project)['running']:
            if job['id'] == jid:
                return True
        return False

    result = {'status': 1, 'msg': None, 'data': None}
    data = {}
    if request.method == 'POST':
        data = request.POST.dict()
        nid, project, jid, spider = data['data'].split(',')
        node = Node.objects.get(nid=nid)
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            scrapyd.cancel(project, jid)

            # Force-kill if the polite cancel did not end the job
            try:
                if _is_alive(scrapyd, project, jid):
                    scrapyd.cancel(project, jid, 'KILL')
            except Exception:
                result['status'] = 0
                result['msg'] = 'Stop failed; please force-stop the job'
            # Clear the job id and mark the job as stopped
            job = Job.objects.get(user=User.objects.get(username=request.session['username']), project=project,
                                  name=spider)
            job.jid = None
            job.alive = 0
            job.save()
        data['node'] = nid
        data['project'] = project
        data['spider'] = spider
        result['data'] = data

        return Response(result)