예제 #1
0
파일: views.py 프로젝트: jinuoA/spider
def job_log(request, client_id, project_name, spider_name, job_id):
    """
    get the tail of the log of a job
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :param spider_name: spider name
    :param job_id: job id
    :return: last 1000 bytes of the log as text, or a json error message
    """
    if request.method == 'GET':
        node = Node.objects.get(id=client_id)
        # get log url
        url = log_url(node.ip, node.port, project_name, spider_name, job_id)
        try:
            # ask the server for the last 1000 bytes of the log only
            response = requests.get(
                url,
                timeout=5,
                headers={'Range': 'bytes=-1000'},
                auth=(node.username, node.password) if node.auth else None)
        except requests.RequestException:
            # RequestException is the base class of ConnectionError and of
            # the timeout errors; a read timeout is not a ConnectionError
            return JsonResponse({'message': 'Load Log Error'}, status=500)
        # log not found
        if response.status_code == 404:
            return JsonResponse({'message': 'Log Not Found'}, status=404)
        # the detected encoding may be None (e.g. for an empty body)
        encoding = response.apparent_encoding or 'utf-8'
        # bytes to string
        try:
            text = response.content.decode(encoding, errors='replace')
        except LookupError:
            # detector reported a codec name python does not know
            text = response.content.decode('utf-8', errors='replace')
        return HttpResponse(text)
예제 #2
0
파일: views.py 프로젝트: jinuoA/spider
def project_version(request, node_id, project_name):
    """
    get project deploy version
    :param request: request object
    :param node_id: node id
    :param project_name: project name
    :return: deploy version of project
    """
    if request.method == 'GET':
        # get node and project model
        node = Node.objects.get(id=node_id)
        project = Project.objects.get(name=project_name)
        scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
        # if deploy info exists in db, return it; first() needs one query
        # and does not raise when duplicate rows exist
        deploy = Deploy.objects.filter(node=node, project=project).first()
        # if deploy info does not exist in db, create deploy info
        if deploy is None:
            try:
                versions = scrapyd.list_versions(project_name)
            except ConnectionError:
                return JsonResponse({'message': 'Connect Error'}, status=500)
            if versions:
                # the last listed version is read as a unix timestamp
                deployed_at = timezone.datetime.fromtimestamp(
                    int(versions[-1]), tz=pytz.timezone(TIME_ZONE))
            else:
                deployed_at = None
            # look up by (node, project) only; deployed_at is data to
            # store, not part of the identity of the row
            deploy, _ = Deploy.objects.update_or_create(
                node=node,
                project=project,
                defaults={'deployed_at': deployed_at})
        # return deploy json info
        return JsonResponse(model_to_dict(deploy))
예제 #3
0
파일: views.py 프로젝트: jinuoA/spider
def project_build(request, project_name):
    """
    get build info or execute build operation
    :param request: request object
    :param project_name: project name
    :return: json (project info on GET, {"result": 1} on POST)
    """
    # get project folder
    path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
    project_path = join(path, project_name)
    # get build version
    if request.method == 'GET':
        egg = find_egg(project_path)
        if egg:
            # if built, save or update project to db; the build time is
            # the modification time of the egg file
            built_at = timezone.datetime.fromtimestamp(
                os.path.getmtime(join(project_path, egg)),
                tz=pytz.timezone(TIME_ZONE))
            model, _ = Project.objects.update_or_create(
                spider_name=project_name,
                defaults={'built_at': built_at, 'egg': egg})
        else:
            # if not built, just save project name to db; it has to be
            # stored under spider_name, the field every lookup here uses
            model, _ = Project.objects.get_or_create(
                spider_name=project_name)
        # transfer model to dict then dumps it to json
        return JsonResponse(model_to_dict(model))
    # build operation manually by clicking button
    elif request.method == 'POST':
        build_project(project_name)
        egg = find_egg(project_path)
        # create the project if missing, otherwise refresh egg/built_at;
        # no description is written: the old code passed the class
        # attribute Project.spider_desc, which is not a value
        Project.objects.update_or_create(
            spider_name=project_name,
            defaults={'built_at': timezone.now(), 'egg': egg})
        return JsonResponse({"result": 1})
예제 #4
0
파일: views.py 프로젝트: jinuoA/spider
def project_deploy(request, node_id, project_name):
    """
    deploy the built egg of a project to a node
    :param request: request object
    :param node_id: node id
    :param project_name: project name
    :return: json with the deploy info, or an error message with status 500
    """
    if request.method == 'POST':
        # get project folder
        path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
        project_path = join(path, project_name)
        # find egg file
        egg = find_egg(project_path)
        if not egg:
            # nothing has been built yet, so there is nothing to upload
            return JsonResponse({'message': 'Egg Not Found'}, status=500)
        # read the egg inside a context manager so the handle is always
        # closed, even when a later step raises
        with open(join(project_path, egg), 'rb') as egg_file:
            egg_data = egg_file.read()
        # get node and project model
        node = Node.objects.get(id=node_id)
        project = Project.objects.get(spider_name=project_name)
        # execute deploy operation
        scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
        try:
            # the current unix time is used as the version label
            scrapyd.add_version(project_name, int(time.time()), egg_data)
            # update deploy info
            deployed_at = datetime.datetime.now().strftime(
                "%Y-%m-%d %H:%M:%S")
            Deploy.objects.filter(node=node, project=project).delete()
            deploy, result = Deploy.objects.update_or_create(
                node=node,
                project=project,
                deployed_at=deployed_at,
                description=project.spider_desc)
            return JsonResponse({'result': 1, "deploy": model_to_dict(deploy)})
        except Exception:
            # view boundary: report the traceback to the caller
            return JsonResponse({'message': get_traceback()}, status=500)
예제 #5
0
def failUrlList(request):
    """
    list all failed urls, newest first
    :param request: request object
    :return: json {'failUrlList': [...]} or {'message': 'not data'}
    """
    if request.method == "GET":
        try:
            rows = Fail_url_detail.objects.all().order_by('-save_time')
            # a QuerySet is not JSON serializable; turn it into a list of
            # plain dicts before handing it to JsonResponse
            return JsonResponse({'failUrlList': list(rows.values())})
        except Exception:
            # best-effort endpoint: any failure is reported as "no data"
            return JsonResponse({'message': 'not data'})
예제 #6
0
def addTemplate(request):
    """
    fill the xpath placeholders of a stored spider template
    :param request: request object whose json body carries the five xpath
        values and the template type (select_template)
    :return: json {'func': rendered template} or {'messages': 'input error'}
    """
    if request.method == "POST":
        data = json.loads(request.body.decode('utf-8'))
        # (pattern locating the current value in the template, new value);
        # order matters because the replacements are applied one by one
        substitutions = (
            (r'next_page_xpath = u"(.*)"', data.get('next_page_xpath')),
            (r'list_xpath = "(.*)"', data.get('item_list_xpath')),
            (r'title_xpath = "(.*)"', data.get('item_title_xpath')),
            (r'url_xpath = "(.*)"', data.get('item_url_xpath')),
            (r'pdate_xpath = "(.*)"', data.get('item_publishdata_xpath')),
        )
        # every xpath must be supplied and non-empty
        if not all(value for _, value in substitutions):
            return JsonResponse({'messages': 'input error'})

        row = SpiderTemplates.objects.filter(
            tem_type=data.get('select_template')).values().first()
        if row is None:
            # no template stored for the requested type
            return JsonResponse({'messages': 'input error'})
        template = row.get('tem_text')

        func = template
        for pattern, value in substitutions:
            # the current values are always looked up in the untouched
            # template, then swapped in the progressively rewritten text
            found = re.findall(pattern, template)
            if not found:
                # the template does not contain this placeholder
                return JsonResponse({'messages': 'input error'})
            func = func.replace(found[0], value)
        return JsonResponse({'func': func})
예제 #7
0
파일: views.py 프로젝트: jinuoA/spider
 def task():
     """
     schedule the spider on the node; node_id, project_name and
     spider_name are taken from the enclosing scope
     :return: json with the job, or a connect error with status 500
     """
     target = Node.objects.get(id=node_id)
     client = ScrapydAPI(scrapyd_url(target.node_ip, target.node_port))
     try:
         scheduled = client.schedule(project_name, spider_name)
     except ConnectionError:
         return JsonResponse({'message': 'Connect Error'}, status=500)
     return JsonResponse({'job': scheduled})
예제 #8
0
파일: views.py 프로젝트: jinuoA/spider
def remove_all_version(request, project, client_id):
    """
    delete a project, with all its versions, from a node
    :param request: request object
    :param project: project name
    :param client_id: node id
    :return: json of the scrapyd result, or a connect error with status 500
    """
    node = Node.objects.get(id=client_id)
    scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
    try:
        versions = scrapyd.delete_project(project)
    except ConnectionError:
        return JsonResponse({'message': 'Connet Error'}, status=500)
    # safe=False: JsonResponse refuses anything that is not a dict unless
    # told otherwise (presumably the client returns a bool here — verify)
    return JsonResponse(versions, safe=False)
예제 #9
0
파일: views.py 프로젝트: jinuoA/spider
def remove_depody_spider(request, client_id, project, version_name):
    """
    delete one deployed version of a project from a node
    :param request: request object
    :param client_id: node id
    :param project: project name
    :param version_name: version to delete
    :return: json of the scrapyd result, or a connect error with status 500
    """
    if request.method == 'POST':
        node = Node.objects.get(id=client_id)
        scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
        try:
            spider = scrapyd.delete_version(project, version_name)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
        # safe=False: JsonResponse refuses anything that is not a dict
        # unless told otherwise (presumably a bool here — verify)
        return JsonResponse(spider, safe=False)
예제 #10
0
파일: views.py 프로젝트: jinuoA/spider
def project_generate(request, project_name):
    """
    generate code of project
    :param request: request object
    :param project_name: project name
    :return: json of generated project
    """
    if request.method == 'POST':
        # reject bad names before touching the db or the filesystem
        if not is_valid_name(project_name):
            return JsonResponse({'message': 'Invalid project name'},
                                status=500)
        # get configuration
        configuration = json.loads(
            Project.objects.get(name=project_name).configuration)

        # remove original project dir
        project_dir = join(PROJECTS_FOLDER, project_name)
        if exists(project_dir):
            rmtree(project_dir)
        # generate project
        copy_tree(join(TEMPLATES_DIR, 'project'), project_dir)
        move(join(project_dir, 'module'), join(project_dir, project_name))
        suffix = '.tmpl'
        context = {
            'project_name': project_name,
            'items': configuration.get('items'),
        }
        for paths in TEMPLATES_TO_RENDER:
            path = join(*paths)
            tplfile = join(
                project_dir,
                string.Template(path).substitute(project_name=project_name))
            # cut the literal suffix only; rstrip('.tmpl') strips a set of
            # characters and can eat into the real file name
            if tplfile.endswith(suffix):
                target = tplfile[:-len(suffix)]
            else:
                target = tplfile
            render_template(tplfile, target, **context)
        # generate spider
        spiders_dir = join(project_dir, project_name, 'spiders')
        source_tpl_file = join(TEMPLATES_DIR, 'spiders', 'crawl.tmpl')
        new_tpl_file = join(spiders_dir, 'crawl.tmpl')
        # a configuration without spiders yields no spider files
        for spider in configuration.get('spiders') or []:
            spider_file = "%s.py" % join(spiders_dir, spider.get('name'))
            # the template is copied again for every spider before rendering
            copy(source_tpl_file, new_tpl_file)
            render_template(new_tpl_file,
                            spider_file,
                            spider=spider,
                            project_name=project_name)
        # save generated_at attr (re-read so a fresh row is saved)
        model = Project.objects.get(name=project_name)
        model.generated_at = timezone.now()
        # clear built_at attr
        model.built_at = None
        model.save()
        # return model
        return JsonResponse(model_to_dict(model))
예제 #11
0
파일: views.py 프로젝트: jinuoA/spider
def get_spider_version(request, project, client_id):
    """
    list the spiders of a project deployed on a node
    :param request: request object
    :param project: project name
    :param client_id: node id
    :return: json list of {'name', 'id'}, or a connect error with status 500
    """
    node = Node.objects.get(id=client_id)
    scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
    try:
        names = scrapyd.list_spiders(project)
    except ConnectionError:
        return JsonResponse({'message': 'Connect Error'}, status=500)
    # ids are 1-based positions in the list returned by scrapyd
    spiders = [{
        'name': name,
        'id': index
    } for index, name in enumerate(names, start=1)]
    # safe=False is required to send a list instead of a dict
    return JsonResponse(spiders, safe=False)
예제 #12
0
파일: views.py 프로젝트: jinuoA/spider
def get_project_version(request, project, node_id):
    """
    list the versions of a project deployed on a node
    :param request: request object
    :param project: project name
    :param node_id: node id
    :return: json list of {'name', 'id'}, or a connect error with status 500
    """
    if request.method == 'GET':
        node = Node.objects.get(id=node_id)
        scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
        try:
            names = scrapyd.list_versions(project)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
        # ids are 1-based positions in the list returned by scrapyd
        versions = [{
            'name': name,
            'id': index
        } for index, name in enumerate(names, start=1)]
        # safe=False is required to send a list instead of a dict
        return JsonResponse(versions, safe=False)
예제 #13
0
파일: views.py 프로젝트: jinuoA/spider
def node_status(request, node_id):
    """
    get node status
    :param request: request object
    :param node_id: node id
    :return: json {'result': '1'} when the node answers, otherwise a
        connect error with status 500
    """
    if request.method == 'GET':
        # get node object
        node = Node.objects.get(id=node_id)
        try:
            requests.get(scrapyd_url(node.node_ip, node.node_port), timeout=3)
        except (ConnectionError, requests.RequestException):
            # RequestException also covers the timeout raised when the
            # node does not answer within 3 seconds
            return JsonResponse({'message': 'Connect Error'}, status=500)
        return JsonResponse({'result': '1'})
예제 #14
0
파일: views.py 프로젝트: jinuoA/spider
def ruler_update(request, ruler_id):
    """
    update a project ruler with the fields posted as json
    :param request: request object, body is a json object of field values
    :param ruler_id: id of the ruler to update
    :return: json of the updated ruler
    """
    if request.method != 'POST':
        return None
    matching = ProjectRuler.objects.filter(id=ruler_id)
    payload = json.loads(request.body.decode('utf-8'))
    # every key of the posted object is written as a column value
    matching.update(**payload)
    refreshed = ProjectRuler.objects.get(id=ruler_id)
    return JsonResponse(model_to_dict(refreshed))
예제 #15
0
파일: views.py 프로젝트: jinuoA/spider
def paginator(request, obje):
    """
    return one page of the rows of a model
    :param request: request object, json body holds 'page' and 'page_num'
        (page_num is used as the page size)
    :param obje: name of the model class to list
    :return: json list of rows, each with its fields plus 'id'
    """
    if request.method == "POST":
        # SECURITY: eval() runs whatever expression arrives in obje. If
        # this value is caller-controlled (presumably it comes from the
        # url — verify) it must be replaced by a lookup in a whitelist of
        # model classes.
        contact_list = eval(obje).objects.all().order_by('id')
        data = json.loads(request.body.decode('utf-8'))
        page = data['page']
        page_num = data['page_num']

        pager = Paginator(contact_list, page_num)
        try:
            contacts = pager.page(page)
        except PageNotAnInteger:
            # If page is not an integer, deliver first page.
            contacts = pager.page(1)
        except EmptyPage:
            # If page is out of range (e.g. 9999), deliver last page of results.
            contacts = pager.page(pager.num_pages)
        # flatten the serializer output: the fields plus the primary key
        rows = [
            dict(x["fields"], **{"id": x["pk"]})
            for x in json.loads(serialize("json", contacts))
        ]
        # safe=False is required to send a list instead of a dict
        return JsonResponse(rows, safe=False)
예제 #16
0
파일: views.py 프로젝트: jinuoA/spider
def ruler_indexs(request):
    """
    get the list of locked project rulers
    :param request: request object
    :return: json list, one entry per ruler with its project and
        scheduler names
    """
    if request.method == "GET":
        rulers = ProjectRuler.objects.select_related(
            'scheduler', 'project').filter(is_lock=1)
        lis = []
        for ruler in rulers:
            # placeholder label ("not deployed") when no project is attached
            if ruler.project is not None:
                project_name = ruler.project.spider_name
            else:
                project_name = "未部署"
            # placeholder label ("no scheduler added") when none is attached
            if ruler.scheduler is not None:
                schduler_name = ruler.scheduler.schedule_name
            else:
                schduler_name = "未添加调度"
            lis.append({
                "id": ruler.id,
                "schduler_name": schduler_name,
                "spider_name": project_name,
                "project_desc": ruler.project_desc,
                "dept_id": ruler.dept_id,
                "url": ruler.url,
            })
        # safe=False is required to send a list instead of a dict
        return JsonResponse(lis, safe=False)
예제 #17
0
파일: views.py 프로젝트: jinuoA/spider
def delete_spider_scheduler(request):
    """
    detach the scheduler from the given project rulers
    :param request: request object, json body holds 'ids' (ruler ids)
    :return: json {'result': 1}
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        ids = data['ids']
        # id__in selects the posted ids; the old filter(id(ids)) called the
        # builtin id() and handed an integer to filter()
        spider = ProjectRuler.objects.filter(id__in=ids)
        # lower-case field name, as used by the other scheduler views
        spider.update(scheduler_id=None)
        return JsonResponse({'result': 1})
예제 #18
0
파일: views.py 프로젝트: jinuoA/spider
def scheduler_update(request, scheduler_id):
    """
    update a scheduler with the fields posted as json
    :param request: request object, body is a json object of field values
    :param scheduler_id: id of the scheduler to update
    :return: json of the updated scheduler
    """
    if request.method != 'POST':
        return None
    matching = Scheduler.objects.filter(id=scheduler_id)
    payload = json.loads(request.body.decode('utf-8'))
    # every key of the posted object is written as a column value
    matching.update(**payload)
    refreshed = Scheduler.objects.get(id=scheduler_id)
    return JsonResponse(model_to_dict(refreshed))
예제 #19
0
파일: views.py 프로젝트: jinuoA/spider
def scheduler_run_ruler(request, scheduler_id):
    """
    start scheduler task
    :param request: request object
    :param scheduler_id: id of the scheduler; it must exist
    :return: json {'result': 1}
    """
    if request.method == "GET":
        from zzh.scheduler.sched import reload_runnable_spider_job_execution
        # look the scheduler up first so an unknown id fails before any
        # job is registered
        Scheduler.objects.get(id=scheduler_id)
        # NOTE(review): schedul looks like an APScheduler scheduler —
        # confirm. replace_existing keeps a repeated call from failing on
        # the fixed job id.
        schedul.add_job(reload_runnable_spider_job_execution,
                        'interval',
                        minutes=2,
                        id='my_scheduler_job',
                        replace_existing=True)
        # start only once; later calls find the scheduler already running
        if not getattr(schedul, 'running', False):
            schedul.start()
        return JsonResponse({'result': 1})
예제 #20
0
파일: views.py 프로젝트: jinuoA/spider
def index_status(request):
    """
    index statistics
    :param request: request object
    :return: json with the number of reachable nodes ('success'),
        unreachable nodes ('error') and project folders ('project')
    """
    if request.method == 'GET':
        data = {
            'success': 0,
            'error': 0,
            'project': 0,
        }
        # nodes info
        for node in Node.objects.all():
            try:
                requests.get(scrapyd_url(node.node_ip, node.node_port),
                             timeout=1)
            except (ConnectionError, requests.RequestException):
                # RequestException also covers the 1 second timeout, so a
                # slow node counts as an error instead of failing the view
                data['error'] += 1
            else:
                data['success'] += 1
        path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
        # projects info: every directory that is not on the ignore list
        for entry in os.listdir(path):
            if os.path.isdir(join(path, entry)) and entry not in IGNORES:
                data['project'] += 1
        return JsonResponse(data)
예제 #21
0
def taskUrlSearch(request):
    """
    search task urls by department key or publish date
    :param request: request object, json body may hold 'dept_name_key' or
        'item_pulishdate'; the first non-empty one is the keyword
    :return: json list of matching rows, or {'message': 'Error'} when no
        keyword was posted
    """
    if request.method == "POST":
        data = json.loads(request.body.decode('utf-8'))
        search_keywords = data.get('dept_name_key') or data.get(
            'item_pulishdate')
        if not search_keywords:
            return JsonResponse({'message': 'Error'})
        all_orgs = Task_url.objects.filter(
            Q(dept_name_key=search_keywords)
            | Q(item_pulishdate__contains=search_keywords))
        # flatten the serializer output: the fields plus the primary key
        rows = [
            dict(x["fields"], **{"id": x["pk"]})
            for x in json.loads(serialize('json', all_orgs))
        ]
        # safe=False is required to send a list; without it the response
        # could never be built
        return JsonResponse(rows, safe=False)
예제 #22
0
파일: views.py 프로젝트: jinuoA/spider
def search(request):
    """
    search project rulers by keyword
    :param request: request object, json body may hold 'dept_id',
        'dept_name_key', 'project_desc' or 'is_lock'; the first non-empty
        one is the keyword
    :return: json list of matching rows, or a json message on error
    """
    if request.method == "POST":
        data = json.loads(request.body.decode('utf-8'))
        search_keywords = data.get('dept_id') or data.get(
            'dept_name_key') or data.get('project_desc') or data.get('is_lock')
        if not search_keywords:
            return JsonResponse({'message': 'Error'})
        all_orgs = ProjectRuler.objects.filter(
            Q(project_desc__icontains=search_keywords)
            | Q(dept_name_key__icontains=search_keywords)
            | Q(dept_id__icontains=search_keywords))
        try:
            # a QuerySet is not JSON serializable: send a list of plain
            # dicts, which needs safe=False
            return JsonResponse(list(all_orgs.values()), safe=False)
        except Exception:
            # best-effort endpoint: a failing query is reported as a message
            return JsonResponse({'message': 'Project does not exist'})
예제 #23
0
파일: views.py 프로젝트: jinuoA/spider
def spider_start(request, node_id, project_name, spider_name):
    """
    schedule a run of a spider on a node
    :param request: request object
    :param node_id: node id
    :param project_name: project name
    :param spider_name: spider name
    :return: json with the job, or a connect error with status 500
    """
    if request.method != 'GET':
        return None
    target = Node.objects.get(id=node_id)
    client = ScrapydAPI(scrapyd_url(target.node_ip, target.node_port))
    try:
        scheduled = client.schedule(project_name, spider_name)
    except ConnectionError:
        return JsonResponse({'message': 'Connect Error'}, status=500)
    return JsonResponse({'job': scheduled, "result": 1})
예제 #24
0
파일: views.py 프로젝트: jinuoA/spider
def job_cancel(request, node_id, project_name, job_id):
    """
    cancel a job
    :param request: request object
    :param node_id: node id
    :param project_name: project name
    :param job_id: job id
    :return: json of cancel, or a connect error with status 500
    """
    if request.method == 'GET':
        node = Node.objects.get(id=node_id)
        try:
            scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
            res = scrapyd.cancel(project_name, job_id)
        except ConnectionError:
            # report the failure with status 500 like the other node views,
            # instead of an error message inside a 200 response
            return JsonResponse({'message': 'Connect Error'}, status=500)
        return JsonResponse({"res": res, "result": 1})
예제 #25
0
파일: views.py 프로젝트: jinuoA/spider
def delete_project(request):
    """
    detach the project from the given project rulers
    :param request: request object, json body holds 'ids' (ruler ids)
    :return: json {'result': 1}
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        ids = data['ids']
        # 'spider_id' is no longer read: it was never used, and requiring
        # it made requests without that key fail.
        # one UPDATE for all the ids instead of one query per id
        ProjectRuler.objects.filter(id__in=ids).update(project_id=None)
        return JsonResponse({'result': 1})
예제 #26
0
def FailUrlSearch(request):
    """
    search failed urls by keyword
    :param request: request object, json body may hold 'save_time',
        'queue_url', 'spider_url', 'status_code' or 'dept_id'; the first
        non-empty one is the keyword
    :return: json list of matching rows, or a json message on error
    """
    if request.method == "POST":
        data = json.loads(request.body.decode('utf-8'))
        search_keywords = data.get('save_time') or data.get(
            'queue_url') or data.get('spider_url') or data.get(
                'status_code') or data.get('dept_id')
        if not search_keywords:
            return JsonResponse({'message': 'Error'})
        all_orgs = Fail_url_detail.objects.filter(
            Q(save_time__icontains=search_keywords)
            | Q(queue_url__icontains=search_keywords)
            | Q(spider_url__icontains=search_keywords)
            | Q(status_code__icontains=search_keywords)
            | Q(dept_id__icontains=search_keywords))
        try:
            # a QuerySet is not JSON serializable: send a list of plain
            # dicts, which needs safe=False
            return JsonResponse(list(all_orgs.values()), safe=False)
        except Exception:
            # best-effort endpoint: a failing query is reported as a message
            return JsonResponse({'message': 'Project does not exist'})
예제 #27
0
파일: views.py 프로젝트: jinuoA/spider
def add_spider_scheduler(request):
    """
    attach a scheduler to the given project rulers
    :param request: request object, json body holds 'ids' (ruler ids) and
        'scheduler_id'
    :return: json {'result': 1}
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        ids = data['ids']
        # NOTE: the original author planned to replace scheduler_id with an
        # `enabled` flag (0 = not scheduled, 1 = scheduled)
        scheduler_id = data['scheduler_id']
        # id__in selects the posted ids; the old filter(id(ids)) called the
        # builtin id() and handed an integer to filter()
        spider = ProjectRuler.objects.filter(id__in=ids)
        # lower-case field name, as used by add_project_scheduler
        spider.update(scheduler_id=scheduler_id)
        return JsonResponse({'result': 1})
예제 #28
0
파일: views.py 프로젝트: jinuoA/spider
def node_info(request, node_id):
    """
    return the stored fields of one node
    :param request: request object
    :param node_id: node id
    :return: json of the node
    """
    if request.method != 'GET':
        return None
    node = Node.objects.get(id=node_id)
    return JsonResponse(model_to_dict(node))
예제 #29
0
파일: views.py 프로젝트: jinuoA/spider
def add_project_scheduler(request):
    """
    attach a scheduler to the given locked project rulers
    :param request: request object, json body holds 'ids' (ruler ids) and
        'scheduler_id'
    :return: json {'result': 1}
    """
    if request.method == 'POST':
        data = json.loads(request.body.decode('utf-8'))
        ids = data['ids']
        scheduler_id = data['scheduler_id']
        # one UPDATE for all the ids instead of one query per id; only
        # locked rulers (is_lock=1) are touched
        ProjectRuler.objects.filter(
            id__in=ids, is_lock=1).update(scheduler_id=scheduler_id)
        return JsonResponse({'result': 1})
예제 #30
0
파일: views.py 프로젝트: jinuoA/spider
def project_list(request, node_id):
    """
    list the projects deployed on one node
    :param request: request object
    :param node_id: node id
    :return: json with 'result' and 'lis', or a connect error with
        status 500
    """
    if request.method != 'GET':
        return None
    target = Node.objects.get(id=node_id)
    client = ScrapydAPI(scrapyd_url(target.node_ip, target.node_port))
    try:
        names = client.list_projects()
    except ConnectionError:
        return JsonResponse({'message': 'Connect Error'}, status=500)
    # one {'spider_name': ...} entry per project reported by the node
    entries = [{'spider_name': name} for name in names]
    return JsonResponse({'result': 1, 'lis': entries})