def project_configure(request, project_name):
    """
    get or update the configuration of a project
    GET returns the project as a dict with its stored configuration
    decoded from json; POST stores the posted configuration and then
    runs the `gerapy generate` command for the project
    :param request: request object
    :param project_name: project name
    :return: json
    """
    # get configuration
    if request.method == 'GET':
        project = Project.objects.get(name=project_name)
        project = model_to_dict(project)
        # configuration is stored as a json string, empty means not configured
        project['configuration'] = json.loads(
            project['configuration']) if project['configuration'] else None
        return JsonResponse(project)

    # update configuration
    elif request.method == 'POST':
        project = Project.objects.filter(name=project_name)
        data = json.loads(request.body)
        configuration = json.dumps(data.get('configuration'))
        project.update(**{'configuration': configuration})
        # execute generate cmd, passed as an argument list without a shell
        # so that project_name can never be interpreted as shell syntax
        cmd = ['gerapy', 'generate', project_name]
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate drains both pipes together and waits for exit,
        # reading them one after another can block on a full pipe
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        logger.debug('return code %s', p.returncode)
        logger.debug('stdout %s', stdout)
        logger.debug('stderr %s', stderr)
        # any output on stderr is reported to the caller as a failure
        if not stderr:
            return JsonResponse({'status': '1'})
        return JsonResponse({'status': '0', 'message': stderr})
def project_clone(request):
    """
    clone project from github
    the posted json must contain an `address` starting with `http`,
    `.git` is appended when missing and the repository is cloned into
    PROJECTS_FOLDER under the name taken from the address
    :param request: request object
    :return: json
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        address = data.get('address')
        # a missing or non-string address cannot be cloned
        if not isinstance(address, str) or not address.startswith('http'):
            return JsonResponse({'status': False})
        if not address.endswith('.git'):
            address = address + '.git'
        cmd = [
            'git',
            'clone',
            address,
            join(PROJECTS_FOLDER, Path(address).stem),
        ]
        logger.debug('clone cmd %s', cmd)
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate drains both pipes together and waits for exit,
        # reading them one after another can block on a full pipe
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        logger.debug('clone run result %s', stdout)
        # git writes informational messages to stderr even when the clone
        # succeeds, so the exit code decides success, not stderr content
        if p.returncode != 0:
            logger.error(stderr)
            return JsonResponse({'status': False})
        if stderr:
            logger.debug('clone stderr %s', stderr)
        return JsonResponse({'status': True})
def project_configure(request, project_name):
    """
    get or update the configuration of a project
    GET returns the project as a dict with its stored configuration
    decoded from json; POST stores the posted configuration and then
    runs the `gerapy generate` command for the project
    :param request: request object
    :param project_name: project name
    :return: json
    """
    # get configuration
    if request.method == 'GET':
        project = Project.objects.get(name=project_name)
        project = model_to_dict(project)
        # configuration is stored as a json string, empty means not configured
        project['configuration'] = json.loads(
            project['configuration']) if project['configuration'] else None
        return JsonResponse(project)

    # update configuration
    elif request.method == 'POST':
        project = Project.objects.filter(name=project_name)
        data = json.loads(request.body)
        configuration = json.dumps(data.get('configuration'),
                                   ensure_ascii=False)
        project.update(**{'configuration': configuration})
        # for safe protection, drop whitespace and special characters from
        # the name before it is handed to the command line; raw string so
        # the regex escapes are not treated as string escapes
        project_name = re.sub(r'[\s!@#$;&*~"\'{}\]\[\-+%^]+', '',
                              project_name)
        # execute generate cmd
        cmd = ['gerapy', 'generate', project_name]
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate drains both pipes together and waits for exit,
        # reading them one after another can block on a full pipe
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        # any output on stderr is reported to the caller as a failure
        if not stderr:
            return JsonResponse({'status': '1'})
        return JsonResponse({'status': '0', 'message': stderr})
def project_parse(request, project_name):
    """
    parse project
    builds the arguments of a `gerapy parse` command from the posted
    json, runs it against the project folder and returns its output
    :param request: request object
    :param project_name: project name
    :return: requests, items, response
    """
    if request.method == 'POST':
        project_path = join(PROJECTS_FOLDER, project_name)
        data = json.loads(request.body)
        logger.debug('post data %s', data)
        spider_name = data.get('spider')
        # the spider name is a positional argument of the command and
        # Popen rejects None in the argument list
        if not spider_name:
            return JsonResponse({'status': False,
                                 'message': 'spider is required'})
        # structured values are json encoded and wrapped in single quotes
        args = {
            'start': data.get('start', False),
            'method': data.get('method', 'GET'),
            'url': data.get('url'),
            'callback': data.get('callback'),
            'cookies': "'" + json.dumps(data.get('cookies', {}),
                                        ensure_ascii=False) + "'",
            'headers': "'" + json.dumps(data.get('headers', {}),
                                        ensure_ascii=False) + "'",
            'meta': "'" + json.dumps(data.get('meta', {}),
                                     ensure_ascii=False) + "'",
            'dont_filter': data.get('dont_filter', False),
            'priority': data.get('priority', 0),
        }
        # set request body, only sent for methods other than GET
        body = data.get('body', '')
        if args.get('method').lower() != 'get':
            args['body'] = "'" + json.dumps(body, ensure_ascii=False) + "'"

        # every option becomes a `--name value` pair of separate arguments
        args_array = []
        for arg, value in args.items():
            args_array.extend([f'--{arg}', f'{value}'])
        cmd = ['gerapy', 'parse'] + args_array + [project_path, spider_name]
        logger.debug('parse cmd %s', cmd)
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                  close_fds=True)
        # communicate drains both pipes together and waits for exit,
        # reading them one after another can block on a full pipe
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        logger.debug('stdout %s, stderr %s', stdout, stderr)
        # any output on stderr is reported to the caller as a failure
        if stderr:
            return JsonResponse({'status': False, 'message': stderr})
        try:
            result = json.loads(stdout)
        except ValueError:
            # the command did not print valid json, report it instead of
            # letting the decode error escape the view
            return JsonResponse({'status': False, 'message': stdout})
        return JsonResponse({'status': True, 'result': result})
def project_parse(request, project_name):
    """
    parse project
    builds the arguments of a `gerapy parse` command from the posted
    json, runs it against the project folder and returns its output
    :param request: request object
    :param project_name: project name
    :return: requests, items, response
    """
    if request.method == 'POST':
        project_path = join(PROJECTS_FOLDER, project_name)
        logger.debug('project path %s', project_path)
        data = json.loads(request.body)
        spider_name = data.get('spider')
        # construct args cmd
        args = {
            'start': data.get('start', 0),
            'method': data.get('method', 'GET'),
            'url': data.get('url'),
            'callback': data.get('callback'),
        }
        # the command is an argument list run without a shell, so values
        # taken from the request can never be interpreted as shell syntax
        cmd = ['gerapy', 'parse']
        for arg, value in args.items():
            # options with an empty or falsy value are left out
            if value:
                cmd.extend([f'--{arg}', str(value)])
        cmd.extend([project_path, str(spider_name)])
        logger.debug('parse cmd %s', cmd)
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                  close_fds=True)
        # communicate drains both pipes together and waits for exit,
        # reading them one after another can block on a full pipe
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        logger.debug('stdout %s', stdout)
        logger.debug('stderr %s', stderr)
        # any output on stderr is reported to the caller as a failure
        if not stderr:
            return JsonResponse({'status': '1', 'result': json.loads(stdout)})
        return JsonResponse({'status': '0', 'message': stderr})