def test_case06(self):
    """Query all schedulers by lastModifiedTime (last ten days) and check the window.

    Builds a range query (GREATER_THAN start, LESS_THAN end) sorted descending,
    then asserts the newest result's lastModifiedTime lies inside the window.
    """
    # Single clock read so both bounds derive from the same anchor; the
    # original called get_time() twice, which could produce a skewed window.
    end_time = get_time()  # upper bound: now (epoch millis)
    start_time = end_time - (10 * 24 * 3600 * 1000)  # lower bound: ten days earlier
    data = {
        "fieldList": [
            {
                "fieldName": "lastModifiedTime",
                "fieldValue": start_time,
                "comparatorOperator": "GREATER_THAN",
            },
            {
                "fieldName": "lastModifiedTime",
                "fieldValue": end_time,
                "comparatorOperator": "LESS_THAN",
            },
        ],
        "sortObject": {"field": "lastModifiedTime", "orderDirection": "DESC"},
        "offset": 0,
        "limit": 8,
    }
    res = requests.post(url=self.query_scheduler_url,
                        headers=get_headers(host),
                        data=json.dumps(data))
    query_results = dict_res(res.text)
    # Results are sorted DESC, so [0] is the most recently modified scheduler.
    first_time = query_results["content"][0]["lastModifiedTime"]
    # assertTrue is the idiomatic form of assertEqual(cond, True).
    self.assertTrue(end_time > first_time > start_time,
                    "查询结果的lastModifiedTime不包含在起始时间内,查询结果不正确")
def test_case01(self):
    """Create a scheduler that executes once; expects HTTP 201."""
    scheduler_name = 'api_auto_create_schedulers_once' + str(random.randint(0, 99999))
    flow_table = load_workbook(abs_dir("flow_dataset_info.xlsx"))
    # Workbook.get_sheet_by_name() is deprecated in openpyxl; key access is
    # the supported spelling and returns the same worksheet object.
    info_sheet = flow_table["flow_info"]
    # Row 2 holds the fixture flow: column 2 = id, column 3 = name.
    flow_id = info_sheet.cell(row=2, column=2).value
    flow_name = info_sheet.cell(row=2, column=3).value
    data = {
        "name": scheduler_name,
        "flowId": flow_id,
        "flowName": flow_name,
        "flowType": 'dataflow',
        "schedulerId": "once",
        "configurations": {
            "startTime": get_time(),
            "arguments": [],
            "cron": "once",
            "properties": [],
        },
    }
    res = requests.post(url=create_scheduler_url, headers=get_headers(), json=data)
    print(res.status_code, res.text)
    self.assertEqual(res.status_code, 201, '创建单次执行的scheduler失败: %s' % res.text)
    # Give the backend time to register the new scheduler before the next test.
    time.sleep(5)
def create_schedulers():
    """Create a once-off scheduler for the first available flow.

    :return: the new scheduler's id on success, otherwise None.
    """
    from basic_info.url_info import create_scheduler_url
    # Fetch the flow record once instead of calling get_flows() per field.
    flow = get_flows()[0]
    # BUG FIX: flow_id was referenced below but never assigned (NameError).
    # Assumes the flow dict exposes its identifier under "id", matching the
    # merce_flow schema used elsewhere in this file — TODO confirm.
    flow_id = flow["id"]
    flow_name = flow["name"]
    flow_type = flow["flow_type"]
    # (name, value) pairs for the spark/dataflow runtime properties.
    property_pairs = [
        ("all.debug", "false"),
        ("all.dataset-nullable", "false"),
        ("all.lineage.enable", "true"),
        ("all.notify-output", "false"),
        ("all.debug-rows", "20"),
        ("dataflow.master", "yarn"),
        ("dataflow.deploy-mode", "client"),
        ("dataflow.queue", "a1"),
        ("dataflow.num-executors", "2"),
        ("dataflow.driver-memory", "512M"),
        ("dataflow.executor-memory", "1G"),
        ("dataflow.executor-cores", "2"),
        ("dataflow.verbose", "true"),
        ("dataflow.local-dirs", ""),
        ("dataflow.sink.concat-files", "true"),
    ]
    data = {
        "configurations": {
            "arguments": [],
            "properties": [{"name": n, "value": v} for n, v in property_pairs],
            "startTime": get_time(),
        },
        "flowId": flow_id,
        "flowName": flow_name,
        "flowType": flow_type,
        "name": "students_flow" + str(random.randint(0, 99999)),
        "schedulerId": "once",
        "source": "rhinos",
    }
    res = requests.post(url=create_scheduler_url, headers=get_headers(),
                        data=json.dumps(data))
    if res.status_code == 201 and res.text:
        scheduler_id_format = dict_res(res.text)
        try:
            return scheduler_id_format["id"]
        except KeyError as e:
            # Response body did not contain an id; report and fall through.
            print("scheduler_id_format中存在异常%s" % e)
            return None
    return None
def data_for_create_scheduler(self):
    """
    1. Look up flow name/type/parameters for each flow id.
    2. Assemble the payload needed to create a scheduler for each flow.
    :return: data_list
    """
    print("------组装创建任务所需要的data------\n")
    data_list = []
    flow_id_list = self.get_flow_id()
    for flow_id in flow_id_list:
        sql = 'select name, flow_type, parameters from merce_flow where id = "%s"' % flow_id
        # BUG FIX: the original did `except Exception as e: return e`, handing
        # the exception object back to callers that expect a list of payloads.
        # Let DB errors propagate instead.
        flow_info = self.ms.ExecuQuery(sql)
        print('flow_info:', flow_info)
        # KeyError/IndexError from the extraction below were previously caught
        # and immediately re-raised — a no-op — so the try/except is dropped.
        flow_name = flow_info[0]["name"]
        flow_type = flow_info[0]["flow_type"]
        flow_parameters = flow_info[0]["parameters"]
        arguments_list = []
        if flow_parameters:  # flow has stored parameters
            print('parameters没有解压缩时:', flow_parameters)
            # Stored parameters are compressed/encoded; unpack before parsing.
            parameters_use = parameter_ungzip(flow_parameters)
            print('parameters解压缩后:', parameters_use)
            flow_parameters_list = dict_res(parameters_use)
            if len(flow_parameters_list) > 0:
                # NOTE(review): only the first parameter is forwarded,
                # matching the original behavior.
                first = flow_parameters_list[0]
                arguments_list.append({
                    "name": first["name"],
                    "category": first["category"],
                    "value": first["defaultVal"],
                    "refs": first["refs"],
                    "description": first["description"],
                })
        # (name, value) pairs for the spark/dataflow runtime properties.
        property_pairs = [
            ("all.debug", "false"),
            ("all.dataset-nullable", "false"),
            ("all.lineage.enable", "true"),
            ("all.notify-output", "false"),
            ("all.debug-rows", "20"),
            ("dataflow.master", "yarn"),
            ("dataflow.deploy-mode", "client"),
            ("dataflow.queue", "merce.normal"),
            ("dataflow.num-executors", "2"),
            ("dataflow.driver-memory", "512M"),
            ("dataflow.executor-memory", "1G"),
            ("dataflow.executor-cores", "2"),
            ("dataflow.verbose", "true"),
            ("dataflow.local-dirs", ""),
            ("dataflow.sink.concat-files", "true"),
        ]
        data = {
            "configurations": {
                "arguments": arguments_list,
                "properties": [{"name": n, "value": v} for n, v in property_pairs],
                "startTime": get_time(),
            },
            "flowId": flow_id,
            "flowName": flow_name,
            "flowType": flow_type,
            "name": flow_name + 'scheduler' + str(random.randint(0, 9999)) + str(random.randint(0, 9999)),
            "schedulerId": "once",
            "source": "rhinos",
        }
        data_list.append(data)
    print("------返回创建任务的data------")
    return data_list
def get_dataflow_data(flow_name):
    """Build a once-off scheduler payload for the flow named *flow_name*.

    Looks up the flow's id and type in the merce_flow table, then returns the
    request dict used to create a scheduler for it.

    :param flow_name: name of the flow as stored in merce_flow.
    :return: the scheduler-creation payload dict.
    :raises Exception: if the DB query fails, or the flow cannot be resolved.
    """
    print("开始执行get_dataflow_data(flow_name)")
    ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"],
               MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])
    try:
        sql = 'select id, flow_type from merce_flow where name = "%s"' % flow_name
        flow_info = ms.ExecuQuery(sql)
        print(sql)
        print('flow_info:', flow_info)
    except Exception as e:
        raise e
    else:
        try:
            flow_id = flow_info[0]["id"]
            flow_type = flow_info[0]["flow_type"]
        except (KeyError, IndexError) as e:
            # BUG FIX: an empty result set raises IndexError, which the
            # original KeyError-only clause let escape unannotated.
            raise e
        # (name, value) pairs for the spark/dataflow runtime properties.
        property_pairs = [
            ("all.debug", "false"),
            ("all.dataset-nullable", "false"),
            ("all.lineage.enable", "true"),
            ("all.notify-output", "false"),
            ("all.debug-rows", "20"),
            ("dataflow.master", "yarn"),
            ("dataflow.deploy-mode", "client"),
            ("dataflow.queue", "merce.normal"),
            ("dataflow.num-executors", "2"),
            ("dataflow.driver-memory", "512M"),
            ("dataflow.executor-memory", "1G"),
            ("dataflow.executor-cores", "2"),
            ("dataflow.verbose", "true"),
            ("dataflow.local-dirs", ""),
            ("dataflow.sink.concat-files", "true"),
        ]
        data = {
            "configurations": {
                "arguments": [],
                "properties": [{"name": n, "value": v} for n, v in property_pairs],
                "startTime": get_time(),
            },
            "flowId": flow_id,
            "flowName": flow_name,
            "flowType": flow_type,
            "name": flow_name + str(random.randint(0, 99999)),
            "schedulerId": "once",
            "source": "rhinos",
        }
        return data
def test_case02(self):
    """Create a scheduler that executes on a cron schedule; expects HTTP 201."""
    job_name = 'api_auto_create_schedulers_cron' + str(random.randint(0, 99999))
    # start_time = get_time()+(600*1000)  # alternative: start ten minutes out
    begin_ts = get_time()  # start immediately
    finish_ts = get_time() + (24 * 3600 * 1000)  # stop one day from now
    workbook = load_workbook(abs_dir("flow_dataset_info.xlsx"))
    sheet = workbook.get_sheet_by_name("flow_info")
    flow_id = sheet.cell(row=2, column=2).value
    flow_name = sheet.cell(row=2, column=3).value
    # Runtime properties as (name, value) pairs, expanded below.
    prop_pairs = (
        ("all.debug", "false"),
        ("all.dataset-nullable", "false"),
        ("all.notify-output", "false"),
        ("all.debug-rows", "20"),
        ("dataflow.master", "yarn"),
        ("dataflow.queue", ["default"]),
        ("dataflow.num-executors", "2"),
        ("dataflow.driver-memory", "512M"),
        ("dataflow.executor-memory", "1G"),
        ("dataflow.executor-cores", "2"),
        ("dataflow.verbose", "true"),
        ("dataflow.local-dirs", ""),
        ("dataflow.sink.concat-files", "true"),
    )
    payload = {
        "name": job_name,
        "flowId": flow_id,
        "flowName": flow_name,
        "flowType": 'dataflow',
        "schedulerId": "cron",
        "source": "rhinos",
        "configurations": {
            "arguments": [],
            "cron": "0 0 8 * * ? ",
            "cronType": "simple",
            "endTime": finish_ts,
            "properties": [{"name": key, "value": val} for key, val in prop_pairs],
            "startTime": begin_ts,
        },
    }
    response = requests.post(url=create_scheduler_url, headers=get_headers(), json=payload)
    print(response.status_code, response.text)
    self.assertEqual(response.status_code, 201, '创建周期执行的scheduler失败:%s' % response.text)
    time.sleep(5)
def test_rule_update(self):
    """更新规则-SQL类型的name"""
    # Full rule representation sent back on update; only "name" is new
    # (built from the detail lookup plus a random suffix) — the rest echoes
    # the stored rule so the PUT does not clear fields.
    data = {
        "id": self.sql_rule_id,
        # NOTE(review): reuses another test method to fetch the current name;
        # couples this test to test_rule_detail's return value.
        "name": self.test_rule_detail() + '_after_update' + str(random.randint(0, 99999)),
        "creator": "admin",
        "createTime": 1548389901000,
        "lastModifier": "admin",
        "lastModifiedTime": get_time(),
        "owner": "2059750c-a300-4b64-84a6-e8b086dbfd42",
        "version": 1,
        "moduleVersion": 0,
        "enabled": 1,
        # Tenant sub-object copied verbatim from the stored rule.
        "tenant": {
            "id": "2d7ad891-41c5-4fba-9ff2-03aef3c729e5",
            "name": "default",
            "creator": "root",
            "createTime": 1532942318000,
            "lastModifier": "f8aff341-9303-4135-b393-1d322e4638e2",
            "lastModifiedTime": 1544078372000,
            "owner": "f8aff341-9303-4135-b393-1d322e4638e2",
            "version": 0,
            "moduleVersion": 0,
            "enabled": 1,
            "resourceQueues": ["default", "merce.normal"],
            "hdfsSpaceQuota": 0,
            "zid": "",
            "expiredPeriod": 0
        },
        "buildType": "Custom",
        "customType": "SQL",
        "ruleClass": "com.merce.woven.app.metadata.rule.RuleCustomSQLValidation",
        "customValue": "$grade > 70",
        "priority": 1,
        "aggType": "None",
        "dataScope": "FieldsCombination",
        "fieldValueType": "Any",
        # Parameter schema describing the rule's input/output form fields.
        "ruleOption": {
            "paramsMap": {
                "inputGroup": [{
                    "name": "customSqlValidation",
                    "vtype": "string",
                    "defaultValue": "",
                    "displayStr": "自定义 SQL 表达式",
                    "required": True
                }],
                "outputGroup": [{
                    "name": "outputFields",
                    "vtype": "MultiField",
                    "defaultValue": "*",
                    "displayStr": "输出字段",
                    "required": True
                }, {
                    "name": "qualityType",
                    "vtype": "string",
                    "defaultValue": "normal",
                    "displayStr": "打分方式",
                    "required": True,
                    "valueOptions": ["normal", "ignore"]
                }, {
                    "name": "outputLimit",
                    "vtype": "BigInt",
                    "defaultValue": "1000000",
                    "displayStr": "坏数据行数限制",
                    "required": False
                }]
            },
            "outValueType": "Any",
            "outputFields": []
        },
        "regex": ".*",
        "regexFlag": 0,
        "expiredPeriod": 0
    }
    response = requests.put(url=self.query_rule_detail_url, headers=get_headers(HOST_189), json=data)
    # A successful update returns 204 No Content.
    self.assertEqual(204, response.status_code, '更新规则接口调用失败,失败原因%s' % response.text)