Example #1
    def test_case06(self):
        """query:根据上次修改时间查询全部的scheduler"""
        end_time = get_time()  # lastModifiedTime结束时间是当前时间
        start_time = get_time() - (10 * 24 * 3600 * 1000
                                   )  # lastModifiedTime开始时间是当前时间的十天前
        data = {
            "fieldList": [{
                "fieldName": "lastModifiedTime",
                "fieldValue": start_time,
                "comparatorOperator": "GREATER_THAN"
            }, {
                "fieldName": "lastModifiedTime",
                "fieldValue": end_time,
                "comparatorOperator": "LESS_THAN"
            }],
            "sortObject": {
                "field": "lastModifiedTime",
                "orderDirection": "DESC"
            },
            "offset":
            0,
            "limit":
            8
        }
        res = requests.post(url=self.query_scheduler_url,
                            headers=get_headers(host),
                            data=json.dumps(data))

        query_results = dict_res(res.text)
        first_time = query_results["content"][0]["lastModifiedTime"]
        # The first result's lastModifiedTime should fall between the start
        # and end times used in the query.
        self.assertTrue(start_time < first_time < end_time,
                        "The lastModifiedTime of the first result is outside the queried time window")
Example #2
 def test_case01(self):
     """创建schedulers,单次执行"""
     scheduler_name = 'api_auto_create_schedulers_once' + str(
         random.randint(0, 99999))
     flow_table = load_workbook(abs_dir("flow_dataset_info.xlsx"))
     info_sheet = flow_table["flow_info"]  # get_sheet_by_name() is deprecated in openpyxl
     flow_id = info_sheet.cell(row=2, column=2).value
     flow_name = info_sheet.cell(row=2, column=3).value
     data = {
         "name": scheduler_name,
         "flowId": flow_id,
         "flowName": flow_name,
         "flowType": 'dataflow',
         "schedulerId": "once",
         "configurations": {
             "startTime": get_time(),
             "arguments": [],
             "cron": "once",
             "properties": []
         }
     }
     res = requests.post(url=create_scheduler_url,
                         headers=get_headers(),
                         json=data)
     print(res.status_code, res.text)
     self.assertEqual(res.status_code, 201,
                      'Failed to create a run-once scheduler: %s' % res.text)
     time.sleep(5)
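abs_dir() is not defined in these snippets; a plausible sketch, assuming it resolves a data file relative to the test module (hypothetical implementation):

import os

def abs_dir(filename):
    # Assumed: absolute path of a data file that sits next to this module.
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)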
Example #3
def create_schedulers():
    from basic_info.url_info import create_scheduler_url
    flow = get_flows()[0]  # fetch the flow list once instead of per field
    flow_id = flow["id"]  # was never assigned in the original; "id" key assumed, as in get_dataflow_data() below
    flow_name = flow["name"]
    flow_type = flow["flow_type"]
    data = {
        "configurations": {
            "arguments": [],
            "properties": [{
                "name": "all.debug",
                "value": "false"
            }, {
                "name": "all.dataset-nullable",
                "value": "false"
            }, {
                "name": "all.lineage.enable",
                "value": "true"
            }, {
                "name": "all.notify-output",
                "value": "false"
            }, {
                "name": "all.debug-rows",
                "value": "20"
            }, {
                "name": "dataflow.master",
                "value": "yarn"
            }, {
                "name": "dataflow.deploy-mode",
                "value": "client"
            }, {
                "name": "dataflow.queue",
                "value": "a1"
            }, {
                "name": "dataflow.num-executors",
                "value": "2"
            }, {
                "name": "dataflow.driver-memory",
                "value": "512M"
            }, {
                "name": "dataflow.executor-memory",
                "value": "1G"
            }, {
                "name": "dataflow.executor-cores",
                "value": "2"
            }, {
                "name": "dataflow.verbose",
                "value": "true"
            }, {
                "name": "dataflow.local-dirs",
                "value": ""
            }, {
                "name": "dataflow.sink.concat-files",
                "value": "true"
            }],
            "startTime":
            get_time()
        },
        "flowId": flow_id,
        "flowName": flow_name,
        "flowType": flow_type,
        "name": "students_flow" + str(random.randint(0, 99999)),
        "schedulerId": "once",
        "source": "rhinos"
    }
    res = requests.post(url=create_scheduler_url,
                        headers=get_headers(),
                        data=json.dumps(data))
    if res.status_code == 201 and res.text:
        scheduler_id_format = dict_res(res.text)
        try:
            scheduler_id = scheduler_id_format["id"]
        except KeyError as e:
            print("unexpected response payload, missing key %s" % e)
            return None
        else:
            return scheduler_id
    else:
        return None
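Note that this function serializes manually with data=json.dumps(data); unlike json=data (used in Examples #2 and #6), requests does not set a JSON Content-Type header in this case, so get_headers() is assumed to supply it. A minimal sketch of that assumption (the auth header is a placeholder, not the project's real header logic):

def get_headers(host=None):
    # Assumed shape: JSON content type plus an auth header; the token
    # below is a placeholder, not from the source.
    return {
        "Content-Type": "application/json",
        "Authorization": "Bearer <token>",
    }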
Example #4
 def data_for_create_scheduler(self):
     """
     1. 根据flow_id 查找flow_name等信息
     2. 根据查询到的flow信息,拼装创建scheduler所需要使用的data
     :return: data_list
     """
     print("------组装创建任务所需要的data------\n")
     data_list = []
     flow_id_list = self.get_flow_id()
     for flow_id in flow_id_list:
         try:
             sql = 'select name, flow_type, parameters from merce_flow where id = "%s"' % flow_id
             flow_info = self.ms.ExecuQuery(sql)
             print('flow_info:', flow_info)
         except Exception as e:
             raise  # propagate the DB error instead of returning the exception object
         else:
             try:
                 flow_name = flow_info[0]["name"]
                 flow_type = flow_info[0]["flow_type"]
                 flow_parameters = flow_info[0]["parameters"]
                 arguments_list = []
                 arguments = {}
                 if flow_parameters:  # only when the flow defines parameters
                     print('parameters before decompression:', flow_parameters)
                     parameters_use = parameter_ungzip(flow_parameters)  # decode and decompress the stored parameters
                     print('parameters after decompression:', parameters_use)
                     flow_parameters_list = dict_res(parameters_use)
                     if len(flow_parameters_list) > 0:
                         arguments["name"] = flow_parameters_list[0]["name"]
                         arguments["category"] = flow_parameters_list[0]["category"]
                         arguments["value"] = flow_parameters_list[0]["defaultVal"]
                         arguments["refs"] = flow_parameters_list[0]["refs"]
                         arguments["description"] = flow_parameters_list[0]["description"]
                         arguments_list.append(arguments)
             except (KeyError, IndexError):
                 raise
         data = {
             "configurations": {
                 "arguments": arguments_list,
                 "properties": [
                     {
                         "name": "all.debug",
                         "value": "false"
                     },
                     {
                         "name": "all.dataset-nullable",
                         "value": "false"
                     },
                     {
                         "name": "all.lineage.enable",
                         "value": "true"
                     },
                     {
                         "name": "all.notify-output",
                         "value": "false"
                     },
                     {
                         "name": "all.debug-rows",
                         "value": "20"
                     },
                     {
                         "name": "dataflow.master",
                         "value": "yarn"
                     },
                     {
                         "name": "dataflow.deploy-mode",
                         "value": "client"
                     },
                     {
                         "name": "dataflow.queue",
                         "value": "merce.normal"
                     },
                     {
                         "name": "dataflow.num-executors",
                         "value": "2"
                     },
                     {
                         "name": "dataflow.driver-memory",
                         "value": "512M"
                     },
                     {
                         "name": "dataflow.executor-memory",
                         "value": "1G"
                     },
                     {
                         "name": "dataflow.executor-cores",
                         "value": "2"
                     },
                     {
                         "name": "dataflow.verbose",
                         "value": "true"
                     },
                     {
                         "name": "dataflow.local-dirs",
                         "value": ""
                     },
                     {
                         "name": "dataflow.sink.concat-files",
                         "value": "true"
                     }
                 ],
                 "startTime": get_time()
             },
             "flowId": flow_id,
             "flowName": flow_name,
             "flowType": flow_type,
             "name": flow_name + 'scheduler' + str(random.randint(0, 9999))+str(random.randint(0, 9999)),
             "schedulerId": "once",
             "source": "rhinos"
         }
         data_list.append(data)
     print("------返回创建任务的data------")
     # print(data_list)
     return data_list
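parameter_ungzip() is referenced but not shown; a sketch under the assumption that flow parameters are stored base64-encoded and gzip-compressed (the real encoding may differ):

import base64
import gzip

def parameter_ungzip(raw):
    # Assumption: base64-decode, then gunzip, recovering the JSON
    # parameter string that dict_res() parses above.
    return gzip.decompress(base64.b64decode(raw)).decode("utf-8")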
Example #5
def get_dataflow_data(flow_name):
    print("开始执行get_dataflow_data(flow_name)")
    ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"],
               MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])
    try:
        sql = 'select id, flow_type from merce_flow where name = "%s"' % flow_name
        flow_info = ms.ExecuQuery(sql)
        print(sql)
        print('flow_info:', flow_info)
    except Exception as e:
        raise e
    else:
        try:
            flow_id = flow_info[0]["id"]
            flow_type = flow_info[0]["flow_type"]
            # print(flow_name, flow_type)
        except (IndexError, KeyError) as e:
            raise e  # flow not found, or unexpected result shape

    data = {
        "configurations": {
            "arguments": [],
            "properties": [{
                "name": "all.debug",
                "value": "false"
            }, {
                "name": "all.dataset-nullable",
                "value": "false"
            }, {
                "name": "all.lineage.enable",
                "value": "true"
            }, {
                "name": "all.notify-output",
                "value": "false"
            }, {
                "name": "all.debug-rows",
                "value": "20"
            }, {
                "name": "dataflow.master",
                "value": "yarn"
            }, {
                "name": "dataflow.deploy-mode",
                "value": "client"
            }, {
                "name": "dataflow.queue",
                "value": "merce.normal"
            }, {
                "name": "dataflow.num-executors",
                "value": "2"
            }, {
                "name": "dataflow.driver-memory",
                "value": "512M"
            }, {
                "name": "dataflow.executor-memory",
                "value": "1G"
            }, {
                "name": "dataflow.executor-cores",
                "value": "2"
            }, {
                "name": "dataflow.verbose",
                "value": "true"
            }, {
                "name": "dataflow.local-dirs",
                "value": ""
            }, {
                "name": "dataflow.sink.concat-files",
                "value": "true"
            }],
            "startTime":
            get_time()
        },
        "flowId": flow_id,
        "flowName": flow_name,
        "flowType": flow_type,
        "name": flow_name + str(random.randint(0, 99999)),
        "schedulerId": "once",
        "source": "rhinos"
    }
    return data
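A usage sketch for get_dataflow_data() (the flow name below is illustrative):

payload = get_dataflow_data("students_flow")
res = requests.post(url=create_scheduler_url,
                    headers=get_headers(),
                    data=json.dumps(payload))
print(res.status_code, res.text)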
Example #6
 def test_case02(self):
     """创建schedulers,周期执行"""
     scheduler_name = 'api_auto_create_schedulers_cron' + str(
         random.randint(0, 99999))
     # start_time = get_time()+(600*1000)  # starttime设为当前时间10分钟后
     start_time = get_time()  # starttime设为当前时间
     end_time = get_time() + (24 * 3600 * 1000)  # endtime设为当前时间1天后
     flow_table = load_workbook(abs_dir("flow_dataset_info.xlsx"))
     info_sheet = flow_table["flow_info"]  # get_sheet_by_name() is deprecated in openpyxl
     flow_id = info_sheet.cell(row=2, column=2).value
     flow_name = info_sheet.cell(row=2, column=3).value
     data = {
         "name": scheduler_name,
         "flowId": flow_id,
         "flowName": flow_name,
         "flowType": 'dataflow',
         "schedulerId": "cron",
         "source": "rhinos",
         "configurations": {
             "arguments": [],
             "cron":
             "0 0 8 * * ? ",
             "cronType":
             "simple",
             "endTime":
             end_time,
             "properties": [{
                 "name": "all.debug",
                 "value": "false"
             }, {
                 "name": "all.dataset-nullable",
                 "value": "false"
             }, {
                 "name": "all.notify-output",
                 "value": "false"
             }, {
                 "name": "all.debug-rows",
                 "value": "20"
             }, {
                 "name": "dataflow.master",
                 "value": "yarn"
             }, {
                 "name": "dataflow.queue",
                 "value": ["default"]
             }, {
                 "name": "dataflow.num-executors",
                 "value": "2"
             }, {
                 "name": "dataflow.driver-memory",
                 "value": "512M"
             }, {
                 "name": "dataflow.executor-memory",
                 "value": "1G"
             }, {
                 "name": "dataflow.executor-cores",
                 "value": "2"
             }, {
                 "name": "dataflow.verbose",
                 "value": "true"
             }, {
                 "name": "dataflow.local-dirs",
                 "value": ""
             }, {
                 "name": "dataflow.sink.concat-files",
                 "value": "true"
             }],
             "startTime":
             start_time
         }
     }
     res = requests.post(url=create_scheduler_url,
                         headers=get_headers(),
                         json=data)
     print(res.status_code, res.text)
     self.assertEqual(res.status_code, 201,
                      'Failed to create a cron scheduler: %s' % res.text)
     time.sleep(5)
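Note: the Quartz-style expression "0 0 8 * * ? " (seconds minutes hours day-of-month month day-of-week) fires at 08:00:00 every day, so with startTime set to now and endTime one day later the scheduler triggers at most once before it expires.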
Example #7
 def test_rule_update(self):
     """更新规则-SQL类型的name"""
     data = {
         "id":
         self.sql_rule_id,
         "name":
         self.test_rule_detail() + '_after_update' +
         str(random.randint(0, 99999)),
         "creator":
         "admin",
         "createTime":
         1548389901000,
         "lastModifier":
         "admin",
         "lastModifiedTime":
         get_time(),
         "owner":
         "2059750c-a300-4b64-84a6-e8b086dbfd42",
         "version":
         1,
         "moduleVersion":
         0,
         "enabled":
         1,
         "tenant": {
             "id": "2d7ad891-41c5-4fba-9ff2-03aef3c729e5",
             "name": "default",
             "creator": "root",
             "createTime": 1532942318000,
             "lastModifier": "f8aff341-9303-4135-b393-1d322e4638e2",
             "lastModifiedTime": 1544078372000,
             "owner": "f8aff341-9303-4135-b393-1d322e4638e2",
             "version": 0,
             "moduleVersion": 0,
             "enabled": 1,
             "resourceQueues": ["default", "merce.normal"],
             "hdfsSpaceQuota": 0,
             "zid": "",
             "expiredPeriod": 0
         },
         "buildType":
         "Custom",
         "customType":
         "SQL",
         "ruleClass":
         "com.merce.woven.app.metadata.rule.RuleCustomSQLValidation",
         "customValue":
         "$grade > 70",
         "priority":
         1,
         "aggType":
         "None",
         "dataScope":
         "FieldsCombination",
         "fieldValueType":
         "Any",
         "ruleOption": {
             "paramsMap": {
                 "inputGroup": [{
                     "name": "customSqlValidation",
                     "vtype": "string",
                     "defaultValue": "",
                     "displayStr": "自定义 SQL 表达式",
                     "required": True
                 }],
                 "outputGroup": [{
                     "name": "outputFields",
                     "vtype": "MultiField",
                     "defaultValue": "*",
                     "displayStr": "输出字段",
                     "required": True
                 }, {
                     "name": "qualityType",
                     "vtype": "string",
                     "defaultValue": "normal",
                     "displayStr": "打分方式",
                     "required": True,
                     "valueOptions": ["normal", "ignore"]
                 }, {
                     "name": "outputLimit",
                     "vtype": "BigInt",
                     "defaultValue": "1000000",
                     "displayStr": "坏数据行数限制",
                     "required": False
                 }]
             },
             "outValueType": "Any",
             "outputFields": []
         },
         "regex":
         ".*",
         "regexFlag":
         0,
         "expiredPeriod":
         0
     }
     response = requests.put(url=self.query_rule_detail_url,
                             headers=get_headers(HOST_189),
                             json=data)
     self.assertEqual(204, response.status_code,
                      'Rule update API call failed: %s' % response.text)
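A follow-up assertion one might append inside the test (a sketch; it assumes query_rule_detail_url also serves a GET that returns the updated rule as JSON):

check = requests.get(url=self.query_rule_detail_url,
                     headers=get_headers(HOST_189))
self.assertEqual(200, check.status_code)
self.assertEqual(data["name"], dict_res(check.text)["name"])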