Example #1
 def prepare(self):
     config_json = to_json(self.config, indent=4)
     # Config attribute names use "__" in place of "." (e.g. data__filename
     # for "data.filename"); restore the dots before writing the JSON.
     config_json = config_json.replace("__", ".")
     config_path = mk_working_directory(self.project_id, self.component_id,
                                        RobotX.CONFIG_FILE_NAME)
     with open(config_path, 'w', encoding='utf-8') as f:
         f.write(config_json)
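All of these examples route file paths through mk_working_directory, whose definition is not shown. A minimal sketch, assuming it joins the project and component IDs under a configured working root (WORKING_ROOT is a hypothetical name) and creates the directory on demand:

import os

WORKING_ROOT = '/tmp/working'  # assumption; the real root is not shown here

def mk_working_directory(project_id, component_id, *paths):
    # Per-project, per-component working directory, created on first use;
    # any trailing path segments are joined onto it.
    directory = os.path.join(WORKING_ROOT, str(project_id), str(component_id))
    os.makedirs(directory, exist_ok=True)
    return os.path.join(directory, *paths)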
Example #2
 def need_execution(self, force=False):
     changed = force or self.changed()
     if changed:
         if not self.loaded:
             self.load_from_db()
         pickle_path = mk_working_directory(self.project_id,
                                            self.component_id,
                                            Component.PREVIOUS)
         with open(pickle_path, 'wb') as f:
             pickle.dump(self, f)
         self.prepare()
     return changed
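Example #2 pickles the component so a later run can compare itself against this snapshot. A sketch of a matching changed() method, assuming (nothing here confirms it) that the snapshot lives at Component.PREVIOUS and that comparing the config attribute is enough to detect edits:

 def changed(self):
     # Hypothetical counterpart to the snapshot written in need_execution().
     pickle_path = mk_working_directory(self.project_id, self.component_id,
                                        Component.PREVIOUS)
     try:
         with open(pickle_path, 'rb') as f:
             previous = pickle.load(f)
     except (FileNotFoundError, pickle.UnpicklingError):
         return True  # no usable snapshot yet, so treat as changed
     return previous.config != self.config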
Example #3
def check_target(project_id, input_comp_id, target):
    csv_reader = CsvReaderInfo.objects.filter(project_id=project_id,
                                              component_id=input_comp_id)
    if len(csv_reader) == 0:
        return True
    csv_reader = csv_reader[0]
    try:
        assert isinstance(csv_reader, CsvReaderInfo)
        data_saving_path = mk_working_directory(project_id, input_comp_id,
                                                csv_reader.file_name)
        df = pandas.read_csv(data_saving_path, usecols=[target])
        detail = dict(df.groupby([target]).size())
        # Anything other than a pure binary 0/1 target short-circuits to True.
        if not set(detail) <= {0, 1}:
            return True
        # True when the positive class (1) outnumbers the negative class (0);
        # missing classes default to a count of 0.
        return detail.get(0, 0) < detail.get(1, 0)
    except UnicodeDecodeError:
        return True
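The counting logic above can be phrased more directly with pandas value_counts. A hypothetical helper mirroring the same decision (the name target_class_check is not from the source):

import pandas

def target_class_check(csv_path, target):
    counts = pandas.read_csv(csv_path, usecols=[target])[target].value_counts()
    # Mirror Example #3: non-binary targets short-circuit to True ...
    if not set(counts.index) <= {0, 1}:
        return True
    # ... otherwise True when class 1 outnumbers class 0.
    return counts.get(0, 0) < counts.get(1, 0)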
Example #4
def perview(request, project_id, component_id):
    self_defined_feature = CsvReaderInfo.objects.filter(
        project_id=project_id, component_id=component_id)
    if len(self_defined_feature) == 0:
        return Response.fail(ERRORS.NOT_INITED)
    data_saving_path = mk_working_directory(project_id, component_id,
                                            'data.csv')
    result = list()
    with open(data_saving_path, 'r', encoding='utf-8') as f:
        csv_reader = csv.reader(f)
        for row_num, row in enumerate(csv_reader):
            # Preview the header plus the first ten data rows.
            if row_num > 10:
                break
            if len(result) == 0:
                for col in row:
                    result.append(dict(name=col, value=list()))
            else:
                for column, sample in zip(result, row):
                    column['value'].append(sample)
    return Response.success(result)
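For a file whose header row is a,b followed by data rows 1,x and 2,y, the loop above builds this column-oriented payload for Response.success:

result = [
    {'name': 'a', 'value': ['1', '2']},
    {'name': 'b', 'value': ['x', 'y']},
]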
Example #5
def perview(request, project_id, component_id):
    # self_defined_feature = SelfDefinedFeature.objects.filter(project_id=project_id, component_id=component_id)
    # if len(self_defined_feature)==0:
    #     return HttpResponse(Response.fail(ERRORS.NOT_INITED, None).to_json())
    data_saving_path = mk_working_directory(project_id, component_id,
                                            'data.csv')
    if not os.path.exists(data_saving_path):
        return HttpResponse(
            Response.fail(ERRORS.CSV_NOT_UPLOAD, None).to_json())
    result = list()
    with open(data_saving_path, 'r', encoding='utf-8') as f:
        csv_reader = csv.reader(f)
        for row_num, row in enumerate(csv_reader):
            # Preview the header plus the first ten data rows.
            if row_num > 10:
                break
            if len(result) == 0:
                for col in row:
                    result.append(dict(name=col, value=list()))
            else:
                for column, sample in zip(result, row):
                    column['value'].append(sample)
    return HttpResponse(Response.success(result).to_json())
Example #6
def upload(request, project_id, component_id, file):
    # Save the uploaded file
    file_name = file.name
    data_saving_path = mk_working_directory(project_id, component_id,
                                            file_name)
    with open(data_saving_path, 'wb') as destination:
        if file.multiple_chunks():
            for chunk in file.chunks():
                destination.write(chunk)
        else:
            destination.write(file.read())
    # Inspect the file and infer each field's data type
    response = None
    field_types = None  # type: dict[str,FieldType]
    try:
        header = None
        column_num = -1
        with open(data_saving_path, 'r', encoding='utf-8') as f:
            csv_reader = csv.reader(f)
            for row_num, row in enumerate(csv_reader):
                # if row_num == 0 and "Target" not in [item.capitalize() for item in row]:
                #     return Response.fail(ERRORS.NO_TARGET_FEILD, None)
                # Sample the header plus the first 21 data rows.
                if row_num > 21:
                    break
                if header is None:
                    column_num = len(row)
                    if column_num < 2:
                        # Too few CSV columns
                        response = Response.fail(ERRORS.CSV_COLUMN_SIZE_ERROR,
                                                 None)
                        return response
                    header = row
                    field_types = {column: FieldType(column) for column in row}
                else:
                    len_of_column = len(row)
                    if len_of_column != column_num:
                        response = Response.fail(
                            ERRORS.CSV_COLUMN_NUM_ERROR,
                            dict(header_column_num=column_num,
                                 line=row_num + 1,
                                 row_column_num=len_of_column))
                        return response
                    for column, sample in zip(header, row):
                        # if column.capitalize() == "Target" and sample not in ["0","1"]:
                        #     return Response.fail(ERRORS.TARGET_FIELD_ERROR, None)
                        field_types[column].add_sample_data(sample)
        if header is None:
            response = Response.fail(ERRORS.CSV_EMPTY, None)
            return response
        if len(field_types[header[0]].sample_data) < 20:
            response = Response.fail(ERRORS.CSV_ROW_TOO_SMALL, None)
            return response
        # Infer a type for each field, in header order.
        db_field_types = []

        for head in header:
            field = field_types[head]
            field.guess_field_type()
            db_field_types.append(field.to_db_type(project_id, component_id))

        # Persist the inferred types
        CsvReaderInfotype.objects.filter(project_id=project_id,
                                         component_id=component_id).delete()
        CsvReaderInfotype.objects.bulk_create(db_field_types)
        response = Response.success(db_field_types)
        return response
    except UnicodeDecodeError:
        response = Response.fail(ERRORS.CSV_UTF8_ERROR, None)
        return response
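FieldType is exercised here but never defined. A minimal sketch consistent with the calls above (a constructor taking the column name, add_sample_data, sample_data, guess_field_type, to_db_type); the parse-as-float rule is an assumption, though the "factor"/"numeric" labels match the filter in Example #21:

class FieldType(object):
    def __init__(self, field):
        self.field = field        # column name from the CSV header
        self.sample_data = []     # raw string samples
        self.field_type = None    # set by guess_field_type()

    def add_sample_data(self, sample):
        self.sample_data.append(sample)

    def guess_field_type(self):
        # Assumed rule: numeric if every sample parses as a float,
        # otherwise a categorical "factor".
        try:
            for sample in self.sample_data:
                float(sample)
            self.field_type = 'numeric'
        except ValueError:
            self.field_type = 'factor'

    def to_db_type(self, project_id, component_id):
        # Assumed CsvReaderInfotype columns; the real schema is not shown.
        return CsvReaderInfotype(project_id=project_id,
                                 component_id=component_id,
                                 field=self.field,
                                 field_type=self.field_type)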
Example #7
 def get_config_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 FeatureCombine.CONFIG_FILE_NAME)
Example #8
 def csv_file_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 SelfDefinedFeature.CSV_NAME)
Example #9
 def get_test_metrics_local_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomTest.TEST_METRICS)
Example #10
 def get_robotx_dict_local_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomLearn.ROBOTX_DICT)
Example #11
def upload(request, project_id, component_id, file):
    # Save the uploaded file
    file_name = file.name
    data_saving_path = mk_working_directory(project_id, component_id,
                                            'data.csv')
    with open(data_saving_path, 'wb') as destination:
        if file.multiple_chunks():
            for chunk in file.chunks():
                destination.write(chunk)
        else:
            destination.write(file.read())
    # Inspect the file and infer each field's data type
    response = None
    field_types = None  # type: dict[str,FieldType]
    try:
        header = None
        column_num = -1
        with open(data_saving_path, 'r', encoding='utf-8') as f:
            csv_reader = csv.reader(f)
            for row_num, row in enumerate(csv_reader):
                # Sample the header plus the first 21 data rows.
                if row_num > 21:
                    break
                if header is None:
                    column_num = len(row)
                    if column_num < 2:
                        # Too few CSV columns
                        response = Response.fail(ERRORS.CSV_COLUMN_SIZE_ERROR,
                                                 None)
                        return HttpResponse(response.to_json())
                    header = row
                    field_types = {column: FieldType(column) for column in row}
                else:
                    len_of_column = len(row)
                    if len_of_column != column_num:
                        response = Response.fail(
                            ERRORS.CSV_COLUMN_NUM_ERROR,
                            dict(header_column_num=column_num,
                                 line=row_num + 1,
                                 row_column_num=len_of_column))
                        return HttpResponse(response.to_json())
                    for column, sample in zip(header, row):
                        field_types[column].add_sample_data(sample)
        if header is None:
            response = Response.fail(ERRORS.CSV_EMPTY, None)
            return HttpResponse(response.to_json())
        if len(field_types[header[0]].sample_data) < 20:
            response = Response.fail(ERRORS.CSV_ROW_TOO_SMALL, None)
            return HttpResponse(response.to_json())
        # Infer a type for each field.
        db_field_types = []
        for field in field_types.values():
            field.guess_field_type()
            db_field_types.append(field.to_db_type(project_id, component_id))
        # Persist the component record
        SelfDefinedFeature.objects.filter(project_id=project_id,
                                          component_id=component_id).delete()
        SelfDefinedFeature(project_id=project_id,
                           component_id=component_id,
                           file_name=file_name).save()

        # Persist the inferred types
        SelfDefinedFeatureType.objects.filter(
            project_id=project_id, component_id=component_id).delete()
        SelfDefinedFeatureType.objects.bulk_create(db_field_types)

        response = Response.success(list(field_types.values()))
        return HttpResponse(response.to_json())
    except UnicodeDecodeError:
        response = Response.fail(ERRORS.CSV_UTF8_ERROR, None)
        return HttpResponse(response.to_json())
Example #12
 def csv_reader_dict_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomExplore.EXPLORE_DICT_FILE)
Example #13
 def get_config_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomAct.CONFIG_FILE_NAME)
Example #14
 def get_export_model_local_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomLearn.EXPORT_MODEL_MOJO)
Example #15
 def prepare(self):
     config_json = to_json(self.config, indent=4)
     config_path = mk_working_directory(self.project_id, self.component_id,
                                        FeatureCombine.CONFIG_FILE_NAME)
     with open(config_path, 'w', encoding='utf-8') as f:
         f.write(config_json)
Example #16
 def get_zip_export_model_local_path(project_id, component_id,
                                     export_model_zipfile):
     return mk_working_directory(project_id, component_id,
                                 export_model_zipfile)
Example #17
 def get_yarn_log_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 Component.YARN_LOG_NAME)
Example #18
 def get_config_path(self):
     return mk_working_directory(self.project_id, self.component_id,
                                 RobotX.CONFIG_FILE_NAME)
Example #19
 def get_data_dir_path(project_id, component_id, file_name):
     return mk_working_directory(project_id, component_id, file_name)
Example #20
 def get_model_properties_local_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomLearn.MODEL_PROPERTIES)
Example #21
    def __load_from_db__(self):
        project_id = self.project_id
        component_id = self.component_id

        atom_explore_model = AtomExploreModel.objects.filter(
            project_id=project_id, component_id=component_id)
        if len(atom_explore_model) == 0:
            raise Exception("ATOM EXPLORE NOT CONFIGURED")
        atom_explore_model = atom_explore_model[0]
        assert isinstance(atom_explore_model, AtomExploreModel)

        input_comp_id = atom_explore_model.input_comp_id
        feature_id = atom_explore_model.feature_id
        feature_target = atom_explore_model.feature_target

        # data.filename: path of the data file
        data__filename = None
        # dictionary.filename: path of the dictionary file
        dictionary__filename = None
        # Path of the training data
        input_comp_type = extract_component_type(input_comp_id)
        if input_comp_type == COMPONENTS.CSV_READER:
            # csv_reader
            csv_reader = CsvReaderInfo.objects.filter(
                project_id=project_id, component_id=input_comp_id)
            if len(csv_reader) == 0:
                raise Exception("ATOM EXPLORE INPUT CSVREADER NOT FOUND")
            csv_reader = csv_reader[0]
            assert isinstance(csv_reader, CsvReaderInfo)
            input_file = csv_reader.file_name
            data__filename = "%s/%s" % (mk_working_directory(
                project_id, input_comp_id), input_file)
            # Generate the data dictionary
            io_field_types = CsvReaderInfotype.objects.filter(
                project_id=project_id,
                component_id=input_comp_id,
                selected=True)
            with open(AtomExplore.csv_reader_dict_path(project_id,
                                                       component_id),
                      'w',
                      encoding='utf-8') as f:
                lines = list()
                lines.append("variable,type\n")
                for io_f_type_ in io_field_types:
                    assert isinstance(io_f_type_, CsvReaderInfotype)
                    if io_f_type_.field_type not in ["factor", "numeric"]:
                        continue
                    lines.append('"%s",%s\n' %
                                 (io_f_type_.field, io_f_type_.field_type))
                f.writelines(lines)
            dictionary__filename = AtomExplore.csv_reader_dict_path(
                project_id, component_id)
        elif input_comp_type == COMPONENTS.ROBOTX:
            # robotx
            # relations = Relation.objects.filter(project_id=project_id,component_id=input_comp_type)
            containers = Container.objects.filter(project_id=project_id,
                                                  component_id=input_comp_id)
            # if len(relations)==0:
            #     raise Exception("ATOM EXPLORE INPUT ROBOTX-RELATION NOT FOUND")
            if len(containers) == 0:
                raise Exception(
                    "ATOM EXPLORE INPUT ROBOTX-CONTAINER NOT FOUND")
            # relation = relations[0]
            container = containers[0]
            csvReaders = CsvReaderInfo.objects.filter(
                project_id=project_id, component_id=container.container_id)
            # csvReader1 = CsvReaderInfo.objects.filter(project_id=project_id, component_id=relation.target)
            if len(csvReaders) == 0:
                raise Exception(
                    "ATOM EXPLORE INPUT ROBOTX-CSVREADER NOT FOUND")
            dictionary__filename = RobotX.output_dict(project_id,
                                                      input_comp_id)
            data__filename = "%s/%s" % (Component.cluster_working_directory(
                project_id,
                csvReaders[0].component_id), csvReaders[0].file_name)
        # Explore output directory
        output__dir = self.explore_fold_path(project_id, component_id)
        self.config = Config(data__filename, dictionary__filename, feature_id,
                             feature_target, output__dir)
        # data__filename, dictionary__filename, id__varname, target__varname, output__dir

        algorithm_params = setting.EXPLORE_COMMON_PARAMS
        atom_explore_param = AtomExploreParam.objects.filter(
            project_id=project_id, component_id=component_id)
        if len(algorithm_params) != len(atom_explore_param):
            raise Exception("ALGORITHM %s LUCK OF PARAMETER" %
                            str(ALGORITHM_COMMON_PARAMS))
        for param in atom_explore_param:
            assert isinstance(param, AtomExploreParam)
            param_name = param.param_name
            param_value = param.param_value
            # Convert to the actual parameter value
            param_description = COMM_PARAMS[param_name]
            true_value = param_transform(param_description, param_value)
            if param_name in ALGORITHM_COMMON_PARAMS:
                # Common parameter
                self.config.add_common_param(param_name, true_value)
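For reference, the dictionary file written in the CSV_READER branch above is a two-column CSV listing each selected field and its type. With hypothetical fields age (numeric) and gender (factor) it would contain:

variable,type
"age",numeric
"gender",factor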
Example #22
 def get_model_metrics_local_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomLearn.MODEL_METRICS)
Example #23
 def get_prediction_csv_local_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomAct.PREDICTION_CSV)
Example #24
 def hive_reader_dict_path(project_id, component_id):
     return mk_working_directory(project_id, component_id,
                                 AtomLearn.HIVE_READER_DICT_NAME)