示例#1
0
def initial_report(name: str, project_id: str, schedule_type: str,
                   schedule_date_key: int, schedule_time_key: int):
    """Create a ``FactEtlReportModel`` stamped with the current time.

    The execution timestamp is the current instant expressed in the
    ``Asia/Ho_Chi_Minh`` time zone; the executor date/time keys are derived
    from that same value via ``func.time_to_date_key`` and
    ``func.time_to_time_key``.

    Args:
        name: Stored on the report as ``job_name``.
        project_id: Stored on the report as ``project_id``.
        schedule_type: Stored on the report as ``schedule_type``.
        schedule_date_key: Stored on the report as ``schedule_date_key``.
        schedule_time_key: Stored on the report as ``schedule_time_key``.

    Returns:
        The newly constructed ``FactEtlReportModel``. Nothing is stored by
        this function itself.
    """
    local_zone = tz.gettz('Asia/Ho_Chi_Minh')
    # Request an aware UTC "now" directly: datetime.utcnow() yields a naive
    # value that has to be patched with replace(tzinfo=...) and is deprecated
    # since Python 3.12.
    now = datetime.datetime.now(tz=tz.tzutc()).astimezone(local_zone)
    return FactEtlReportModel(
        project_id=project_id,
        job_name=name,
        executor_date_timestamp=now,
        executor_date_key=func.time_to_date_key(now),
        executor_time_key=func.time_to_time_key(now),
        schedule_type=schedule_type,
        schedule_date_key=schedule_date_key,
        schedule_time_key=schedule_time_key,
    )
 def fact_performance(self):
     """Build performance fact rows and send them to the performance table.

     Every record from ``self.get_performance()`` becomes one
     ``FactPerformanceModel``. Rows are keyed 1, 2, 3, ... in iteration
     order, ``document_key`` is left as ``None``, and the capture timestamp
     is the record's ``time`` shifted by +7 hours. The rows' ``__dict__``
     values are passed to ``self.db.update`` in a single call.

     Returns:
         The ETL report for this run, which is also appended to
         ``self.reports``. ``status_code`` is ``'PASSED'`` on success; on any
         ``Exception`` it is ``'FAILED'`` and ``description`` holds
         ``str(e)``. ``total_time_run_second`` is always filled in.
     """
     report = report_func.initial_report(
         'fact_performance', self.project_id, self.schedule_type,
         self.schedule_date_key, self.schedule_time_key)
     start_run = time.time()
     try:
         utc_7_offset = datetime.timedelta(hours=7)
         rows = []
         for performance_key, performance in enumerate(
                 self.get_performance(), start=1):
             # Resolve the process key once and reuse it for the row.
             process_key = func.get_process_key_performance_gda(
                 performance['type'], performance['task_def_key'])
             captured_at = performance['time'] + utc_7_offset
             rows.append(FactPerformanceModel(
                 performance_key=performance_key,
                 ori_performance_id=func.bson_object_to_string(
                     performance['_id']),
                 document_key=None,
                 project_id=self.project_id,
                 group_id=performance['group_id'],
                 document_id=performance['doc_id'],
                 reworked=func.int_to_bool(performance['rework_count']),
                 work_type_key=func.get_working_type_id_by_name(
                     performance['work_type']),
                 process_key=process_key,
                 number_of_record=performance['records'],
                 number_of_item=performance['items'],
                 number_of_field=performance['fields'],
                 number_of_character=performance['chars'],
                 user_name=performance['username'],
                 ip=performance['ip'],
                 captured_date_timestamp=captured_at,
                 captured_date_key=func.time_to_date_key(captured_at),
                 captured_time_key=func.time_to_time_key(captured_at),
                 total_time_second=performance['total_time'],
             ))
         self.db.update([row.__dict__ for row in rows], self.schema,
                        self.fact_performance_table)
         report.status_code = 'PASSED'
     except Exception as e:
         report.status_code = 'FAILED'
         report.description = str(e)
     finally:
         report.total_time_run_second = time.time() - start_run
         self.reports.append(report)
     # Return after the try statement: a return inside ``finally`` would also
     # discard KeyboardInterrupt/SystemExit raised while the job is running.
     return report
    def fact_data_extract(self):
        """Flatten document and transform records into data-extraction rows.

        ``self.get_docs_and_trans()`` supplies two collections:

        * documents - for each record (numbered from 1) the ``keyed_data``,
          ``final_data`` and ``qc_ed_data`` entries yield one
          ``FactDataExtractionModel`` per field, skipping ``ImagePath``.
          ``apr_ed_data`` is not processed; a note is written to the report
          description instead.
        * transforms - each record yields one row per field that is not in
          the transform ignore list, with process key 11 and no user name.

        Timestamps are shifted by +7 hours. ``document_key`` and
        ``performance_key`` are always ``None`` here. All rows are passed to
        ``self.db.update`` in a single call.

        Returns:
            The ETL report for this run, which is also appended to
            ``self.reports``. ``status_code`` is ``'PASSED'`` on success; on
            any ``Exception`` it is ``'FAILED'`` and ``description`` holds
            ``str(e)``. ``total_time_run_second`` is always filled in.
        """
        report = report_func.initial_report(
            'fact_data_extraction', self.project_id, self.schedule_type,
            self.schedule_date_key, self.schedule_time_key)
        start_run = time.time()
        col_ignores = ('ImagePath',)
        trans_ignore = ('doc_id', 'doc_uri', 'fileName', 'fileName_Bad',
                        'filter_control', 'getBatchName', 'keyer',
                        'keyer_proof', 'keyer_type', 'FolderOutput', 'Image')
        results = []

        def keyed_process_key(source, task_def_key):
            # Map a keyed_data entry to its process key. Returns None when no
            # rule matches so a row never inherits the key of an unrelated
            # earlier entry.
            # NOTE(review): confirm None is acceptable for the target column.
            by_machine = source == 'queue_transform'
            if not by_machine and task_def_key == 'Verify_Hold_Type':
                return 12  # human check bad_image keyed_data, not kpi
            if task_def_key.startswith('Type'):
                # machine save input keyed_data / human input keyed_data (kpi)
                return 4 if by_machine else 3
            if task_def_key.startswith('Proof'):
                # machine save qc keyed_data / human qc input keyed_data (kpi)
                return 6 if by_machine else 5
            return None

        def add_row(ori_document_id, document_id, doc_set_id, record_id,
                    last_modified, user_name, process_key, field_name,
                    field_value):
            # Append a single fact row to ``results``.
            results.append(FactDataExtractionModel(
                document_key=None,
                performance_key=None,
                ori_document_id=ori_document_id,
                project_id=self.project_id,
                document_id=document_id,
                doc_set_id=doc_set_id,
                record_id=record_id,
                last_modified_date_key=func.time_to_date_key(last_modified),
                last_modified_time_key=func.time_to_time_key(last_modified),
                last_modified_timestamp=last_modified,
                user_name=user_name,
                process_key=process_key,
                field_name=field_name,
                field_value=field_value,
            ))

        def add_entry_rows(entry, fields, process_key, document_id,
                           doc_set_id, record_id):
            # Append one row per non-ignored field of a document entry; the
            # entry provides the timestamp and the user name.
            last_modified = entry['createdtime'] + datetime.timedelta(hours=7)
            user_name = entry['keyer']
            for field_name, field_value_dict in fields:
                if field_name in col_ignores:
                    continue
                add_row(document_id, document_id, doc_set_id, record_id,
                        last_modified, user_name, process_key, field_name,
                        field_value_dict['text'])

        try:
            data_docs, data_trans = self.get_docs_and_trans()

            for data in data_docs:
                records = data['records']
                document_id = func.bson_object_to_string(data['_id'])
                doc_set_id = func.bson_object_to_string(data['doc_set_id'])
                for record_id, record in enumerate(records, start=1):
                    for key, value in record.items():
                        if key == 'keyed_data':
                            for keyed_data in value:
                                source = keyed_data['source']
                                task_def_key = keyed_data['task_def_key']
                                fields = keyed_data['data'][0].items()
                                add_entry_rows(
                                    keyed_data, fields,
                                    keyed_process_key(source, task_def_key),
                                    document_id, doc_set_id, record_id)
                        elif key == 'final_data':
                            final_data = value[0]
                            add_entry_rows(
                                final_data, final_data['data'][0].items(),
                                10, document_id, doc_set_id, record_id)
                        elif key == 'qc_ed_data':
                            qc_ed_data = value[0][0]
                            # Entries without recorded field errors add no rows.
                            if 'qc_fields_err' in qc_ed_data:
                                add_entry_rows(
                                    qc_ed_data,
                                    qc_ed_data['qc_fields_err'][0].items(),
                                    8, document_id, doc_set_id, record_id)
                        elif key == 'apr_ed_data':
                            report.description = 'Not handle aprove qc data because not have sample data'

            for data in data_trans:
                document_id = func.bson_object_to_string(data['doc_id'])
                ori_document_id = func.bson_object_to_string(data['_id'])
                doc_set_id = func.bson_object_to_string(data['doc_set_id'])
                records = data['records']
                last_modified = data['last_modified'] + datetime.timedelta(hours=7)
                for record_id, record in enumerate(records, start=1):
                    for field_name, field_value in record.items():
                        if field_name in trans_ignore:
                            continue
                        add_row(ori_document_id, document_id, doc_set_id,
                                record_id, last_modified, None, 11,
                                field_name, field_value)

            self.db.update([row.__dict__ for row in results], self.schema,
                           self.fact_data_extraction_table)
            report.status_code = 'PASSED'
        except Exception as e:
            report.status_code = 'FAILED'
            report.description = str(e)
        finally:
            report.total_time_run_second = time.time() - start_run
            self.reports.append(report)
        # Return after the try statement: a return inside ``finally`` would
        # also discard KeyboardInterrupt/SystemExit raised during the job.
        return report
示例#4
0
    def fact_performance(self):
        """Build performance fact rows and create them in the performance table.

        Keys continue from the largest ``performance_key`` reported by
        ``self.db.get_max_id_table`` (starting at 1 when it returns ``None``).
        Every record from ``self.get_performance()`` becomes one
        ``FactPerformanceModel`` whose capture timestamp is the record's
        ``time`` shifted by +7 hours, and a matching lookup entry is appended
        to ``self.performance_key_checks``. The rows' ``__dict__`` values are
        passed to ``self.db.create`` in a single call.

        A short summary (distinct types, distinct task definition keys and
        the number of rows with process key 3, 5 and 8) is printed before the
        rows are created.

        Returns:
            The ETL report for this run, which is also appended to
            ``self.reports``. ``status_code`` is ``'PASSED'`` on success; on
            any ``Exception`` it is ``'FAILED'`` and ``description`` holds
            ``str(e)``. ``total_time_run_second`` is always filled in.
        """
        report = report_func.initial_report(
            'fact_performance', self.project_id, self.schedule_type,
            self.schedule_date_key, self.schedule_time_key)
        start_run = time.time()
        try:
            rows = []
            data_performance = self.get_performance()
            max_key = self.db.get_max_id_table(
                schema=self.schema,
                table=self.fact_performance_table,
                col='performance_key')
            next_key = 1 if max_key is None else max_key + 1

            human_input = 0
            human_verify_input = 0
            human_qc_data = 0
            types = []
            tasks = []
            for performance in data_performance:
                # Resolve the process key once and reuse it for the row.
                process_key = func.get_process_key_performance_gda(
                    performance['type'], performance['task_def_key'])
                if performance['type'] not in types:
                    types.append(performance['type'])
                # Task definition keys are collected in ``tasks``; they must
                # not be mixed into ``types``.
                if performance['task_def_key'] not in tasks:
                    tasks.append(performance['task_def_key'])
                if process_key == 3:
                    human_input += 1
                if process_key == 5:
                    human_verify_input += 1
                if process_key == 8:
                    human_qc_data += 1

                captured_at = performance['time'] + datetime.timedelta(hours=7)
                document_key = self.get_document_key_by_document_id(
                    performance['doc_id'])
                rows.append(FactPerformanceModel(
                    performance_key=next_key,
                    ori_performance_id=func.bson_object_to_string(
                        performance['_id']),
                    document_key=document_key,
                    project_id=self.project_id,
                    group_id=performance['group_id'],
                    document_id=performance['doc_id'],
                    reworked=func.int_to_bool(performance['rework_count']),
                    work_type_key=func.get_working_type_id_by_name(
                        performance['work_type']),
                    process_key=process_key,
                    number_of_record=performance['records'],
                    number_of_item=performance['items'],
                    number_of_field=performance['fields'],
                    number_of_character=performance['chars'],
                    user_name=performance['username'],
                    ip=performance['ip'],
                    captured_date_timestamp=captured_at,
                    captured_date_key=func.time_to_date_key(captured_at),
                    captured_time_key=func.time_to_time_key(captured_at),
                    total_time_second=performance['total_time'],
                ))
                self.performance_key_checks.append({
                    'performance_key': next_key,
                    'user_name': performance['username'],
                    'document_key': document_key,
                    'module_type': performance['type'],
                    'task_def_key': performance['task_def_key'],
                })
                next_key += 1

            print('types', types)
            print('tasks', tasks)
            print('human_input', human_input)
            print('human_verify_input', human_verify_input)
            print('human_qc_data', human_qc_data)

            self.db.create([row.__dict__ for row in rows], self.schema,
                           self.fact_performance_table)
            report.status_code = 'PASSED'
        except Exception as e:
            report.status_code = 'FAILED'
            report.description = str(e)
        finally:
            report.total_time_run_second = time.time() - start_run
            self.reports.append(report)
        # Return after the try statement: a return inside ``finally`` would
        # also discard KeyboardInterrupt/SystemExit raised during the job.
        return report