Example #1
import yagmail

# ConfigData, cm (common utilities) and gc (global constants) are project modules assumed to be imported elsewhere in this file

def send_yagmail(emails_to, subject, message, email_from=None, attachment_path=None, smtp_server=None, smtp_server_port=None):
    root_dir = cm.get_project_root()
    cnf_path = str(root_dir.joinpath(gc.MAIN_CONFIG_FILE))
    m_cfg = ConfigData(cnf_path)
    if not email_from:
        email_from = m_cfg.get_value('Email/default_from_email')
    if not smtp_server:
        smtp_server = m_cfg.get_value('Email/smtp_server')
    if not smtp_server_port:
        smtp_server_port = m_cfg.get_value('Email/smtp_server_port')
    
    # receiver = emails_to  # '[email protected], [email protected], [email protected]'
    body = message
    filename = attachment_path  # 'test.png'
    
    yag = yagmail.SMTP(email_from,
                       host=smtp_server,
                       smtp_skip_login=True,
                       smtp_ssl=False,
                       soft_email_validation=False,
                       port=smtp_server_port)
    yag.send(
        to=emails_to,
        subject=subject,
        contents=body, 
        attachments=filename,
    )
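
A minimal usage sketch: the recipient address, subject, message, and attachment path below are placeholders, and the Email/* keys are assumed to exist in the main config file so the defaults can be resolved.

# hypothetical call; all argument values are illustrative only
send_yagmail(
    emails_to=['recipient@example.org'],
    subject='Submission package ready',
    message='The submission package was prepared successfully.',
    attachment_path='transfer_log.txt',
)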
Example #2
def process(data_type, study_id, center_id, center_ids, dataset_type_id,
            out_file, output_format, server_url):
    # print ("data_type = {}, study_id = {}, out_file = {}".format(data_type, study_id, out_file))
    # get URL of the API server
    from utils import ConfigData
    main_cfg = ConfigData('configs/main_config.yaml')
    api_server_url = main_cfg.get_value('SAMPLEINFO_CLI_URL')
    if server_url:
        # print('server_url: {}'.format(api_server_url))
        click.echo('server_url: {}'.format(api_server_url))

    if check_data_type_value(data_type):
        api_url, err_msg = identify_api_url(api_server_url, data_type,
                                            study_id, center_id, center_ids,
                                            dataset_type_id)
    else:
        api_url = ''
        err_msg = 'Unexpected data_type value ({}) was provided. Run --help for the list of expected values.'\
            .format(data_type)
    if len(err_msg) == 0:
        if len(api_url) > 0:
            # access api and retrieve the data
            response = requests.get(api_url)
            # print ("data_type = {}, study_id = {}, out_file = {}".format(data_type, stu)
            # print(response.status_code)
            # json_parsed =

            output_data(response.json(), out_file, output_format)
        else:
            print(
                'Error: Cannot identify the database call for the given parameters.'
            )
    else:
        # report an error
        print('Error: {}'.format(err_msg))
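
output_data is not included in this example. Below is a minimal sketch of what such a helper could look like, assuming it only has to dump the parsed JSON response either to stdout or to a file, as JSON or as a naive tab-delimited table; the name, parameters, and behavior are assumptions, not the project's actual implementation.

import json

def output_data(data, out_file=None, output_format='json'):
    # hypothetical helper; the real implementation is not shown in this example
    if output_format == 'json':
        text = json.dumps(data, indent=4)
    else:
        # naive tab-delimited rendering for a list of flat dictionaries
        rows = data if isinstance(data, list) else [data]
        header = sorted({key for row in rows for key in row})
        lines = ['\t'.join(header)]
        lines += ['\t'.join(str(row.get(key, '')) for key in header) for row in rows]
        text = '\n'.join(lines)
    if out_file:
        with open(out_file, 'w') as f:
            f.write(text)
    else:
        print(text)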
Example #3
def convert_sub_aliq_to_aliquot(sa, assay):
    aliquot = sa
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    assay_postfixes = fl_cfg_dict.get_value('assay_sub_aliquot_postfix/' + assay)  # get_item_by_key
    if assay_postfixes is not None:
        for assay_postfix in assay_postfixes:
            apf_len = len(assay_postfix)
            if sa[-apf_len:] == assay_postfix:
                aliquot = sa[:len(sa) - apf_len]
                break  # exit loop if a match was found
    return aliquot
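
For illustration, assuming the dictionary config maps an assay to postfixes such as ['_PR1', '_PR2'] (made-up values), the function strips a matching postfix and otherwise returns the id unchanged:

# assuming 'assay_sub_aliquot_postfix/proteomics' resolves to ['_PR1', '_PR2'] (hypothetical)
# convert_sub_aliq_to_aliquot('ABC123_PR1', 'proteomics')  -> 'ABC123'
# convert_sub_aliq_to_aliquot('ABC123', 'proteomics')      -> 'ABC123' (no postfix matched)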
Example #4
    def load_assay_conf(self, assay, project):
        assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project)
        cfg_assay = ConfigData(assay_cfg_path)
        assay_config = cfg_assay.get_value(assay.upper())
        if assay_config:
            self.logger.info(
                "Configuration for the {} assay was loaded from the assay config file: {}. "
                .format(assay.upper(), assay_cfg_path))
        else:
            _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \
                   "Aborting execution.".format(assay.upper(), assay_cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)

        return assay_config
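
ConfigData itself is not shown in these examples. Here is a minimal sketch of the behavior its calls suggest, assuming it loads a YAML file and that get_value resolves slash-separated key paths into the nested dictionary; this is an assumption about the class, not its actual code.

import yaml

class ConfigDataSketch:
    # hypothetical stand-in for the project's ConfigData class
    def __init__(self, cfg_path):
        with open(cfg_path) as f:
            self._data = yaml.safe_load(f)
        self.loaded = self._data is not None

    def get_value(self, key_path):
        # 'Email/smtp_server' -> self._data['Email']['smtp_server'], or None if any key is missing
        node = self._data
        for key in key_path.split('/'):
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node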
Example #5
class Request(File):
    def __init__(self, filepath, main_cfg, file_type=2, sheet_name=''):

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        if main_cfg:
            self.conf_main = main_cfg
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # if cfg_path=='':
        #     self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # else:
        #     self.conf_main = ConfigData(cfg_path)

        self.error = RequestError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Submission request file {}'.format(filepath))

        # self.file_dict = OrderedDict()
        # self.rows = OrderedDict()

        self.columnlist = []
        self.samples = []
        self.sub_aliquots = []
        self.disqualified_sub_aliquots = {}
        self.aliquots_to_subaliquots_map = {
        }  # holds the map of aliquots to sub-aliquots for interpreting DB responses
        self.disqualified_request_path = ''  # will store path to a request file with disqualified sub-aliquots
        self.project = ''
        self.bulk_location = ''
        self.assay = ''
        self.center = ''
        self.center_id = None
        self.center_code = None
        self.experiment_id = ''
        self.data_source_names = ''
        self.data_source_objects = {
        }  # dictionary to store all collected data sources for the request

        self.aliquots = None
        self.qualified_aliquots = None
        self.raw_data = None
        self.assay_data = None
        self.attachments = None
        self.submission_forms = None
        self.submission_package = None
        self.data_source_names = None
        # will hold value corresponding to the type of data source being used (attachments are not ignored)
        # possible value 'db' and 'file'. The value of the variable being set based on the first data source being used
        self.data_source_forms_assignment = None

        # self.sheet_name = ''
        self.sheet_name = sheet_name.strip()
        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.REQUEST_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_assay = None

        self.get_file_content()

    def get_file_content(self):
        if not self.columnlist:
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(
                    self.filepath))

                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = (
                                'Given worksheet name "{}" was not found in the file "{}". '
                                'Verify that the worksheet name exists in the file.'
                            ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lineList = None
                            self.loaded = False
                            return self.lineList

                sheet.cell_value(0, 0)

                lines = [
                ]  # will hold content of the request file as an array of arrays (rows)
                for i in range(sheet.ncols):
                    column = []
                    for j in range(sheet.nrows):
                        if i == 0:
                            lines.append(
                                []
                            )  # adds an array for each new row in the request file

                        # print(sheet.cell_value(i, j))
                        cell = sheet.cell(j, i)
                        cell_value = cell.value
                        # take care of number and dates received from Excel and converted to float by default
                        if cell.ctype == 2 and int(cell_value) == cell_value:
                            # the key is integer
                            cell_value = str(int(cell_value))
                        elif cell.ctype == 2:
                            # the key is float
                            cell_value = str(cell_value)
                        # convert date back to human readable date format
                        # print ('cell_value = {}'.format(cell_value))
                        if cell.ctype == 3:
                            cell_value_date = xlrd.xldate_as_datetime(
                                cell_value, wb.datemode)
                            cell_value = cell_value_date.strftime("%Y-%m-%d")
                        column.append(
                            cell_value
                        )  # adds value to the current column array
                        lines[j].append(
                            '"' + str(cell_value) + '"'
                        )  # adds value in "csv" format for a current row

                    # self.columnlist.append(','.join(column))
                    self.columnlist.append(
                        column)  # adds a column to a list of columns

                # populate lineList property
                self.lineList = []
                for ln in lines:
                    self.lineList.append(','.join(ln))

                wb.unload_sheet(sheet.name)

                # load passed request parameters (by columns)
                self.get_request_parameters()

                # validate provided information
                self.logger.info(
                    'Validating provided request parameters. project: "{}", bulk location: "{}", '
                    'assay: "{}", db_center_code_or_id: "{}",'
                    'Sub-Aliquots: "{}"'.format(self.project,
                                                self.bulk_location, self.assay,
                                                self.center,
                                                self.sub_aliquots))
                self.validate_request_params()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    # print(self.error.count)
                    # print(self.error.get_errors_to_str())
                    _str = 'Errors ({}) were identified while validating the request. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                else:
                    self.loaded = True
                    _str = 'Request parameters were successfully validated - no errors found.'
                self.logger.info(_str)

                # combine Experiment_id out of request parameters
                if self.center_code and len(self.center_code.strip()) > 0:
                    # use center code if available
                    self.experiment_id = "_".join(
                        [self.project, self.center_code, self.assay])
                else:
                    # use provided value for the center column from request, if center_code is not available
                    self.experiment_id = "_".join(
                        [self.project, self.center, self.assay])

            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist.'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columnlist = None
                self.lineList = None
                self.loaded = False
        return self.lineList

    # get all values provided in the request file
    def get_request_parameters(self):
        self.project = self.columnlist[0][1]
        self.bulk_location = self.columnlist[1][1]
        self.assay = self.columnlist[2][1].lower()
        self.center = self.columnlist[3][
            1]  # center code (if alpha numeric) or center id (if numeric)
        self.sub_aliquots = self.columnlist[4]
        if self.sub_aliquots and len(self.sub_aliquots) > 0:
            self.sub_aliquots.pop(0)  # get rid of the column header
        # self.samples = self.columnlist[5]
        # if self.samples and len(self.samples) > 0:
        #     self.samples.pop(0) # get rid of the column header

    # validates provided parameters (loaded from the submission request file)
    def validate_request_params(self):
        _str_err = ''
        _str_warn = ''
        if len(self.sub_aliquots) == 0:
            _str_err = '\n'.join([
                _str_err, 'List of provided sub-samples is empty. '
                'Aborting processing of the submission request.'
            ])
        # Check if empty sub-samples were provided
        if '' in self.sub_aliquots:
            cleaned_cnt = 0
            # iterate over a copy so that removals do not skip elements of the original list
            for s in list(self.sub_aliquots):
                # check for any empty sub-aliquot values and remove them
                if len(s.strip()) == 0:
                    self.sub_aliquots.remove(s)
                    cleaned_cnt += 1
            if cleaned_cnt > 0:
                _str_warn = '\n'.join([
                    _str_warn,
                    'Empty sub-aliquots (count = {}) were removed from the list. '
                    'Here is the list of sub-aliquots after cleaning (count = {}): "{}" '
                    .format(cleaned_cnt, len(self.sub_aliquots),
                            self.sub_aliquots)
                ])
        # check for empty values
        if len(self.project) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Program name was provided. Aborting processing of the submission request.'
            ])
        if len(self.bulk_location) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Bulk Location was provided. Aborting processing of the submission request.'
            ])
        if len(self.assay) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Assay was provided. Aborting processing of the submission request.'
            ])
        if len(self.center) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No DB Center information was provided. Aborting processing of the submission request.'
            ])

        # check for values that should match some predefined values from a dictionary
        # check assay value
        if not cm2.key_exists_in_dict(self.assay, 'assay'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Assay name "{}" is not matching a list of expected assay names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.assay, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided assay name is expected, convert it to the name expected by the Submission logic
            self.assay = cm2.get_dict_value(self.assay, 'assay')

        # check project value
        if not cm2.key_exists_in_dict(self.project.lower(), 'project'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Program name "{}" is not matching a list of expected names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.project, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided assay name is expected, convert it to the name expected by the Submission logic
            self.project = cm2.get_dict_value(self.project.lower(), 'project')

        # validate center_code or center_id value
        self.logger.info(
            'Start validation of center value "{}" provided in the request'.
            format(self.center))
        db = DBAccess(self.logger, self.error,
                      self.conf_main)  # create DBAccess object
        db.open_connection()
        # test center value assuming center code was provided
        dataset = db.validate_center_code(self.center, self.project, 'code',
                                          'code')
        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
            dataset, 'center_id', 'center_code')
        if center_id_out1:
            # center id was returned, meaning center was validated fine
            self.center_id = center_id_out1
            # get center code value from the current DB dataset
            _str_err_out3, center_code = self.get_field_value_from_dataset(
                dataset, 'center_code')
            if center_code:
                # center code retrieved OK
                self.center_code = center_code
            else:
                # report an error during retrieving center_code
                _str_err = '\n'.join([_str_err, _str_err_out3])
        else:
            # if center code was not validated at first attempt, validate it assuming the center id was given
            dataset = db.validate_center_code(self.center, self.project, 'id',
                                              'code')
            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                dataset, 'center_id', 'center_id')
            if center_id_out2:
                # center id was validated at the 2nd attempt, ignore the 1st failed center code validation
                self.center_id = center_id_out2
                # get center code value from the current DB dataset
                _str_err_out3, center_code = self.get_field_value_from_dataset(
                    dataset, 'center_code')
                if center_code:
                    # center code retrieved OK
                    self.center_code = center_code
                else:
                    # report an error during retrieving center_code
                    _str_err = '\n'.join([_str_err, _str_err_out3])
            else:
                # center validation attempts failed, report both failures
                _str_err = '\n'.join([_str_err, _str_err_out1, _str_err_out2])

        # get list of aliquots from list of sub-aliquots
        self.aliquots = [
            cm2.convert_sub_aliq_to_aliquot(al, self.assay)
            for al in self.sub_aliquots
        ]

        # create a map to convert aliquot value to sub_aliquot value (for processing DB responses given for aliquots)
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            self.aliquots_to_subaliquots_map[a] = sa

        if self.center_id:
            self.logger.info('Start validation of aliquot ids vs DB')
            # if center id was validated in the above code, validate received aliquots vs manifest dataset in DB
            dataset = db.validate_aliquot_ids(self.center_id, self.aliquots)
            if dataset:
                # create dictionary of received aliquots/sample ids
                aliquots_to_samples_map = {}
                for row in dataset:
                    if '_aliquot_id' in row and '_sample_id' in row:
                        aliquots_to_samples_map[
                            row['_aliquot_id']] = row['_sample_id']
                # check if each aliquot id was returned from a database and get the sample id from the dataset
                for sa, a in zip(self.sub_aliquots, self.aliquots):
                    if a in aliquots_to_samples_map:
                        if len(str(aliquots_to_samples_map[a]).strip()) > 0:
                            self.samples.append(aliquots_to_samples_map[a])
                        else:
                            _str = 'Blank Sample Id value was returned from DB for the sub-aliquot id "{}". ' \
                                   'The sub-aliquot was disqualified'.format(sa)
                            self.disqualify_sub_aliquot(sa, _str)
                            _str_warn = '\n'.join([_str_warn, _str])
                    else:
                        _str = 'Sub-aliquot id "{}" was not found in the database and was disqualified'.format(
                            sa)
                        self.disqualify_sub_aliquot(sa, _str)
                        _str_warn = '\n'.join([_str_warn, _str])
            else:
                _str_err = '\n'.join([
                    _str_err,
                    'Aliquot ids cannot be validated since no data was returned from DB for '
                    'center_id = "{}" and aliquot ids as following: {} '.
                    format(self.center_id, self.aliquots)
                ])
        db = None

        # report any collected errors
        if len(_str_err) > 0:
            _str_err = 'Validation of request parameters:' + _str_err
            self.error.add_error(_str_err)
            self.logger.error(_str_err)
        # report any collected warnings
        if len(_str_warn) > 0:
            _str_warn = 'Validation of request parameters:' + _str_warn
            self.logger.warning(_str_warn)

    def check_validation_dataset_outcome(self, dataset, validation_id_column,
                                         validation_id_name):
        _str_err = ''
        row_num = 1
        validation_id_out = None
        # initialize to safe defaults in case the returned row lacks the expected keys
        status = None
        description = ''
        validation_id = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]  # get the first row of the dataset
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # center_id
                    validation_id = row[validation_id_column]
            if status == 'OK':  # validation was successful
                validation_id_out = validation_id
            elif status == 'Failed':  # validation has failed
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB has Failed, description: {}'
                    .format(validation_id_name, description)
                ])
            else:  # unexpected status value was returned
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB returned unexpected status {}'
                    .format(validation_id_name, status)
                ])
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported while validating {} in the DB. '
                'Check earlier entries in the log file.'.format(
                    validation_id_name)
            ])

        return _str_err, validation_id_out

    def get_field_value_from_dataset(self, dataset, field_name, row_num=None):
        # set default values
        if row_num is None:
            row_num = 1  # default row is #1

        _str_err = ''
        value_out = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]
                if field_name in row:
                    value_out = row[field_name]
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported while retrieving the value of "{}" (row #{}) from the dataset. '
                .format(field_name, row_num)
            ])

        return _str_err, value_out

    def setup_logger(self, wrkdir, filename):

        # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)

        log_folder_name = gc.REQ_LOG_DIR  # gc.LOG_FOLDER_NAME

        # m_logger_name = gc.MAIN_LOG_NAME
        # m_logger = logging.getLogger(m_logger_name)

        logger_name = gc.REQUEST_LOG_NAME
        logging_level = self.conf_main.get_value('Logging/request_log_level')

        # if a relative path provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(log_folder_name):
            log_folder_path = Path(wrkdir) / log_folder_name
        else:
            log_folder_path = Path(log_folder_name)

        lg = setup_logger_common(
            logger_name,
            logging_level,
            log_folder_path,  # Path(wrkdir) / log_folder_name,
            str(filename) + '_' +
            time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log')

        self.log_handler = lg['handler']
        return lg['logger']

    def load_request_configuration(self):
        # update main config file with the project/environment specific details from additional config files
        self.load_project_config_into_main(
            self.project
        )  # loads project specific config and merges it into main config
        # load project specific assay config file
        self.conf_assay = self.load_assay_conf(self.assay, self.project)
        if self.conf_assay:
            # update loaded assay config file with project/environment specific config assay_location_config.yaml
            self.conf_assay = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_ASSAY_LOCATION, self.project, self.conf_assay)

    def process_request(self):
        self.data_source_names = cm.get_value_from_dictionary(
            'data_sources', self.conf_assay)  # self.conf_assay['data_sources']

        # path to the folder where created submission packages will be located.
        # since this location can be provided in the project config file, this assignment is happening
        # after loading the project config
        gc.OUTPUT_PACKAGES_DIR = self.conf_main.get_value(
            'Submission_location/output_packages')

        for data_source_name in self.data_source_names:
            # if isinstance(data_source_name, tuple)
            if isinstance(data_source_name, str):
                if data_source_name == 'attachment':
                    self.attachments = Attachment(self)
                elif data_source_name[-3:] == "_db":
                    self.data_source_objects[data_source_name] = DataSourceDB(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'db'
                else:
                    self.data_source_objects[data_source_name] = DataSource(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'file'
            elif isinstance(data_source_name, tuple):
                if data_source_name[0][-3:] == "_db":
                    self.data_source_objects[
                        data_source_name[0]] = DataSourceDB(
                            self, data_source_name[0], data_source_name[1])
                else:
                    self.data_source_objects[data_source_name[0]] = DataSource(
                        self, data_source_name[0], data_source_name[1])
            else:
                self.logger.error(
                    'Provided data source name ({}) is of unexpected format and cannot be processed.'
                    .format(data_source_name))

        # if data_source_forms_assignment was not assigned a value earlier, set it to the default
        # this is the case when an assay submits only attachments and does not use any assay or QC data
        if not self.data_source_forms_assignment:
            self.data_source_forms_assignment = gc.DEFAULT_DATA_SOURCE_FORMS_ASSIGNMENT

        self.submission_package = SubmissionPackage(self)

        self.create_request_for_disqualified_sub_aliquots()

        self.create_trasfer_script_file()

        # check for errors and put final log entry for the request.
        if self.error.exist():
            _str = 'Processing of the current request was finished with the following errors: {}\n'.format(
                self.error.get_errors_to_str())
            self.logger.error(_str)
        else:
            _str = 'Processing of the current request was finished successfully.\n'
            self.logger.info(_str)

    def load_assay_conf(self, assay, project):
        assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project)
        cfg_assay = ConfigData(assay_cfg_path)
        assay_config = cfg_assay.get_value(assay.upper())
        if assay_config:
            self.logger.info(
                "Configuration for the {} assay was loaded from the assay config file: {}. "
                .format(assay.upper(), assay_cfg_path))
        else:
            _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \
                   "Aborting execution.".format(assay.upper(), assay_cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)

        return assay_config

    # def update_cfg_assay_with_location_details(self, project, cfg_assay):
    #     cfg_assay_location = ConfigData(gc.CONFIG_FILE_ASSAY_LOCATION.replace('{project}', project))
    #     if cfg_assay_location.loaded:
    #         self.logger.info('Local config file "{}" was loaded and being used.'.format(cfg_assay_location.cfg_path))
    #         cfg_assay = cm.update_dictionary_matching_keys(cfg_assay, cfg_assay_location.get_whole_dictionary())
    #     else:
    #         _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
    #             .format(cfg_assay_location.cfg_path)
    #         self.logger.error(_str)
    #         self.error.add_error(_str)
    #     return cfg_assay

    def update_cfg_dictionary_with_location_details(self, location_path,
                                                    project, cfg_to_update):
        cfg_location = ConfigData(location_path.replace('{project}', project))
        if cfg_location.loaded:
            self.logger.info(
                'Local config file "{}" was loaded and being used.'.format(
                    cfg_location.cfg_path))
            cfg_to_update = cm.update_dictionary_matching_keys(
                cfg_to_update, cfg_location.get_whole_dictionary())
        else:
            _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
                .format(cfg_location.cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)
        return cfg_to_update

    def load_project_config_into_main(self, project):
        # load project specific "project_config" config file
        cfg_project = ConfigData(
            gc.CONFIG_FILE_PROJECT.replace('{project}', project))
        if cfg_project.loaded:
            # if cfg_project was loaded, update it with the environment specific settings (from project_location config)
            cfg_project_updated = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_PROJECT_LOCATION, self.project,
                cfg_project.get_whole_dictionary())
            # update main config with the outcome of the previous updates
            self.conf_main.update(cfg_project_updated)

    def create_trasfer_script_file(self):
        self.logger.info("Start preparing transfer_script.sh file.")
        # path for the script file being created
        sf_path = Path(self.submission_package.submission_dir +
                       "/transfer_script.sh")

        # get script file template
        with open('scripts/' + self.project + '/transfer_script.sh',
                  'r') as ft:
            scr_tmpl = ft.read()

        # update placeholders in the script with the actual values
        smtp_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_env_name'))
        smtp_port = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_port_env_name'))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!smtp!}", smtp_server + ":" + str(smtp_port))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!to_email!}",
            ','.join(self.conf_main.get_value("Email/sent_to_emails")))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!from_email!}",
            self.conf_main.get_value("Email/default_from_email"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!send_email_flag!}",
            str(self.conf_main.get_value("Email/send_emails")))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!cmd!}",
            self.conf_main.get_value("DataTransfer/transfer_command"))

        # the following will be utilized if mount point is being used by the transfer script (i.e. for Peerless)
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mp_cmd!}",
            self.conf_main.get_value("DataTransfer/mount_point_command"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mount_local_dir!}",
            self.conf_main.get_value("DataTransfer/mount_local_dir"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mount_remote_dir!}",
            self.conf_main.get_value("DataTransfer/mount_remote_dir"))

        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!source_dir!}", self.submission_package.submission_dir)
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!target_dir!}",
            self.conf_main.get_value("DataTransfer/remote_target_dir"))

        ssh_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_server_env_name'))
        scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_server!}",
                                              str(ssh_server))
        # apply user name as the very last replacement statement, since it can be used as part of previous replacements
        ssh_user = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_user_env_name'))
        scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_user!}",
                                              str(ssh_user))

        set_permissions = False
        set_perm_value = self.conf_main.get_value("DataTransfer/exec_permis")
        if set_perm_value:
            try:
                exec_permission = eval(set_perm_value.strip())
                set_permissions = True
            except Exception as ex:
                _str = 'Unexpected error "{}" occurred while evaluating the "DataTransfer/exec_permis" value ' \
                       '"{}" retrieved from the main config file. Permission setup operation will be skipped. \n{} '\
                    .format(ex, set_perm_value, traceback.format_exc())
                self.logger.warning(_str)
                # self.error.add_error(_str)
                set_permissions = False

        with open(sf_path, "w") as sf:
            sf.write(scr_tmpl)

        if set_permissions:
            try:
                # if permissions to be set were retrieved from config file, set them here
                st = os.stat(sf_path)
                os.chmod(sf_path, st.st_mode | exec_permission)  #stat.S_IXUSR
            except Exception as ex:
                _str = 'Unexpected error "{}" occurred while setting permissions "{}" for the script file ' \
                       '"{}". \n{} '\
                    .format(ex, set_perm_value, sf_path, traceback.format_exc())
                self.logger.warning(_str)
                self.error.add_error(_str)
        else:
            _str = 'Permission setup was skipped for the transfer script file. ' \
                   'Note: value of "DataTransfer/exec_permis" from main config was set to "{}".'\
                                    .format(set_perm_value)
            self.logger.warning(_str)

        self.logger.info("Finish preparing '{}' file.".format(sf_path))

    def disqualify_sub_aliquot(self, sa, details):
        # adds a sub-aliquot to the dictionary of disqualified sub_aliquots
        # key = sub-aliquot, value = array of details for disqualification; 1 entry can have multiple detail reasons
        if sa in self.disqualified_sub_aliquots.keys():
            self.disqualified_sub_aliquots[sa].append(details)
        else:
            arr_details = [details]
            self.disqualified_sub_aliquots[sa] = arr_details
        self.logger.warning(
            'Sub-aliquot "{}" was disqualified with the following details: "{}"'
            .format(sa, details))

    def populate_qualified_aliquots(self):
        # reset self.qualified_aliquots array
        self.qualified_aliquots = []
        #select only aliquots that were not disqualified
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            if not sa in self.disqualified_sub_aliquots.keys():
                self.qualified_aliquots.append(a)

    def create_request_for_disqualified_sub_aliquots(self):

        # proceed only if some disqualified sub-aliquots are present
        if self.disqualified_sub_aliquots:

            self.logger.info(
                "Start preparing a request file for disqualified sub-aliquots '{}'."
                .format([val
                         for val in self.disqualified_sub_aliquots.keys()]))

            wb = xlwt.Workbook()  # create empty workbook object
            sh = wb.add_sheet(
                'Submission_Request'
            )  # sheet name can not be longer than 32 characters

            cur_row = 0  # first row for 0-based array
            cur_col = 0  # first col for 0-based array
            #write headers to the file
            headers = self.get_headers()
            for val in headers:
                sh.write(cur_row, cur_col, val)
                cur_col += 1

            cur_row += 1

            for sa in self.sub_aliquots:
                if sa in self.disqualified_sub_aliquots.keys():
                    sh.write(cur_row, 0, self.project)
                    sh.write(cur_row, 1, self.bulk_location)
                    sh.write(cur_row, 2, self.assay)
                    sh.write(cur_row, 3, self.center)
                    sh.write(cur_row, 4, sa)
                    cur_row += 1

            self.disqualified_request_path = Path(
                gc.DISQUALIFIED_REQUESTS + '/' +
                time.strftime("%Y%m%d_%H%M%S", time.localtime()) +
                '_reprocess_disqualified_' + Path(self.filename).stem +
                '.xls')

            # if DISQUALIFIED_REQUESTS folder does not exist, it will be created
            os.makedirs(gc.DISQUALIFIED_REQUESTS, exist_ok=True)

            wb.save(str(self.disqualified_request_path))

            self.logger.info(
                "Successfully prepared the request file for disqualified sub-aliquots and saved in '{}'."
                .format(str(self.disqualified_request_path)))
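
For reference, get_request_parameters above reads the worksheet column-wise: it takes the second row of the first four columns (project, bulk location, assay, center code or id) and everything below the header of the fifth column (sub-aliquots). A small sketch of building a compatible request file with xlwt follows; the header captions and data values are placeholders, and only the column order matters to the parser.

import xlwt

# placeholder values; only the column order (A..E) matters to get_request_parameters
rows = [
    ('ProjectX', '/data/bulk_uploads', 'proteomics', 'CENTER01', 'ABC123_PR1'),
    ('ProjectX', '/data/bulk_uploads', 'proteomics', 'CENTER01', 'ABC124_PR1'),
]

wb = xlwt.Workbook()
sh = wb.add_sheet('Submission_Request')
for col, caption in enumerate(['Project', 'Bulk Location', 'Assay', 'Center', 'Sub-Aliquot']):
    sh.write(0, col, caption)          # header row
for row_num, row in enumerate(rows, start=1):
    for col, value in enumerate(row):
        sh.write(row_num, col, value)  # data rows read by get_request_parameters
wb.save('submission_request.xls')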
Example #6
import os
import getpass
import traceback
from pathlib import Path
from utils import Monitor
from utils import ConfigData, common as cm, common2 as cm2, global_const as gc, send_yagmail  #, send_email as email

# if executed by itself, do the following
if __name__ == '__main__':

    gc.CURRENT_PROCCESS_LOG_ID = 'monitor_file'
    # load main config file and get required values
    m_cfg = ConfigData(gc.MAIN_CONFIG_FILE)

    # setup application level logger
    cur_dir = Path(os.path.dirname(os.path.abspath(__file__)))
    mlog, log_handler = cm.setup_logger(m_cfg, cur_dir,
                                        gc.CURRENT_PROCCESS_LOG_ID)
    monitor_path = m_cfg.get_value('Location/monitor_configs')

    # Verify that the target directory (monitor_path) is accessible for the current user (under which the app is running)
    # Identify the user under which the app is running if monitor_path is not accessible
    if not os.path.exists(monitor_path):
        _str = 'Directory "{}" does not exist or is not accessible for the current user. Aborting execution. ' \
               'Expected user login: "{}", Effective user: "{}"'.format(monitor_path, os.getlogin(), getpass.getuser())
        mlog.error(_str)

        # send notification email alerting about the error case
        email_subject = 'Error occurred during running file_monitoring tool.'
        email_body = 'The following error caused interruption of execution of the application<br/>' \
                     + str(Path(os.path.abspath(__file__))) \
                     + '<br/><br/><font color="red">' \
                     + _str + '</font>'
        try:
class MappingFileText(File):
    def __init__(self,
                 filepath,
                 conf_source,
                 log_obj,
                 file_type=None,
                 file_delim=None):
        # setup default parameters
        if file_type is None:
            file_type = 1
        if file_delim is None:
            file_delim = ','  #'\t'

        File.__init__(self, filepath, file_type, file_delim)

        self.conf_src = ConfigData('', conf_source)
        self.logger = log_obj
        self.map = {
        }  # it will hold a dict where key is an aliquot id and value is the relative path to the file

        # set file properties before loading it
        self.file_delim = self.conf_src.get_value('file_delim') \
            if self.conf_src.get_value('file_delim') else self.file_delim
        self.header_row_num = self.conf_src.get_value('header_row_num') \
            if self.conf_src.get_value('header_row_num') else self.header_row_num

        # load the file
        self.get_file_content()

    def load_map(self, data_loc):
        disqualify = None
        aliquot_id_col_num = self.conf_src.get_value('aliquot_id_column_num')
        template_fields_col_num = self.conf_src.get_value(
            'template_fields_col_num')
        file_path = self.conf_src.get_value('file_path_template')
        # raw_file_name = self.conf_src.get_value('file_name_template')

        if aliquot_id_col_num is None:
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Expected map file\'s configuration parameter "aliquot_id_col_num" was not provided.'
        if template_fields_col_num is None:
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Expected map file\'s configuration parameter "template_fields_col_num" was not provided.'
        if file_path is None:
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Expected map file\'s configuration parameter "file_path_template" was not provided.'
        if not isinstance(aliquot_id_col_num, int):
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Non-integer value was provided for the map file\'s "aliquot_id_col_num" parameter.'
        for entry in template_fields_col_num:
            if not isinstance(template_fields_col_num[entry], int):
                disqualify = ('' if disqualify is None else disqualify + '| ')
                disqualify = disqualify + 'Non-integer value was provided for the map file\'s {} parameter.'.format(
                    entry)

        if disqualify is None:
            row_num = 0
            for row in self.lineList:
                row_num += 1
                if row_num <= self.header_row_num:
                    continue

                cur_aliquot_id = row[aliquot_id_col_num - 1]
                cur_fields = copy.deepcopy(template_fields_col_num)
                cur_raw_file_path = file_path
                # cur_raw_file_name = raw_file_name

                # combine path of the data file for the current row of mapping file
                for fld_name in cur_fields:
                    fld_val = row[cur_fields[fld_name] - 1]
                    cur_raw_file_path = cur_raw_file_path.replace(
                        '{' + fld_name + '}', fld_val)

                # print (str(Path(data_loc) / cur_raw_file_path))
                files = glob.glob(str(Path(data_loc) / cur_raw_file_path))

                if files:
                    for file in files:
                        if not cur_aliquot_id in self.map:
                            self.map[cur_aliquot_id] = []
                        self.map[cur_aliquot_id].append(file)

        return disqualify
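
A sketch of the source-config dictionary that load_map appears to expect; the key names are taken from the get_value calls above, while the values are purely illustrative:

# illustrative values only; key names come from the get_value calls in MappingFileText
conf_source = {
    'file_delim': ',',
    'header_row_num': 1,
    'aliquot_id_column_num': 1,                           # 1-based column holding the aliquot id
    'template_fields_col_num': {'plate': 2, 'well': 3},   # columns referenced by the path template
    'file_path_template': 'raw/{plate}/{well}_*.csv',     # globbed under the data location passed to load_map
}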
Example #8
def process_submission():
    # load main config file
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1
    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())

    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new request files will be posted
    requests_loc = m_cfg.get_value('Location/requests')

    gc.DISQUALIFIED_REQUESTS = m_cfg.get_value(
        'Location/requests_disqualified')
    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing request files will be stored
    # (one file per request)
    gc.REQ_LOG_DIR = m_cfg.get_value('Location/request_logs')
    # path to the folder where all processed (and renamed) requests will be stored
    gc.REQ_PROCESSED_DIR = m_cfg.get_value('Location/requests_processed')
    # path to the folder where created submission packages will be located. One package sub_folder per request.
    # gc.OUTPUT_PACKAGES_DIR = m_cfg.get_value('Location/output_packages')
    # tarball approach to be used for the current deployment
    gc.TARBALL_APPROACH = m_cfg.get_value('Tar_ball/approach')
    # flag to save calculated md5sum to a physical file
    gc.TARBALL_SAVE_MD5SUM_FILE = m_cfg.get_value('Tar_ball/save_md5sum_file')
    # tarball ignore directories
    ignore_dirs = m_cfg.get_value('Tar_ball/ignore_dirs')
    if ignore_dirs:
        # update default ignore_dirs value with the value from a config file
        gc.TARBALL_IGNORE_DIRS = ignore_dirs

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME
    processed_folder_name = gc.REQ_PROCESSED_DIR  # gc.PROCESSED_FOLDER_NAME

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    email_attchms = []
    transfers = []

    # requests_loc = 'E:/MounSinai/MoTrPac_API/ProgrammaticConnectivity/MountSinai_metadata_file_loader/DataFiles'
    requests_path = Path(requests_loc)

    # get current location of the script and create Log folder
    # if a relative path provided, convert it to the absolute address based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    # logdir = Path(prj_wrkdir) / log_folder_name  # 'logs'
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)  # logging_level
    mlog = lg['logger']
    log_warnings = False

    mlog.info(
        'Start processing submission requests in "{}"'.format(requests_path))

    try:

        (_, _, requests) = next(walk(requests_path))
        # print('Study requests: {}'.format(requests))

        mlog.info(
            'Submission requests to be processed (count = {}): {}'.format(
                len(requests), requests))

        req_proc_cnt = 0
        errors_present = 'OK'
        req_path = ''

        # '~$' should filter out temp file created when excel is open
        requests = [file for file in requests if not file.startswith('~$')]

        for req_file in requests:
            if req_file.endswith(('xlsx', 'xls')):
                req_path = Path(requests_path) / req_file

                # transfer_path = ''  # set a default transfer path
                transfer_details = {
                    'transfer_path': '',
                    'request_file': req_file,
                    'process_handler': None,
                    'return_code': None,
                    'return_status': None
                }
                # email_msgs = []
                # email_attchms = []

                try:
                    # print('--------->Process file {}'.format(req_path))
                    mlog.info(
                        'Request file {} was selected for processing.'.format(
                            req_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    req_obj = Request(req_path, m_cfg)

                    if req_obj and req_obj.loaded:
                        # proceed processing request
                        mlog.info(
                            'Submission request loading status: Success. Submission request file: "{}".'
                            .format(req_path))
                        mlog.info(
                            'Loading local and project related configs for processing the request.'
                        )
                        req_obj.load_request_configuration()
                        if not req_obj.error.exist():
                            mlog.info(
                                'Local config files were loaded with no errors, proceeding to process '
                                'the request file.')
                            req_obj.process_request()
                        else:
                            mlog.info(
                                'Errors were reported during loading local config files. Aborting processing '
                                'this request.')

                        mlog.info(
                            'Processing of Submission request was finished for {}'
                            .format(req_path))

                    req_proc_cnt += 1

                    # print (req_obj.logger._cache)
                    if hasattr(req_obj.logger, '_cache'
                               ):  #verify that _cache attribute is present
                        # check if any warning were recorded to the log file and set a flag log_warnings
                        if 30 in req_obj.logger._cache and req_obj.logger._cache[
                                30]:
                            log_warnings = True
                        # else:
                        #     log_warnings = False
                    else:
                        mlog.warning(
                            'The current logger object has no "_cache" attribute - thus cannot determine '
                            'if any Warnings were reported during the process.'
                        )

                    # identify if any errors were identified and set status variable accordingly
                    if not req_obj.error.exist():
                        if not req_obj.disqualified_sub_aliquots:
                            # no disqualified sub-aliquots present
                            if not log_warnings:
                                fl_status = 'OK'
                                _str = 'Processing status: "{}". Submission Request: {}'.format(
                                    fl_status, req_path)
                                # errors_present = 'OK'
                            else:
                                fl_status = 'OK with Warnings'
                                _str = 'Processing status: "{}". Submission Request: {}'.format(
                                    fl_status, req_path)
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK with Disqualifications'
                            _str = 'Processing status: "{}". Submission Request: {}'.format(
                                fl_status, req_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check processing log file for this request: {}' \
                            .format(fl_status, req_obj.logger.handlers[0])
                        errors_present = 'ERROR'

                    if fl_status == "OK":
                        mlog.info(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            # update transfer details dictionary with the path to the transfer file
                            transfer_details['transfer_path'] = \
                                Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh'
                            transfers.append(
                                transfer_details
                            )  # add transfer details to transfers list
                            mlog.info(
                                'Since the last request was processed with "{}" status and transfer on '
                                'completion was requested ("--execute_transfer" argument was set to "yes"), '
                                'the following path was put in queue for execution: '
                                '{}'.format(fl_status,
                                            transfer_details['transfer_path']))

                    else:
                        mlog.warning(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            mlog.info(
                                'The transfer on completion request ("--execute_transfer" argument was set to '
                                '"yes") will be ignored since the last request was processed with "{}" status.'
                                .format(fl_status))

                    processed_dir = Path(processed_folder_name)
                    req_processed_name = ts + '_' + fl_status + '_' + req_file
                    file_name_new_path = cm.move_file_to_processed(
                        req_path, req_processed_name, processed_dir,
                        req_obj.logger, req_obj.error)
                    if file_name_new_path:
                        mlog.info(
                            'Processed Submission request "{}" was moved and renamed as: "{}"'
                            .format(req_path,
                                    processed_dir / req_processed_name))
                    else:
                        mlog.warning(
                            'Moving the processed request "{}" was not successful due to some errors '
                            'reported in the request\'s log file {}.'.format(
                                req_path, req_obj.log_handler.baseFilename))

                    # deactivate the current Request logger
                    deactivate_logger_common(req_obj.logger,
                                             req_obj.log_handler)

                    if req_obj.submission_package and req_obj.submission_package.submission_dir:
                        # save transfer path to a local variable
                        transfer_path = Path(
                            req_obj.submission_package.submission_dir
                        ) / 'transfer_script.sh'
                    else:
                        transfer_path = None

                    # preps for email notification
                    email_msgs.append((
                        '-------------------------------------<br/>'
                        'Requested project: {}'.format(req_obj.project) +
                        '<br/>Requested Experiment: {}.'.format(
                            req_obj.experiment_id) +
                        ('<br/>Request file <br/>{} <br/> was processed and moved/renamed to <br/> {}.'
                         .format(req_path, processed_dir /
                                 req_processed_name) if file_name_new_path else
                         '<br/> Request file <br/>{} <br/> was processed but <font color="red">NOT moved due '
                         'to some errors</font> reported in the request\'s log file.'
                         .format(req_path)) + '<br/> <b>Errors summary:</b> {}'
                        '<br/> <b>Warning(s) reported:</b> {}'
                        '<br/> <i>Log file location: <br/>{}</i>'
                        '<br/> Submission package location:<br/>{}'
                        '<br/> Data source location:<br/>{}'
                        '<br/> Processed Aliquots:<br/>{}'
                        '<br/> Disqualified Aliquots (if present, see the log file for more details):<br/>{}'
                        '<br/> A request file for re-processing Disqualified Aliquots was prepared in:<br/>{}'
                        '<br/> Automatic data transferring: {}'
                        '<br/> Command line to run data transferring manually: <br/> {}'
                        ''.format(
                            '<font color="red">Check Errors in the log file.</font>'
                            if req_obj.error.exist() else
                            '<font color="green">No Errors</font> ',
                            '<font color="red">Yes - check the log file.</font>'
                            if log_warnings else 'No',
                            req_obj.log_handler.baseFilename,
                            req_obj.submission_package.submission_dir
                            if req_obj.submission_package else 'N/A',
                            req_obj.attachments.data_loc if req_obj.attachments
                            else 'N/A', req_obj.qualified_aliquots
                            if req_obj.qualified_aliquots else 'None', [
                                val for val in
                                req_obj.disqualified_sub_aliquots.keys()
                            ] if req_obj.disqualified_sub_aliquots else 'None',
                            req_obj.disqualified_request_path,
                            '<font color="green">Performed.</font> '
                            'Additional email should be sent upon data transfer completion.'
                            if len(
                                str(transfer_details['transfer_path']).strip())
                            > 0 else 'Not performed.',
                            str(
                                Path(req_obj.submission_package.submission_dir)
                                / 'transfer_script.sh')
                            if req_obj.submission_package else 'N/A')))
                    email_attchms.append(req_obj.log_handler.baseFilename)

                    # print ('email_msgs = {}'.format(email_msgs))

                    req_obj = None

                except Exception as ex:
                    # report an error to log file and proceed to next file.
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, req_path, traceback.format_exc()))
                    raise

        mlog.info('Number of processed Submission requests = {}'.format(
            req_proc_cnt))

        if req_proc_cnt > 0:
            # collect final details and send email about this study results
            email_subject = 'processing of Submission Requests '
            if errors_present == 'OK':
                if not log_warnings:
                    email_subject = 'SUCCESSFUL ' + email_subject
                else:
                    email_subject = 'SUCCESSFUL (with Warnings) ' + email_subject
            elif errors_present == 'DISQUALIFY':
                email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
            else:
                email_subject = 'ERROR(s) present during ' + email_subject

            email_body = (
                'Number of requests processed: {}.'.format(req_proc_cnt) +
                '<br/><br/>' + '<br/><br/>'.join(email_msgs))
            # print ('email_subject = {}'.format(email_subject))
            # print('email_body = {}'.format(email_body))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=m_cfg.get_value('Email/sent_to_emails'),
                        subject=email_subject,
                        message=email_body,
                        main_conf=m_cfg
                        # attachments are commented out, since some log files go over the 25MB limit and fail email sending
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, req_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            # perform transfers, if anything qualifies for it
            if transfers and len(transfers) > 0:
                transfer_status_checking_delay = m_cfg.get_value(
                    'General/transfer_status_checking_delay')
                if transfer_status_checking_delay and str(
                        transfer_status_checking_delay).isnumeric():
                    # convert to int in case the config value was provided as a numeric string
                    transfer_status_checking_delay = int(
                        transfer_status_checking_delay)
                    if transfer_status_checking_delay <= 0:
                        transfer_status_checking_delay = None
                else:
                    transfer_status_checking_delay = None

                mlog.info(
                    'Starting processing requested transfers. Total count: {} transfers.'
                    .format(len(transfers)))
                # process all collected transfer requests
                cm.process_transfers(transfers, mlog,
                                     transfer_status_checking_delay)
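                # Illustrative note (assumed structure; the exact keys are defined where
                # transfer_details is built earlier in this script and inside cm.process_transfers):
                # each element of "transfers" is expected to be a dict along the lines of
                #     {'request_file': 'submission_request_01.xlsx',
                #      'transfer_path': Path('.../submission_dir/transfer_script.sh'),
                #      'return_status': 'OK: ...' / 'ERROR: ...' / None}
                # where 'return_status' is populated by cm.process_transfers() and its prefix
                # is inspected below to count OK / ERROR / not-defined outcomes.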

                # assess results of the transfer processing
                transfer_ok = 0
                transfer_err = 0
                transfer_nd = 0
                for transfer in transfers:
                    if transfer['return_status']:
                        if transfer['return_status'][:2] == 'OK':
                            transfer_ok += 1
                        elif transfer['return_status'][:5] == 'ERROR':
                            transfer_err += 1
                        else:
                            transfer_nd += 1
                    else:
                        transfer_nd += 1

                _str = 'Finish processing transfers with the following statuses: "OK" - {} transfer(s), "ERROR" - {} ' \
                       'transfer(s)'.format(transfer_ok, transfer_err)
                if transfer_nd > 0:
                    _str = _str + ', "ND" - {}'.format(transfer_nd)
                mlog.info(_str)

                # send email with the status of the transfers
                if transfers and len(transfers) > 0:
                    if transfer_err > 0:
                        email_subject = 'Errors produced during automated transfer(s) of prepared Submission Request(s)'
                    else:
                        email_subject = 'Completion of automated transfer(s) of prepared Submission Request(s)'

                    email_transfer_msgs = []
                    for transfer in transfers:
                        email_transfer_msgs.append(
                            ('Transfer process for the request file: "{}" '
                             '<br/>Transfer script file:<br/>{}'
                             '<br/>Completion status:<br/>{}'.format(
                                 transfer['request_file'],
                                 transfer['transfer_path'],
                                 transfer['return_status'])))

                    email_body = (
                        'Summary of transfer of prepared submissions:'
                        '<br/>Total count of completed transfers: {}. '
                        '<br/>Status "OK": {} transfer(s)'
                        '<br/>Status "ERROR": {} transfer(s)'
                        '<br/>Status "Not Defined": {} transfer(s)'
                        '<br/><br/>The following are details for each performed transfer:'
                        '<br/><br/>'.format(
                            len(transfers), '<font color="green">' +
                            str(transfer_ok) + '</font>' if transfer_ok > 0
                            else transfer_ok, '<font color="red">' +
                            str(transfer_err) + '</font>' if transfer_err > 0
                            else transfer_err, transfer_nd) +
                        '<br/><br/>'.join(email_transfer_msgs))

                    try:
                        if m_cfg.get_value('Email/send_emails'):
                            email.send_yagmail(emails_to=m_cfg.get_value(
                                'Email/sent_to_emails'),
                                               subject=email_subject,
                                               message=email_body,
                                               main_conf=m_cfg)
                    except Exception as ex:
                        # report unexpected error during sending emails to a log file and continue
                        _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                               'finishing automated transfers. \n{} '\
                            .format(ex, traceback.format_exc())
                        mlog.critical(_str)

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()
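
The loop above tallies each transfer outcome by the prefix of its 'return_status' value, and the summary email reports the counts. Below is a minimal, self-contained sketch of that tallying logic (illustrative only; the sample dictionaries, file names and paths are assumptions, not values produced by the original script):

transfers_example = [
    {'request_file': 'request_01.xlsx',
     'transfer_path': '/tmp/a/transfer_script.sh',
     'return_status': 'OK: completed'},
    {'request_file': 'request_02.xlsx',
     'transfer_path': '/tmp/b/transfer_script.sh',
     'return_status': 'ERROR: timed out'},
    {'request_file': 'request_03.xlsx',
     'transfer_path': '/tmp/c/transfer_script.sh',
     'return_status': None},
]

counts = {'OK': 0, 'ERROR': 0, 'ND': 0}
for t in transfers_example:
    status = t.get('return_status') or ''
    if status.startswith('OK'):
        counts['OK'] += 1
    elif status.startswith('ERROR'):
        counts['ERROR'] += 1
    else:
        counts['ND'] += 1  # "not defined" outcome

print(counts)  # {'OK': 1, 'ERROR': 1, 'ND': 1}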
Example #9
0
class SubmissionForm:
    def __init__(self,
                 form_name,
                 request,
                 sub_aliquot,
                 aliquot,
                 sample,
                 form_file_name_id=None):
        self.form_name = form_name
        if not form_file_name_id:
            form_file_name_id = form_name
        self.form_file_name_id = form_file_name_id
        self.req_obj = request  # reference to the current request object
        self.sub_aliquot = sub_aliquot
        self.aliquot = aliquot
        self.sample = sample
        self.error = self.req_obj.error
        self.logger = self.req_obj.logger
        self.conf_assay = request.conf_assay

        self.fl_json = None
        self.fl_json_schema = None
        self.fl_cfg_common = None
        self.fl_cfg_assay = None
        # self.fl_cfg_dict = None

        self.prepare_form(form_name)

    def prepare_form(self, form_name):
        forms_location = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' +
                              self.req_obj.project)
        # identify paths for json and config (yaml) files
        fl_path_json_common = forms_location / (form_name + '.json')
        fl_path_json_assay = forms_location / (
            form_name + '_' + str(self.req_obj.assay).lower() + '.json')
        fl_path_json_schema = forms_location / (form_name + '_schema.json')
        fl_path_cfg_common = forms_location / (form_name + '.yaml')

        # fl_path_json_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.json')
        # fl_path_json_assay = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_' +
        #                           str(self.req_obj.assay).lower() + '.json')
        # fl_path_json_schema = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_schema.json')
        # fl_path_cfg_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.yaml')

        # check the value assigned to the current request's data_source_forms_assignment
        # and select assay config file accordingly
        if self.req_obj.data_source_forms_assignment == 'file':
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')
        elif self.req_obj.data_source_forms_assignment == 'db':
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '_db.yaml')
        else:  # any other value falls back to the file-based assay config as the default
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')

        # check if assay specific json exists; if yes - use it, if not - use common one
        if cm.file_exists(fl_path_json_assay):
            fl_path_json = fl_path_json_assay
        else:
            fl_path_json = fl_path_json_common

        # load json and config files
        self.fl_json = FileJson(fl_path_json, self.req_obj.error,
                                self.req_obj.logger)
        self.fl_json_schema = FileJson(fl_path_json_schema, self.req_obj.error,
                                       self.req_obj.logger)
        self.fl_cfg_common = ConfigData(fl_path_cfg_common)
        self.fl_cfg_assay = ConfigData(fl_path_cfg_assay)
        # self.fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)

        # print(self.fl_json.json_data)
        # loop through all json keys and fill those with associated data
        self.get_json_keys(self.fl_json.json_data)
        # print(self.fl_json.json_data)

        # validate final json file against json schema (if present)
        self.validate_json(self.fl_json, self.fl_json_schema)

    def get_json_keys(self, json_node, parent_keys=''):
        for key, val in json_node.items():
            # TODO: add functionality to handle JSON arrays (if those are needed)
            if isinstance(val, dict):
                if parent_keys:
                    cur_parents = '/'.join([parent_keys, key])
                else:
                    cur_parents = key
                self.get_json_keys(val, cur_parents)
            else:
                if parent_keys:
                    full_key_name = '/'.join([parent_keys, key])
                else:
                    full_key_name = key

                # json_node[key] = 'print("{}")'.format(full_key_name)
                # json_node[key] = eval(json_node[key])
                # print("JSON file - {} : {}".format(full_key_name, val))  # val # json_node[key]
                # print("Config Common - {} = {}".format(key, self.fl_cfg_common.get_value(key)))
                # print("Config Assay - {} = {}".format(key, self.fl_cfg_assay.get_value(key)))

                val = self.eval_cfg_value(
                    full_key_name, self.fl_cfg_assay.get_value(full_key_name),
                    self.fl_cfg_common.get_value(full_key_name))
                if str(val).strip() == '':
                    # if returned value is blank, create a warning in the log file
                    self.logger.warning(
                        'Blank value was reported for field "{}" '.format(
                            full_key_name))

                # check if the assigned value is a special expected blank value that doesn't need to be reported in the log
                if str(val).strip(
                ) == gc.SUBMISSION_FORM_EXPECTED_BLANK_VALUE:  # '!!blank!!'
                    json_node[key] = ''
                    self.logger.info(
                        'Field "{}" was assigned with the expected blank ("") value'
                        .format(key))
                else:
                    # assign retrieved key back to associated json key
                    json_node[key] = val
                    self.logger.info(
                        'Field "{}" was assigned with "{}" value'.format(
                            key, val))

                # print(key, '==>', json_node[key])
                pass
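
    # Illustrative walk-through of get_json_keys (the JSON fragment below is an assumption,
    # not one of the original form templates): a node such as
    #     {"submission": {"center": "", "assay": ""}}
    # yields the full key names 'submission/center' and 'submission/assay'; each full key is
    # then resolved through the assay and common YAML configs by eval_cfg_value() and the
    # result is written back into the corresponding JSON node.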

    def eval_cfg_value(self, key, assay_cfg_val, common_cfg_val):
        # if assay config key is not provided, use common assay val
        if assay_cfg_val:
            cfg_val = assay_cfg_val
        else:
            cfg_val = common_cfg_val

        eval_flag = gc.SUBMISSION_YAML_EVAL_FLAG  # 'eval!'

        # check if some configuration instruction/key was retrieved for the given "key"
        if cfg_val:
            if eval_flag in str(cfg_val):
                cfg_val = cfg_val.replace(eval_flag,
                                          '')  # replace 'eval!' flag key
                try:
                    out_val = eval(cfg_val)
                except Exception as ex:
                    _str = 'Error "{}" occurred during preparing submission form "{}" for sub-aliquot "{}" ' \
                           'while attempting to interpret configuration key "{}" provided for the form\'s key ' \
                           '"{}". \n{} ' \
                        .format(ex, self.form_name, self.sub_aliquot, cfg_val, key, traceback.format_exc())
                    self.logger.error(_str)
                    self.error.add_error(_str)
                    out_val = ''
            else:
                out_val = cfg_val
        else:
            # requested "key" does not exist neither in assay or common config files
            _str = 'No value was assigned to "{}" key during preparing submission form "{}" for sub-aliquot "{}".' \
                .format(key, self.form_name, self.sub_aliquot)
            self.logger.warning(_str)
            out_val = ''
        return out_val
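
    # Illustrative note on the 'eval!' convention handled above (the YAML entries shown are
    # assumptions, not copies of the original config files): a config value such as
    #     requester_name: "eval! self.get_request_value('requester_name')"
    # has the 'eval!' flag stripped and the remaining expression evaluated with eval() in the
    # context of this SubmissionForm instance, while a plain value such as
    #     form_version: "1.0"
    # is returned unchanged.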

    def get_tarball_property(self, sa, val_type):

        value = ''
        if self.req_obj.attachments:
            tar_obj = self.req_obj.attachments.aliquots_tarball_dict[sa]
            if tar_obj:
                if val_type == 'name':
                    value = os.path.basename(tar_obj['path'])
                elif val_type == 'md5':
                    value = tar_obj['md5']
        return value
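
    # Illustrative sketch (assumed structure of attachments.aliquots_tarball_dict; the sample
    # sub-aliquot id, path and hash are hypothetical):
    #     {'SA-0001_v1': {'path': '/data/out/SA-0001_v1.tar.gz', 'md5': '9e107d9d...'}}
    # so get_tarball_property('SA-0001_v1', 'name') -> 'SA-0001_v1.tar.gz'
    # and get_tarball_property('SA-0001_v1', 'md5')  -> '9e107d9d...'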

    # it will retrieve any existing property_val from the request object
    def get_request_value(self, property_name, check_dict=False):
        return self.get_property_value_from_object(self.req_obj, property_name,
                                                   check_dict)

    # it will retrieve any existing property_val from the submission_form object
    def get_submission_form_value(self, property_name, check_dict=False):
        return self.get_property_value_from_object(self, property_name,
                                                   check_dict)

    # it will retrieve any existing property_val from rawdata object
    def get_rawdata_value(self, property_name, check_dict=False):
        # return self.get_property_value_from_object(self.req_obj.raw_data.aliquots_data_dict[self.sub_aliquot],
        #                                            property_name, check_dict, 'dict')
        return self.get_sourcedata_value('rawdata', property_name, check_dict)

    # it will retrieve any existing property_val from assay data object
    def get_assaydata_value_by_col_number(self, col_num, check_dict=False):
        # obj = list(self.req_obj.assay_data.aliquots_data_dict[self.sub_aliquot].items())
        # val = self.get_property_value_from_object(obj, col_num - 1, check_dict, 'dict', 'number')
        # if isinstance(val, tuple):
        #     return val[1]
        # else:
        #     return val
        return self.get_sourcedata_value_by_col_number('assaydata', col_num,
                                                       check_dict)

    # it will retrieve any existing property_val from assay data object
    def get_assaydata_value(self, property_name, check_dict=False):
        # return self.get_property_value_from_object(self.req_obj.assay_data.aliquots_data_dict[self.sub_aliquot],
        #                                            property_name, check_dict, 'dict')
        return self.get_sourcedata_value('assaydata', property_name,
                                         check_dict)

    # it will retrieve any existing property_val (specified by the name) from the data source object
    # specified by the data_source_name
    def get_sourcedata_value(self,
                             data_source_name,
                             property_name,
                             check_dict=False):
        if data_source_name in self.req_obj.data_source_names:
            return self.get_property_value_from_object(
                self.req_obj.data_source_objects[data_source_name].
                aliquots_data_dict[self.sub_aliquot], property_name,
                check_dict, 'dict')
        else:
            _str = 'Data source name ({}) requested during populating json submission form "{}" for aliquot id ' \
                   '"{}" does not exist for the current assay.'.format(data_source_name, self.form_name, self.aliquot)
            self.logger.error(_str)
            self.error.add_error(_str)
            return '#ERROR#'

    # it will retrieve any existing property_val (specified by the column number) from the data source object
    # specified by the data_source_name
    def get_sourcedata_value_by_col_number(self,
                                           data_source_name,
                                           col_num,
                                           check_dict=False):
        if data_source_name in self.req_obj.data_source_names:
            obj = list(self.req_obj.data_source_objects[data_source_name].
                       aliquots_data_dict[self.sub_aliquot].items())
            val = self.get_property_value_from_object(obj, col_num - 1,
                                                      check_dict, 'dict',
                                                      'number')
            if isinstance(val, tuple):
                return val[1]
            else:
                return val
        else:
            _str = 'Data source name ({}) requested during populating json submission form "{}" for aliquot id ' \
                   '"{}" does not exist for the current assay.'.format(data_source_name, self.form_name, self.aliquot)
            self.logger.error(_str)
            self.error.add_error(_str)
            return '#ERROR#'

    # it will retrieve the value of the property identified by the "property_val" parameter
    # from the object passed by reference in the "obj" parameter
    # obj_type possible values: "class" (type of "obj" is a class instance),
    #                           "dict" (type of "obj" is a dictionary)
    # property_type possible values: "name" ("property_val" is the name of the property),
    #                                "number" ("property_val" is the positional index of an item in the dictionary)
    # noinspection PyUnusedLocal
    def get_property_value_from_object(self,
                                       obj,
                                       property_val,
                                       check_dict=False,
                                       obj_type='class',
                                       property_type='name'):
        property_val = str(property_val)
        if property_type == 'name':
            # if property_val name is given, proceed here
            if obj_type == 'class':
                get_item = 'obj.' + property_val + ' if hasattr(obj, "' + property_val + '") else ""'
            elif obj_type == 'dict':
                get_item = 'obj["' + property_val + '"] if "' + property_val + '" in obj else ""'
            else:
                get_item = None
        else:
            # if column number is given, proceed here
            get_item = 'obj[' + property_val + ']'

        try:
            out = eval(get_item)

            if check_dict:
                out = cm2.get_dict_value(out, property_val)

        except Exception as ex:
            _str = 'Error "{}" occurred during preparing submission form "{}" for sub-aliquot "{}" ' \
                   'while attempting to evaluate property_val: "{}". \n{} ' \
                .format(ex, self.form_name, self.sub_aliquot, get_item, traceback.format_exc())
            self.logger.error(_str)
            self.error.add_error(_str)
            out = ''
        return out
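
    # Illustrative examples of the expressions assembled above (the property names are assumptions):
    #     obj_type='class', property_val='experiment_id' -> eval('obj.experiment_id if hasattr(obj, "experiment_id") else ""')
    #     obj_type='dict',  property_val='center_id'     -> eval('obj["center_id"] if "center_id" in obj else ""')
    #     property_type='number', property_val='3'       -> eval('obj[3]')
    # The assembled string is evaluated with the local "obj" in scope; any failure is logged
    # and an empty string is returned.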

    # converts an array of values (i.e. a list of aliquots) into a list of dictionaries with a given key name
    # For example: [1, 2, 3] => [{name: 1}, {name: 2}, {name: 3}]
    @staticmethod
    def convert_simple_list_to_list_of_dict(sm_arr, key_name):
        out = []
        for a in sm_arr:
            dict_ob = {key_name: a}
            out.append(dict_ob)
        return out

    def validate_json(self, json_file, schema_file):
        try:
            validate(json_file.json_data, schema_file.json_data)
            _str = 'Validation of "{}" against "{}" was successful.'.format(
                json_file.filepath, schema_file.filepath)
            self.logger.info(_str)
        except jsonschema.exceptions.ValidationError as ve:
            _str = 'Validation of "{}" file against schema "{}" failed with the following error: \n{}' \
                .format(json_file.filepath, schema_file.filepath, ve)
            self.logger.error(_str)
            self.error.add_error(_str)
class Inquiry(File):
    def __init__(self, filepath, conf_main=None, file_type=2, sheet_name=''):

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        self.sheet_name = sheet_name  # .strip()

        if conf_main:
            self.conf_main = conf_main
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)

        self.error = InquiryError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Download Inquiry file {}'.format(filepath))
        self.inq_match_arr = []
        self.columns_arr = []
        self.inq_sources = {}
        self.inq_line_sources = {}

        # load the dictionary config that is common for all programs
        self.conf_dict = DictConfigData(gc.CONFIG_FILE_DICTIONARY)
        if not self.conf_dict.loaded:
            # disqualify the current inquiry file
            _str = 'Aborting processing of the inquiry file - the following common dictionary config file cannot ' \
                   'be loaded: {}.'.format(gc.CONFIG_FILE_DICTIONARY)
            self.error.add_error(_str)
            self.logger.error(_str)
            return

        # save the inquiry file structure into dedicated variables
        self.file_structure_by_col_num = self.conf_dict.get_inqury_file_structure(
            'by_col_num')
        self.file_structure_by_col_name = self.conf_dict.get_inqury_file_structure(
            'by_col_name')

        self.processed_folder = gc.INQUIRY_PROCESSED_DIR
        # if a relative path provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(self.processed_folder):
            self.processed_folder = Path(self.wrkdir) / self.processed_folder
        else:
            self.processed_folder = Path(self.processed_folder)

        self.download_request_path = None

        self.disqualified_items = {}
        self.disqualified_inquiry_path = ''  # will store the path to an inquiry file with disqualified sub-aliquots

        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.INQUIRY_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_process_entity = None

        self.db_access = DBAccess(self.logger, self.conf_main, self.error)

        self.get_file_content()

    def get_file_content(self):
        if not self.columns_arr or not self.lines_arr:
            self.columns_arr = []
            self.lines_arr = []
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(
                    self.filepath))

                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = (
                                'Given worksheet name "{}" was not found in the file "{}". '
                                'Verify that the worksheet name exists in the file.'
                            ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lines_arr = None
                            self.loaded = False
                            return self.lines_arr

                sheet.cell_value(0, 0)

                lines = [
                ]  # will hold content of the inquiry file as an array of arrays (rows)
                columns = []
                for i in range(sheet.ncols):
                    column = []
                    for j in range(sheet.nrows):
                        if i == 0:
                            lines.append(
                                []
                            )  # adds an array for each new row in the inquiry file

                        # print(sheet.cell_value(i, j))
                        cell = sheet.cell(j, i)
                        cell_value = cell.value
                        # take care of number and dates received from Excel and converted to float by default
                        if cell.ctype == 2 and int(cell_value) == cell_value:
                            # the value is an integer
                            cell_value = str(int(cell_value))
                        elif cell.ctype == 2:
                            # the value is a float
                            cell_value = str(cell_value)
                        # convert date back to human readable date format
                        # print ('cell_value = {}'.format(cell_value))
                        if cell.ctype == 3:
                            cell_value_date = xlrd.xldate_as_datetime(
                                cell_value, wb.datemode)
                            cell_value = cell_value_date.strftime(
                                "%Y-%m-%directory")
                        column.append(
                            cell_value
                        )  # adds value to the current column array
                        # lines[j].append('"' + cell_value + '"')  # adds value in "csv" format for a current row
                        lines[j].append(cell_value)

                    # self.columns_arr.append(','.join(column))
                    columns.append(
                        column)  # adds a column to a list of columns

                # populate lines_arr and columns_arr properties
                self.lines_arr = lines
                self.columns_arr = columns

                # populate lineList value as required for the base class
                self.lineList = []
                for ln in lines:
                    self.lineList.append(','.join(str(item) for item in ln))

                wb.unload_sheet(sheet.name)

                # perform validation of the current inquiry file
                self.validate_inquiry_file()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    # print(self.error.count)
                    # print(self.error.get_errors_to_str())
                    _str = 'Errors ({}) were identified during validation of the inquiry. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                    self.logger.error(_str)
                else:
                    self.loaded = True

            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist.'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columns_arr = None
                self.lines_arr = None
                self.loaded = False
        return self.lineList

    def validate_inquiry_file(self):
        self.logger.info(
            'Start validating the current inquiry file "{}".'.format(
                self.filepath))
        row_count = 1
        failed_cnt = 0
        valid_aliquot_flag = self.conf_main.get_value(
            'Validate/aliquot_id_vs_manifest')
        valid_inquiry_values_flag = self.conf_main.get_value(
            'Validate/inquiry_values_vs_dictionary')
        inquiry_min_number_columns = self.conf_main.get_value(
            'Validate/inquiry_min_number_columns')
        inquiry_validate_number_columns = self.conf_main.get_value(
            'Validate/inquiry_validate_number_columns')
        if not inquiry_min_number_columns or not isinstance(
                inquiry_min_number_columns, int):
            inquiry_min_number_columns = 6  # set a default value if it is not provided in the config file
        if not inquiry_validate_number_columns or not isinstance(
                inquiry_validate_number_columns, int):
            inquiry_validate_number_columns = 6  # set a default value if it is not provided in the config file

        for row in self.lines_arr:
            if row_count == self.header_row_num:  # 1
                # skip the header row
                row_count += 1
                continue

            sub_al = 'ND'  # default value used until the actual sub-aliquot is read from the row
            assay = ''  # set blank value as default
            valid_aliquot_performed = False
            skip_final_check = False

            # check if inquiry file contain min number of columns
            if len(row) < inquiry_min_number_columns:
                # disqualify the current inquiry file
                _str = 'The current inquiry file has {} columns while {} are expected and will be disqualified.' \
                    .format(len(row), inquiry_min_number_columns)
                self.error.add_error(_str)
                self.logger.error(_str)
                return
            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # get sub-aliquot value before looping through all fields, so it can be used for reporting errors
            # also get program_code assigned to the row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', row)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', row, False)

            # validate program_code value
            if conf_dict.key_exists_in_dict(
                    str(program_code).lower(), 'program_code'):
                # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
                conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM\
                    .replace('{program}', conf_dict.get_dict_value(str(program_code).lower(), 'program_code'))
                conf_dict_program = ConfigData(conf_dict_program_path)
                conf_dict.update(conf_dict_program.get_whole_dictionary())
            else:
                _str = 'Unexpected value "{}" was provided for "program_code" (line #{})' \
                    .format(program_code, row_count)
                self.logger.critical(_str)
                # disqualify an inquiry file row, if unexpected value was provided
                self.disqualify_inquiry_item(sub_al, _str, row)
                failed_cnt += 1
                skip_final_check = True

            if not skip_final_check:
                # go through fields and validate the provided values
                for i in range(len(row)):
                    if i + 1 > inquiry_validate_number_columns:
                        # if number of columns in the inquiry file > expected maximum, exit the loop
                        break
                    col_category = conf_dict.get_dict_value(
                        str(i + 1), 'inquiry_file_structure')
                    if col_category in ('program_code', 'sub-aliquot'):
                        # no checking is needed for the listed field, proceed further
                        continue
                    elif col_category == 'db_center_id':
                        # get center id value and validate it
                        db_center_id = row[i]
                        # validate center_code or center_id value
                        self.logger.info(
                            'Start validation of center value "{}" provided for the current row'
                            .format(db_center_id))
                        db = DBAccess(self.logger, self.conf_main,
                                      self.error)  # create DBAccess object
                        db.open_connection()
                        # test center value assuming center code was provided
                        dataset = db.validate_center_code(
                            db_center_id, program_code, 'code', 'code')
                        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
                            dataset, 'center_id', 'center_code')
                        if center_id_out1:
                            # center id was returned, meaning center was validated fine
                            db_center_id = center_id_out1
                        else:
                            # if center code was not validated at first attempt, validate it assuming the center id was given
                            dataset = db.validate_center_code(
                                db_center_id, program_code, 'id', 'code')
                            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                                dataset, 'center_id', 'center_id')
                            if center_id_out2:
                                # center id was validated at the 2nd attempt, ignore the 1st validation attempt
                                db_center_id = center_id_out2
                            else:
                                # center validation attempts failed, report both failures
                                _str = 'The provided center value can be interpreted neither as a code nor as an id; ' \
                                       'here are both validation outcomes: ' + \
                                       ' | '.join([_str_err_out1, _str_err_out2])
                                self.logger.warning(_str)
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                break

                        # if the aliquot validation is required, validate the sub-aliquot value using the db_center_id value
                        if valid_aliquot_flag:
                            # aliquot id validation is required
                            valid_aliquot_performed = True  # flag that aliquot validation was done
                            if isinstance(db_center_id,
                                          int):  # db_center_id.isnumeric():
                                # since center is numeric, proceed here
                                # get aliquot id based on the verified earlier assay value and given sub_aliquot id
                                aliquot = conf_dict.convert_sub_aliq_to_aliquot(
                                    sub_al, assay)
                                valid_status, valid_desc = self.db_access.validate_aliquot_id(
                                    aliquot, db_center_id)
                                if valid_status != 'OK':
                                    # disqualify an inquiry file row, if returned status is not OK
                                    _str = 'No match was found for the aliquot id "{}" (row #{}) in the manifest dataset ' \
                                           'of the database. DB response => Status: "{}"; Description: "{}".'\
                                        .format(aliquot, row_count, valid_status, valid_desc)
                                    self.logger.warning(_str)
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                            else:
                                # report unexpected center id value
                                _str = 'Unexpected value "{}" was provided for "db_center_id" (line #{}, column #{}). This is a ' \
                                       'critical error because this value is required (based on the configuration setting ' \
                                       '"Validate/aliquot_id_vs_manifest") to validate the provided aliquot id "{}"' \
                                    .format(db_center_id, row_count, i + 1, sub_al)
                                self.logger.warning(_str)
                                # disqualify an inquiry file row, if unexpected value was provided
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                # break
                        else:
                            self.logger.info(
                                'Validating of the provided aliquot_id "{}" is not required based on the '
                                'value of the config parameter "Validate/aliquot_id_vs_manifest": "{}".'
                                .format(sub_al, valid_aliquot_flag))
                    else:
                        if col_category == 'assay':
                            assay = row[i].strip().lower(
                            )  # save assay value to a dedicated variable
                        if valid_inquiry_values_flag:
                            # if validation of the inquiry values vs dictionary is required
                            validate_values = []
                            validate_categories = []
                            if col_category == 'bulk_location':
                                # get inquiry_file_structure_bulk_location value
                                bulk_value_delim = conf_dict.get_dict_value(
                                    'inquiry_file_structure_bulk_location_delim',
                                    '')
                                validate_values = str(
                                    row[i]).split(bulk_value_delim)
                                validate_categories = conf_dict.get_dict_object(
                                    'inquiry_file_structure_bulk_location', '')
                            else:
                                validate_values.append(str(row[i]).lower())
                                validate_categories.append(col_category)
                            for vv, vc in zip(validate_values,
                                              validate_categories):
                                if not conf_dict.key_exists_in_dict(
                                        vv.lower(), vc):
                                    if col_category == 'bulk_location':
                                        _str = 'Unexpected value "{}" was provided for "{}" as a part of ' \
                                               'the "bulk_location" value (line #{}, column #{})' \
                                            .format(vv, vc, row_count, i + 1)
                                    else:
                                        _str = 'Unexpected value "{}" was provided for "{}" (line #{}, column #{})'\
                                            .format(vv, vc, row_count, i+1)
                                    self.logger.critical(_str)
                                    # disqualify an inquiry file row, if unexpected value was provided
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                    if skip_final_check:
                        break

            # check that if aliquot validation is required it was actually performed
            if not skip_final_check:
                if valid_aliquot_flag and not valid_aliquot_performed:
                    _str = 'Required aliquot validation vs. database manifest was not performed for the current row ' \
                           '(#{}) and it is considered a disqualification reason (most likely the db_center_id column ' \
                           'was not provided). ' \
                        .format(row_count)
                    self.logger.critical(_str)
                    # disqualify an inquiry file row, if unexpected value was provided
                    self.disqualify_inquiry_item(sub_al, _str, row)
                    failed_cnt += 1

            row_count += 1

        self.logger.info('Finish validating the inquiry file with{}.'.format(
            ' no errors' if failed_cnt == 0 else
            ' errors; {} records were disqualified - see earlier log entries for details'
            .format(failed_cnt)))
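
    # Illustrative example of a row this method validates (hypothetical values; the
    # authoritative column order comes from the 'inquiry_file_structure' section of the
    # dictionary config):
    #     ['dmref', 'SA-0001_v1', '15', 'rnaseq', 'bulk/2021_batch1', 'src01']
    # i.e. program_code, sub-aliquot, db_center_id, assay, bulk_location, source_id.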

    def check_validation_dataset_outcome(self, dataset, validation_id_column,
                                         validation_id_name):
        _str_err = ''
        validation_id_out = None
        if dataset:
            for row in dataset:
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # center_id
                    validation_id = row[validation_id_column]
                break  # read only first row of the dataset
            if status == 'OK':  # validation was successful
                validation_id_out = validation_id
            elif status == 'Failed':  # validation has failed
                _str_err = 'Validation of the provided {} value vs DB has Failed, description: {}'\
                    .format(validation_id_name, description)
            else:  # unexpected status value was returned
                _str_err = 'Validation of the provided {} value vs DB returned unexpected status {}'\
                    .format(validation_id_name, status)
        else:
            _str_err = 'Unexpected error was reported during validating {} in the DB. ' \
                       'Check earlier entries in the log file.'\
                .format(validation_id_name)

        return _str_err, validation_id_out
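
    # Illustrative sketch of the dataset shape this method expects (values are assumptions):
    #     [{'status': 'OK', 'description': '', 'center_id': 15}]        -> ('', 15)
    #     [{'status': 'Failed', 'description': 'no match', 'center_id': None}]
    #         -> ('Validation of the provided ... has Failed, description: no match', None)
    # Only the first row of the dataset is read.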

    def setup_logger(self, wrkdir, filename):

        # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)

        log_folder_name = gc.INQUIRY_LOG_DIR  # gc.LOG_FOLDER_NAME

        # m_logger_name = gc.MAIN_LOG_NAME
        # m_logger = logging.getLogger(m_logger_name)

        logger_name = gc.INQUIRY_LOG_NAME
        logging_level = self.conf_main.get_value('Logging/inquiry_log_level')

        # if a relative path provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(log_folder_name):
            log_folder_path = Path(wrkdir) / log_folder_name
        else:
            log_folder_path = Path(log_folder_name)

        lg = setup_logger_common(
            logger_name,
            logging_level,
            log_folder_path,  # Path(wrkdir) / log_folder_name,
            str(filename) + '_' +
            time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log')

        self.log_handler = lg['handler']
        return lg['logger']

    # this function builds the datasource_id for the current inquiry line;
    # it is possible that different lines will have the same datasource_id and thus can share the datasource
    def get_inquiry_line_datasource_id(self, inq_line):
        datasource_id = ''

        for col in self.file_structure_by_col_name:
            if col in ['program_code', 'assay', 'source_id']:
                datasource_id += '|' + self.get_inquiry_value_by_field_name(
                    col, inq_line)
            elif 'source_' in col:
                datasource_id += '|' + self.get_inquiry_value_by_field_name(
                    col, inq_line, False)

        return datasource_id
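
    # Illustrative example (hypothetical column values): for an inquiry line with
    # program_code='dmref', assay='rnaseq', source_id='src01' and additional 'source_*'
    # columns holding 'proj_a' and 'run_7', the combined id would look like
    #     '|dmref|rnaseq|src01|proj_a|run_7'
    # so lines with identical values can share a single DataSource object.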

    def get_inquiry_value_by_field_name(self,
                                        field_name,
                                        inq_line,
                                        validate_by_dictionary=None):
        if validate_by_dictionary is None:
            validate_by_dictionary = True  # set default value to True

        if field_name in self.file_structure_by_col_name:
            col_num = self.file_structure_by_col_name[field_name]
            value = inq_line[col_num - 1].strip()
        else:
            value = ''
        # validate/normalize the provided value through the dictionary
        if validate_by_dictionary:
            value = self.conf_dict.get_dict_value(
                str(value).lower(), field_name)
        return value
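
    # Illustrative example (hypothetical cell value): for field_name='assay' and a cell
    # holding 'RNAseq ', the stripped value is lower-cased and looked up in the dictionary
    # config, and the mapped entry is returned; with validate_by_dictionary=False the raw
    # stripped cell value ('RNAseq') is returned instead.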

    def process_inquiry_sources(self):
        cur_row = 0
        for inq_line in self.lines_arr:
            if cur_row == self.header_row_num - 1:
                # skip the header row
                cur_row += 1
                continue

            # get program code assigned to the current row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)
            # get assay assigned to the current row
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            # get source id assigned to the current row
            source_id = self.get_inquiry_value_by_field_name(
                'source_id', inq_line)

            # get source config file
            # 2 values are saved in tuple: program name specific path and default one.
            # if program name specific path does not exist, the default will be used
            cfg_source_path = (
                # configuration path for the current program by name
                gc.CONFIG_FILE_SOURCE_PATH\
                    .replace('{program}', program_code)\
                    .replace('{assay}', assay)\
                    .replace('{source_id}', source_id),
                # configuration path for the default program (used if no program specific path is present)
                gc.CONFIG_FILE_SOURCE_PATH \
                    .replace('{program}', 'default') \
                    .replace('{assay}', assay) \
                    .replace('{source_id}', source_id)
            )
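            # Illustrative example (the actual template string lives in gc.CONFIG_FILE_SOURCE_PATH;
            # the resolved paths below are assumptions): for program_code='dmref', assay='rnaseq',
            # source_id='src01' the tuple might resolve to something like
            #     ('.../dmref/rnaseq/src01/source_config.yaml', '.../default/rnaseq/src01/source_config.yaml')
            # and the default path is attempted only if the program-specific config fails to load.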
            # get the source location config file path
            cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace(
                '{source_id}', source_id)

            # attempt to load configuration for the program specific path
            cfg_source = ConfigData(Path(cfg_source_path[0]))
            if not cfg_source.loaded:
                # if config was not loaded from the program specific path, load the default one
                cfg_source = ConfigData(Path(cfg_source_path[1]))

            if cfg_source.loaded:
                # proceed here if the source config was loaded
                # load source location config with location specific settings for the current source
                cfg_source_location = ConfigData(
                    Path(cfg_source_location_path))
                if cfg_source_location.loaded:
                    # if the source location config was loaded, update cfg_source config with the source location config
                    cfg_source.update(
                        cfg_source_location.get_whole_dictionary())

                # get unique id of the datasource and check if the same id was used already, reuse that in such case
                inq_line_datasource_id = self.get_inquiry_line_datasource_id(
                    inq_line)
                self.logger.info(
                    'Current inquiry row #{} was identified with the following data source id: {}'
                    .format(cur_row, inq_line_datasource_id))
                # assign source id (inq_line_datasource_id) to the current inquiry line
                self.inq_line_sources[cur_row] = inq_line_datasource_id
                if inq_line_datasource_id in self.inq_sources:
                    # reuse existing datasource
                    self.logger.info(
                        'The data source id for the current inquiry row #{} matches one retrieved '
                        'earlier (for this or another row); the existing data source will be re-used '
                        'for the current row.'.format(cur_row))
                else:
                    # create a new datasource object
                    inq_line_datasource = DataSource(self, cfg_source,
                                                     inq_line,
                                                     inq_line_datasource_id)
                    self.inq_sources[
                        inq_line_datasource_id] = inq_line_datasource
            else:
                sub_al = self.get_inquiry_value_by_field_name(
                    'sub-aliquot', inq_line, False)
                _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \
                       'None of the expected files is accessible: {}'\
                    .format(cur_row, sub_al, ' | '.join(cfg_source_path))
                self.logger.warning(_str)
                self.disqualify_inquiry_item(
                    sub_al, _str, cur_row
                )  # TODO: verify if inq_line should be used instead of cur_row
            cur_row += 1
        pass

    def process_inquiry(self):
        self.process_inquiry_sources()
        self.match_inquiry_items_to_sources()
        self.create_download_request_file()
        self.create_inquiry_file_for_disqualified_entries()

        # check for errors and put final log entry for the inquiry.
        if self.error.exist():
            _str = 'Processing of the current inquiry was finished with the following errors: {}\n'.format(
                self.error.get_errors_to_str())
            self.logger.error(_str)
        else:
            _str = 'Processing of the current inquiry was finished successfully.\n'
            self.logger.info(_str)

    def match_inquiry_items_to_sources(self):
        cur_row = -1
        for inq_line in self.lines_arr:
            cur_row += 1  # increase row counter
            if cur_row == self.header_row_num - 1:
                continue

            # program_code = str(inq_line[0]) # get program code that must be a first column
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)

            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
            conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM.replace(
                '{program}', program_code)
            conf_dict_program = ConfigData(conf_dict_program_path)
            conf_dict.update(conf_dict_program.get_whole_dictionary())

            # print (inq_study_path)
            bulk_location = self.get_inquiry_value_by_field_name(
                'bulk_location', inq_line, False)
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', inq_line, False)

            # inq_study_path = '/'.join([program_code, bulk_location, assay])
            inq_study_path = self.conf_main.get_value(
                'Destination/study_path_template')
            inq_study_path = inq_study_path.replace('{program_code}',
                                                    program_code)
            inq_study_path = inq_study_path.replace('{bulk_location}',
                                                    bulk_location)
            inq_study_path = inq_study_path.replace('{assay}', assay)

            # check if current sub-aliquot is not part of disqualified items array
            if self.disqualified_items and sub_al in self.disqualified_items:
                # if sub-aliquot was disqualified already, skip this line
                continue

            # identify aliquot for the given sub-aliquot
            al = conf_dict.convert_sub_aliq_to_aliquot(
                sub_al, assay)  # identify aliquot for the current inquiry line

            match = False

            # get reference to the Datasource object assigned to the current row
            if cur_row in self.inq_line_sources:
                cur_source = self.inq_sources[self.inq_line_sources[cur_row]]
            else:
                # if no data source was assigned to the current row, skip the row
                continue
            # check if any source types were disqualified during loading the datasource
            if cur_source.disqualified_data_sources:
                # if at least one source of the datasource was disqualified, skip the row using this datasource
                # and disqualify the current sub-aliquot as well
                self.disqualify_inquiry_item(
                    sub_al,
                    'Datasource associated with this aliquot_id was marked as disqualified.',
                    inq_line)
                continue

            # get a copy of the source type ids of the current datasource;
            # it will track number of items found for each source type
            cur_source_types = copy.deepcopy(cur_source.source_types)

            # loop through items of the source
            for src_item in cur_source.source_content_arr:
                match_out = False
                # attempt match by the sub-aliquot
                match_out, match_details = \
                    self.is_item_found_soft_match(sub_al, src_item['name'], src_item['soft_comparisions'], sub_al)
                if match_out:
                    match = True
                # if the sub-aliquot match was not successful, attempt to match by the aliquot
                elif src_item['aliquot_match']:
                    match_out, match_details = \
                        self.is_item_found_soft_match(al, src_item['name'], src_item['soft_comparisions'], sub_al)
                    if match_out:
                        match = True
                # if a match was found using one of the above methods, record the item to inq_match_arr
                if match_out:
                    # since a match was found, verify that the source path is accessible (except for web locations)
                    web_loc = src_item['web_location']
                    # real_path = os.path.realpath(src_item['path'])  # real path of the current item

                    if web_loc or os.path.exists(src_item['path']):
                        item_details = {
                            'sub-aliquot': sub_al,
                            'study': inq_study_path,
                            # 'source': src_item,
                            'source_item_name': src_item['name'],
                            'target_subfolder': src_item['target_subfolder'],
                            'real_path': src_item['path'],
                            'target_copied_item_name': src_item['target_copied_item_name'],
                            'match_details': match_details,
                            'source_type_id': src_item['source_type_id'],
                            'obj_type': src_item['obj_type'],
                            'source_name_generic': cur_source.source_name_generic
                        }
                        self.inq_match_arr.append(item_details)
                        # record the source type id of an item to track quantity of found matches for each source type
                        cur_source_types[
                            src_item['source_type_id']]['items_count'] += 1
                    else:
                        self.disqualify_inquiry_item(
                            sub_al,
                            'A match was found, but the identified source path is not accessible. '
                            'Match details: {}. Source path: "{}".'.format(
                                match_details, src_item['path']), inq_line)

            # report if no match was found and
            # verify that a match was found for each of the source types of the current datasource
            if not match:
                # no matches were found for the current datasource
                self.disqualify_inquiry_item(
                    sub_al,
                    'No matching items (files/folders) were found in the current data source.',
                    inq_line)
            else:
                if not cur_source.allow_nomatch_per_sourcetype:
                    # some matches were found; verify that a match was found for each of the source types
                    for src_type in cur_source_types:
                        if cur_source_types[src_type]['items_count'] == 0:
                            # no matches were found for this source type
                            self.disqualify_inquiry_item(
                                sub_al,
                                'No matches were found for the "{}" source type id in the datasource.'
                                .format(src_type), inq_line)

    def is_item_found_soft_match(self, srch_item, srch_in_str, soft_match_arr,
                                 item_to_be_reported):
        out = False
        _str = ''
        # identify if the search is performed for sub_aliquot (full value) or aliquot (partial value)
        if srch_item == item_to_be_reported:
            entity = 'sub-aliquot'
        else:
            entity = 'aliquot'

        soft_match = False
        self.logger.debug("srch_item = {}| srch_in_str = {}".format(
            srch_item, srch_in_str))
        if srch_item in srch_in_str:
            out = True
            self.logger.debug("Exact match found between: {} | {}".format(
                srch_item, srch_in_str))
        else:
            if soft_match_arr:
                self.logger.debug("Starting soft match for: {} | {}".format(
                    srch_item, srch_in_str))
                for item in soft_match_arr:
                    srch_in_str = srch_in_str.replace(item['find'],
                                                      item['replace'])
                    srch_item = srch_item.replace(item['find'],
                                                  item['replace'])
                self.logger.debug(
                    "Updated for soft match: srch_item = {}| srch_in_str = {}".
                    format(srch_item, srch_in_str))
                if srch_item in srch_in_str:
                    out = True
                    soft_match = True
                    self.logger.debug(
                        "Soft match found between: {} | {}".format(
                            srch_item, srch_in_str))
        # prepare log entry
        if out:
            _str = ('Loose' if soft_match else 'Exact') + \
                   ' match was found for {} item "{}". Match values are as follows: "{}" and "{}".'\
                       .format(entity, item_to_be_reported, srch_item, srch_in_str)

        # log outcome of the match process; the "soft" match will be logged as a warning
        if out:
            if entity == 'aliquot':
                # if match was found by aliquot (partial id value), always report it as "warning"
                self.logger.warning(_str)
            else:
                # proceed here if match was found by sub-aliquot (full id value)
                if soft_match:
                    self.logger.warning(_str)
                else:
                    self.logger.info(_str)

        # prepare match details to output from this function
        match_type = ''
        if soft_match:
            # this was a soft match
            if entity == 'aliquot':
                match_type = 'loose/aliquot'
            else:
                match_type = 'loose'
        else:
            # this was an exact match
            if entity == 'aliquot':
                match_type = 'exact/aliquot'
            else:
                match_type = 'exact'

        out_details = {'match_type': match_type, 'details': _str}
        return out, out_details

    def create_download_request_file(self):
        self.logger.info("Start preparing download_request file.")
        # path for the download request file being created
        rf_path = Path(gc.OUTPUT_REQUESTS_DIR + "/" +
                       time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '_' +
                       self.filename.replace(' ', '') + '.tsv')

        self.download_request_path = rf_path

        if not self.inq_match_arr:
            self.logger.warning(
                'No inquiries with matched datasources exist for the current inquiry file. '
                'Skipping creation of a download request file.')
            return

        with open(rf_path, "w") as rf:
            # write headers to the file
            headers = '\t'.join([
                'Source', 'Destination', 'Aliquot_id', 'Obj_Type',
                'Target_Item_Name'
            ])
            rf.write(headers + '\n')

            for item in self.inq_match_arr:
                src_path = item['real_path']  # item['source']['path']

                # prepare values for the current inquiry row to put into the outcome file
                # project_path = self.conf_process_entity.get_value('Destination/location/project_path')
                bulk_data_path = self.conf_main.get_value(
                    'Destination/bulk_data_path')
                study_path = item['study']
                target_subfolder = item[
                    'target_subfolder']  # item['source']['target_subfolder']
                sub_aliquot = item['sub-aliquot']
                obj_type = item['obj_type']
                target_copied_item_name = item['target_copied_item_name']

                # check if current sub-aliquot is not part of disqualified items array
                if self.disqualified_items and sub_aliquot in self.disqualified_items:
                    # if sub-aliquot was disqualified already, skip this line
                    continue

                # get template for the destination path and replace placeholders with values
                # "{project_path}/{study_path}/{target_subfolder}"
                dest_path = self.conf_main.get_value(
                    'Destination/path_template')
                dest_path = dest_path.replace('{bulk_data_path}',
                                              bulk_data_path)
                dest_path = dest_path.replace('{study_path}', study_path)
                dest_path = dest_path.replace('{target_subfolder}',
                                              target_subfolder)

                line = '\t'.join([
                    str(src_path),
                    str(Path(dest_path)),
                    str(sub_aliquot),
                    str(obj_type), target_copied_item_name
                ])
                rf.write(line + '\n')

        self.logger.info(
            "Finish preparing download_request file '{}'.".format(rf_path))

    def disqualify_inquiry_item(self, sa, disqualify_status, inquiry_item):
        # adds a sub aliquots to the dictionary of disqualified items
        # key = sub-aliquot, values: dictionary with 2 values:
        #       'status' - reason for disqualification
        #       'inquiry_item: array of values for inquiry row from an inquiry file
        details = {'status': disqualify_status, 'inquiry_item': inquiry_item}
        if sa not in self.disqualified_items:
            self.disqualified_items[sa] = details
            self.logger.warning(
                'Sub-aliquot "{}" was disqualified with the following status: "{}"'
                .format(sa, disqualify_status))
        else:
            self.logger.warning(
                'Sub-aliquot "{}" was already disqualified earlier. '
                'The following disqualification call will be ignored: "{}"'.
                format(sa, disqualify_status))

    def create_inquiry_file_for_disqualified_entries(self):
        if self.disqualified_items:
            self.logger.info(
                "Start preparing inquiry file for disqualified sub-aliquots.")
            # path for the disqualified inquiry file being created

            wb = xlwt.Workbook()  # create empty workbook object
            # sheet names cannot be longer than 31 characters
            sh = wb.add_sheet('Re-process_inquiry')

            cur_row = 0  # first row for 0-based array
            cur_col = 0  # first col for 0-based array
            # write headers to the file
            headers = self.lines_arr[0]
            for val in headers:
                sh.write(cur_row, cur_col, val)
                cur_col += 1

            cur_row += 1

            for di in self.disqualified_items:
                fields = self.disqualified_items[di]['inquiry_item']
                cur_col = 0
                for val in fields:
                    sh.write(cur_row, cur_col, val)
                    cur_col += 1
                cur_row += 1

            if not os.path.isabs(gc.DISQUALIFIED_INQUIRIES):
                disq_dir = Path(self.wrkdir) / gc.DISQUALIFIED_INQUIRIES
            else:
                disq_dir = Path(gc.DISQUALIFIED_INQUIRIES)

            # if DISQUALIFIED_INQUIRIES folder does not exist, it will be created
            os.makedirs(disq_dir, exist_ok=True)

            # identify path for the disqualified inquiry file
            self.disqualified_inquiry_path = Path(
                str(disq_dir) + '/' +
                time.strftime("%Y%m%d_%H%M%S", time.localtime()) +
                '_reprocess_disqualified_' +
                # .stem method is used to get file name without an extension
                Path(self.filename).stem.replace(' ', '') + '.xls')

            wb.save(str(self.disqualified_inquiry_path))

            self.logger.info(
                "Successfully prepared the inquiry file for disqualified sub-aliquots and saved in '{}'."
                .format(str(self.disqualified_inquiry_path)))
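
The soft ("loose") comparison used by is_item_found_soft_match above boils down to normalizing both the searched id and the candidate string with a list of find/replace rules and then retrying a simple containment test. A minimal standalone sketch of that idea, outside of the Inquiry class; the function name and the sample rule list are illustrative assumptions and not part of the original code:

def soft_match(needle, haystack, rules):
    # try an exact containment test first
    if needle in haystack:
        return True, 'exact'
    # normalize both strings with the find/replace rules and retry
    for rule in rules or []:
        needle = needle.replace(rule['find'], rule['replace'])
        haystack = haystack.replace(rule['find'], rule['replace'])
    return needle in haystack, 'loose'

# example: treat dashes and underscores as equivalent
# soft_match('AB-0001', 'study1_AB_0001_rnaseq.fastq.gz', [{'find': '-', 'replace': '_'}])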
Example #11
0
class Monitor():
    def __init__(self, cfg_monitor_path, log_obj):
        self.action_completed = False
        self.status = []

        self.mtr_cfg_path = cfg_monitor_path
        self.log = log_obj
        self.error = MonitorError(self)
        self.mtr_cfg = ConfigData(cfg_monitor_path)
        if self.validate_config_file():
            self.loaded = True
        else:
            self.loaded = False
        cur_cfg_dir = os.path.dirname(cfg_monitor_path)
        cur_cfg_file_name = Path(os.path.abspath(cfg_monitor_path)).name
        stamp_dir = Path(str(cur_cfg_dir) + '/' + gc.STAMPS_FILES_FOLDER_NAME)
        if not os.path.exists(stamp_dir):
            os.mkdir(stamp_dir)
        stamp_file = Path(
            str(stamp_dir) + '/' +
            cur_cfg_file_name.replace('.yaml', '_stamp.yaml'))
        self.verify_config_stamp_file(stamp_file)
        self.mtr_cfg_stamp = ConfigData(stamp_file)

        self.mtr_source = None
        self.mtr_source_path = None

        if self.loaded:
            # get config file values
            self.mtr_source_dir = Path(
                cm.eval_cfg_value(
                    self.mtr_cfg.get_value('Location/source_dir'), self.log,
                    None))
            self.mtr_source_file = Path(
                self.mtr_cfg.get_value('Location/source_file'))
            found_files = cm.find_file_in_dir(self.mtr_source_dir,
                                              self.mtr_source_file, False)
            if found_files:
                ff_stamp = None
                for file_match in found_files:
                    if not ff_stamp or ff_stamp < os.stat(
                            Path(self.mtr_source_dir) / file_match).st_mtime:
                        ff_stamp = os.stat(
                            Path(self.mtr_source_dir) / file_match).st_mtime
                        self.mtr_source = file_match
                # self.mtr_source = found_files[0]
                self.mtr_source_path = Path(
                    self.mtr_source_dir) / self.mtr_source
            # else:
            #    self.mtr_source = None
            #    self.mtr_source_path = None
            self.mtr_destin = self.mtr_cfg.get_value('Location/destination')
            self.mtr_item = self.mtr_cfg.get_value('Monitoring/item')
            self.mtr_type = self.mtr_cfg.get_value('Monitoring/type')
            self.mtr_action = self.mtr_cfg.get_value('Monitoring/action')
            self.mtr_frequency = self.mtr_cfg.get_value('Monitoring/frequency')
            # self.mtr_email = self.mtr_cfg.get_value('Monitoring/email_notification')
            # self.mtr_email_cc = self.mtr_cfg.get_value('Monitoring/email_cc')
            # load stamp info from stamp config file
            self.mtr_sync_date = self.mtr_cfg_stamp.get_value(
                'Last_sync/date_time')
            self.mtr_watch_value = self.mtr_cfg_stamp.get_value(
                'Last_sync/watch_value')

    def verify_config_stamp_file(self, file_path):
        if not cm.file_exists(file_path):
            # if the stamp file is not present, create an empty one
            with open(file_path, "w+"):
                pass

    def validate_config_file(self):
        # TODO: add some rules to validate the current monitoring config file
        return True

    def start_monitor(self):

        if self.mtr_source_path:
            next_sync_datetime = None  # default value
            # check if delay between monitoring events was fulfilled
            if self.mtr_sync_date and str(self.mtr_frequency).isnumeric():
                try:
                    next_sync_datetime = datetime.strptime(self.mtr_sync_date, gc.STAMP_DATETIME_FORMAT) + \
                                         timedelta(seconds=int(self.mtr_frequency))
                except Exception as ex:
                    # report unexpected error to log file
                    _str = 'Unexpected Error "{}" occurred during calculating next sync datetime. ' \
                           'Saved sync date: "{}", sync frequency: "{}"' \
                        .format(ex, self.mtr_sync_date, self.mtr_frequency)
                    self.status.append(_str)
                    _str = _str + '\n{} '.format(traceback.format_exc())
                    self.log.error(_str)
                    self.error.add_error(_str)

            if not next_sync_datetime or next_sync_datetime < datetime.now():
                self.log.info(
                    'Monitoring delay of "{}" seconds has expired since the last synchronization event on {}. '
                    'Proceeding to monitor "{}" file.'.format(
                        self.mtr_frequency if self.mtr_frequency else 'N/A',
                        self.mtr_sync_date if self.mtr_sync_date else 'N/A',
                        self.mtr_source))
                custom_action = self.action_copy  # set default value
                if self.mtr_action == 'copy':
                    custom_action = self.action_copy
                watcher = Watcher(
                    self.mtr_source_path, custom_action, self,
                    self.mtr_watch_value)  # self.mtr_item, self.mtr_type)
                watcher.watch()  # start the watch going

                # update stats in the config file
                datetime_stamp = time.strftime(gc.STAMP_DATETIME_FORMAT,
                                               time.localtime())
                self.mtr_cfg_stamp.set_value(datetime_stamp,
                                             'Last_sync/date_time')
                self.log.info(
                    'Datetime information for monitored file was recorded: Last_sync/date_time: {}'
                    .format(datetime_stamp))

            else:
                _str = 'Monitoring delay of "{}" seconds has not expired since the last synchronization event on {}. '\
                        .format(self.mtr_frequency if self.mtr_frequency else 'N/A',
                                self.mtr_sync_date if self.mtr_sync_date else 'N/A')
                self.log.info(_str)
                self.status.append(_str)
        else:
            _str = 'Source file "{}" was not found in the source directory "{}". '\
                .format(self.mtr_source_file, self.mtr_source_dir)
            self.log.warning(_str)
            self.status.append(_str)

    def action_copy(self, file_time_stamp):
        self.log.info('Start copying "{}" to "{}"'.format(
            self.mtr_source, self.mtr_destin))
        self.new_file_time_stamp = file_time_stamp
        try:
            shutil.copy(self.mtr_source_path, self.mtr_destin)
            _str = 'Copying of "{}" to "{}" completed successfully.'.format(
                self.mtr_source_path, self.mtr_destin)
            self.log.info(_str)

            self.action_completed = True
            self.status.append(_str)

            # update stats in the config file
            self.mtr_cfg_stamp.set_value(file_time_stamp,
                                         'Last_sync/watch_value')
            self.log.info(
                'Stamp information for just copied file was recorded: '
                'Last_sync/watch_value: {}'.format(file_time_stamp))

        except Exception as ex:
            # report unexpected error to log file
            _str = 'Unexpected Error "{}" occurred during copying file "{}" to "{}"\n{} ' \
                .format(ex, self.mtr_source, self.mtr_destin, traceback.format_exc())
            self.log.error(_str)
            self.error.add_error(_str)
            self.status.append(_str)
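
# --- Hedged usage sketch (not part of the original example) -----------------
# One way the Monitor class above might be driven for a single monitoring
# config file; the function name and its arguments are hypothetical.
def run_single_monitor(cfg_monitor_path, log_obj):
    mtr = Monitor(cfg_monitor_path, log_obj)
    if mtr.loaded:
        mtr.start_monitor()
    return mtr.status
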
def process_download_inquiries():

    # load main config file and get required values
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1

    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())
    # print ('m_cfg = {}'.format(m_cfg.cfg))
    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new inquiry files will be posted
    inquiries_loc = m_cfg.get_value('Location/inquiries')

    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value(
        'Location/inquiries_disqualified')
    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing inquiry files will be stored
    # (one file per inquiry)
    gc.INQUIRY_LOG_DIR = m_cfg.get_value('Location/inquiry_logs_relative_path')
    # path to the folder where all processed (and renamed) inquiries will be stored
    gc.INQUIRY_PROCESSED_DIR = m_cfg.get_value(
        'Location/inquiries_processed_relative_path')
    # get config setting for the processed_add_datestamp and save it to global const module
    processed_add_datestamp = m_cfg.get_value(
        'Location/processed_add_datestamp')
    if processed_add_datestamp:
        gc.PROCESSED_ADD_DATESTAMP = processed_add_datestamp
    # path to the folder where created submission packages will be located. One package sub_folder per inquiry.
    gc.OUTPUT_REQUESTS_DIR = m_cfg.get_value('Location/output_requests')
    # path to dir with dynamically created inquiry files for disqualified aliquots
    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value(
        'Location/inquiries_disqualified_path')

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME

    # this variable defines whether the Data Downloader app will be executed at the end of processing inquiries
    run_data_download = m_cfg.get_value('Execute/run_data_downloader')
    # path to the Data Downloader tool
    gc.DATA_DOWNLOADER_PATH = m_cfg.get_value('Location/data_downloader_path')

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    # email_attchms = []

    inquiries_path = Path(inquiries_loc)

    # get current location of the script and create Log folder
    # if a relative path provided, convert it to the absolute address based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    # logdir = Path(prj_wrkdir) / log_folder_name  # 'logs'
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)  # logging_level
    mlog = lg['logger']

    mlog.info(
        'Start processing download inquiries in "{}"'.format(inquiries_path))

    try:

        (root, source_inq_dirs, _) = next(walk(inquiries_path))

        inq_proc_cnt = 0
        errors_present = 'OK'

        for inq_dir in source_inq_dirs:
            source_inquiry_path = Path(root) / inq_dir
            mlog.info(
                'Selected for processing inquiry source: "{}", full path: {}'.
                format(inq_dir, source_inquiry_path))

            (_, _, inq_files) = next(walk(source_inquiry_path))

            # filter only excel files for processing as inquiries
            inquiries = [
                fl for fl in inq_files if fl.endswith(('xlsx', 'xls'))
            ]
            # filter out temp files (starting with '~$') created when an excel file is open
            inquiries = [fl for fl in inquiries if not fl.startswith('~$')]

            mlog.info('Inquiry files presented (count = {}): "{}"'.format(
                len(inquiries), inquiries))

            for inq_file in inquiries:
                inq_path = Path(source_inquiry_path) / inq_file

                # email_msgs = []
                # email_attchms = []

                try:
                    # print('--------->Process file {}'.format(inq_path))
                    mlog.info('The following Inquiry file was selected: "{}".'.
                              format(inq_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    inq_obj = Inquiry(inq_path, m_cfg)

                    if inq_obj and inq_obj.loaded:
                        # proceed processing inquiry
                        mlog.info('Inquiry file was successfully loaded.')
                        mlog.info(
                            'Starting processing Download Inquiry file: "{}".'.
                            format(inq_path))

                        inq_obj.process_inquiry()

                        mlog.info(
                            'Processing of Download Inquiry was finished for {}'
                            .format(inq_path))

                    inq_proc_cnt += 1

                    # identify if any errors were identified and set status variable accordingly
                    if not inq_obj.error.exist():
                        if not inq_obj.disqualified_items:
                            # no disqualified sub-aliquots present
                            fl_status = 'OK'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            # errors_present = 'OK'  # this variable is set to OK by default, no update needed
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK_with_Disqualifications'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check processing log file for this inquiry: {}' \
                            .format(fl_status, inq_obj.logger.handlers[0])
                        errors_present = 'ERROR'

                    if fl_status == "OK":
                        mlog.info(_str)
                    else:
                        mlog.warning(_str)

                    processed_dir = inq_obj.processed_folder  # 'Processed'
                    # combine the name of the processed file
                    inq_processed_name = fl_status + '_' + str(
                        inq_file).replace(' ', '_').replace('__', '_')
                    if gc.PROCESSED_ADD_DATESTAMP:
                        inq_processed_name = ts + '_' + inq_processed_name
                    # move processed files to Processed folder
                    fl_processed_name = cm.move_file_to_processed(
                        inq_path, inq_processed_name, processed_dir,
                        inq_obj.logger, inq_obj.error)
                    if fl_processed_name:
                        mlog.info(
                            'Processed file "{}" was moved(renamed) to: "{}"'.
                            format(inq_path,
                                   processed_dir / fl_processed_name))
                    else:
                        errors_present = errors_present + '|MoveProcessedError'
                        mlog.warning(
                            'Moving the processed file "{}" was not successful due to some errors '
                            'reported in the request\'s log file {}.'.format(
                                inq_path, inq_obj.log_handler.baseFilename))

                    # preps for email notification
                    # create a dictionary to feed into template for preparing an email body
                    template_feeder = {
                        'file_num': inq_proc_cnt,
                        'file_path': str(inq_path),
                        'file_path_new': (str(processed_dir / fl_processed_name)
                                          if processed_dir and fl_processed_name else None),
                        'inq_obj_errors_cnt': inq_obj.error.count,
                        'log_file_path': inq_obj.log_handler.baseFilename,
                        'dld_request_file_path': str(inq_obj.download_request_path),
                        'inq_sources': inq_obj.inq_sources,
                        'inq_match_aliquots': inq_obj.inq_match_arr,
                        'inq_disqul_aliquots': inq_obj.disqualified_items,
                        'inq_disqul_reprocess_path': str(inq_obj.disqualified_inquiry_path)
                    }
                    email_body_part = cm.populate_email_template(
                        'processed_inquiry.html', template_feeder)
                    email_msgs.append(email_body_part)

                    # deactivate the current Inquiry logger
                    deactivate_logger_common(inq_obj.logger,
                                             inq_obj.log_handler)
                    inq_obj = None

                except Exception as ex:
                    # report the error to the log file and re-raise to abort processing
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, inq_path, traceback.format_exc()))
                    raise

        mlog.info('Number of successfully processed Inquiries = {}'.format(
            inq_proc_cnt))

        # start Data Download request if proper config setting was provided
        dd_status = {'status': '', 'message': ''}
        if run_data_download:
            # start process
            mlog.info(
                'Starting asynchronously Data Downloader app: "{}".'.format(
                    gc.DATA_DOWNLOADER_PATH))
            try:
                dd_process = cm.start_external_process_async(
                    gc.DATA_DOWNLOADER_PATH)
                # check if it is running
                dd_status = cm.check_external_process(dd_process)
                mlog.info(
                    'Status of running Data Downloader app: "{}".'.format(
                        dd_status))
            except Exception as ex:
                # report unexpected error during starting Data Downloader
                _str = 'Unexpected Error "{}" occurred during an attempt to start Data Downloader app ({})\n{} ' \
                    .format(ex, gc.DATA_DOWNLOADER_PATH, traceback.format_exc())
                mlog.critical(_str)
                dd_status = {'status': 'Error', 'message': _str}

        mlog.info('Preparing to send notification email.')

        email_to = m_cfg.get_value('Email/send_to_emails')
        email_subject = 'processing of download inquiry. '

        if inq_proc_cnt > 0:  # inquiries and len(inquiries) > 0:
            # collect final details and send email about this study results

            # get all statuses into an array; the first element is the main status
            err_present = errors_present.split('|')
            if err_present:
                # set email subject based on the main status err_present[0]
                if err_present[0] == 'OK':
                    email_subject = 'SUCCESSFUL ' + email_subject
                elif err_present[0] == 'DISQUALIFY':
                    email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
                else:
                    email_subject = 'ERROR(s) present during ' + email_subject
            if len(err_present) > 1:
                if err_present[1] == 'MoveProcessedError':
                    email_subject = email_subject + ' Error moving inquiry to processed.'

            if dd_status and 'status' in dd_status and dd_status['status'].lower() == 'error':
                email_subject = email_subject + ' Errors starting Data Downloader.'

            # create a dictionary to feed into template for preparing an email body
            template_feeder = {
                'inq_cnt': inq_proc_cnt,
                'run_data_download': run_data_download,
                'downloader_path': gc.DATA_DOWNLOADER_PATH,
                'downloader_start_status': dd_status['status'].lower(),
                'processed_details': '<br/>'.join(email_msgs)
            }
            email_body = cm.populate_email_template('processed_inquiries.html',
                                                    template_feeder)

            # remove return characters from the body of the email, to keep just clean html code
            email_body = email_body.replace("\r", "")
            email_body = email_body.replace("\n", "")

            # print ('email_subject = {}'.format(email_subject))
            # print('email_body = {}'.format(email_body))

            mlog.info(
                'Sending a status email with subject "{}" to "{}".'.format(
                    email_subject, email_to))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=email_to,
                        subject=email_subject,
                        message=email_body
                        # attachments are intentionally not added, since some log files exceed the 25MB email limit and fail sending
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, inq_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            mlog.info(
                'End of processing of download inquiries in "{}".'.format(
                    inquiries_path))

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()
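
A minimal entry-point sketch for the function above, assuming the module is meant to be executed as a script; the guard itself is an illustrative addition and not part of the original example:

if __name__ == '__main__':
    process_download_inquiries()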
Example #13
0
    def init_specific_settings(self):
        self.source_name_generic = self.inq_obj.get_inquiry_value_by_field_name(
            'source_id', self.inq_line, False)

        # get configuration of all source location types of the given datasource
        source_locations = self.conf_process_entity.get_value('sources')
        self.source_locations = source_locations

        # get (if present) configuration values to adjust the source path to the current user's mount point specifics
        path_to_replace = self.conf_process_entity.get_value(
            'Location/path_to_replace')
        path_local_mountpoint = self.conf_process_entity.get_value(
            'Location/path_local_mountpoint')
        map_file_path = self.conf_process_entity.get_value(
            'Location/map_file_path')

        # default search_by parameters from source config file
        search_by_default = self.conf_process_entity.get_value(
            'search_method_default/search_by')
        search_deep_level_default = self.conf_process_entity.get_value(
            'search_method_default/search_deep_level_max')
        exclude_dirs_default = self.conf_process_entity.get_value(
            'search_method_default/exclude_folders')
        ext_match_default = self.conf_process_entity.get_value(
            'search_method_default/file_ext')
        aliquot_match_default = self.conf_process_entity.get_value(
            'search_method_default/aliquot_match')
        soft_comparisons_default = self.conf_process_entity.get_value(
            'soft_comparision')
        map_file_default = self.conf_process_entity.get_value(
            'search_method_default/map_file')

        # get source main value from the inquiry file for the current row
        source_main = self.inq_obj.get_inquiry_value_by_field_name(
            'source_main', self.inq_line, False)
        if len(source_main.strip()) == 0:
            # if source_main was not provided, set the flag allow_nomatch_per_sourcetype = True
            self.allow_nomatch_per_sourcetype = True

        ds_count = 0
        for loc_item in source_locations:
            ds_count += 1
            current_source_type_id = loc_item[
                'source_id'] if 'source_id' in loc_item else ''
            source_subfolder = loc_item['source_subfolder'] \
                if 'source_subfolder' in loc_item and loc_item['source_subfolder'] else ''

            self.logger.info(
                'Start processing data source #{}, source_id: "{}"'.format(
                    ds_count, current_source_type_id))

            current_source_id_path = \
                self.inq_obj.get_inquiry_value_by_field_name('source_' + current_source_type_id, self.inq_line, False)\
                    .strip()

            # if a special path for the current source id was not provided in the inquiry row, use the default one
            if len(current_source_id_path) == 0:
                current_source_id_path = source_main + '/' + source_subfolder

            # check if partial path replacement in the current_source_id_path is required for the current datasource
            if path_to_replace and path_local_mountpoint:
                current_source_id_path = str(
                    Path(
                        current_source_id_path.replace(path_to_replace,
                                                       path_local_mountpoint)))

            # add each source type id to a source_types dictionary to hold all source types required for this datasource
            # save path associated with the current source type here as well
            self.source_types[current_source_type_id] = {
                'source_path': current_source_id_path,
                'items_count': 0
            }

            # check if a current source has specific search_by parameters, otherwise use default ones
            src_sm = loc_item[
                'search_method'] if 'search_method' in loc_item.keys(
                ) else None
            search_by = src_sm['search_by'] \
                if src_sm and 'search_by' in src_sm.keys() else search_by_default
            search_deep_level = src_sm['search_deep_level_max'] \
                if src_sm and 'search_deep_level_max' in src_sm.keys() else search_deep_level_default
            exclude_dirs = src_sm['exclude_folders'] \
                if src_sm and 'exclude_folders' in src_sm.keys() else exclude_dirs_default
            ext_match = src_sm['file_ext'] \
                if src_sm and 'file_ext' in src_sm.keys() else ext_match_default
            soft_comparisons = src_sm['soft_comparision'] \
                if src_sm and 'soft_comparision' in src_sm.keys() else soft_comparisons_default
            aliquot_match = src_sm['aliquot_match'] \
                if src_sm and 'aliquot_match' in src_sm.keys() else aliquot_match_default
            map_file = src_sm['map_file'] \
                if src_sm and 'map_file' in src_sm.keys() else map_file_default

            # if the file with the custom soft-comparison rules was supplied as the command line argument use it
            # and overwrite the current value of the soft_comparison variable
            if gc.CONFIG_CUSTOM_SOFT_MATCH:
                custom_soft_match_cfg = ConfigData(gc.CONFIG_CUSTOM_SOFT_MATCH)
                soft_comparisons = custom_soft_match_cfg.get_value(
                    "soft_comparision")

            # update map_file's "file_path" variable with the value of "map_file_path" from local config
            if map_file and 'file_path' in map_file:
                if not map_file['file_path'].strip() and map_file_path and map_file_path.strip():
                    map_file['file_path'] = map_file_path.strip()

            error_on_disqualification = loc_item['report_error_on_disqualification'] \
                if 'report_error_on_disqualification' in loc_item.keys() else False
            web_location = loc_item['web_location'] if 'web_location' in loc_item.keys() else None
            # default xpath option - start with the root element
            xpath = loc_item['xpath'] if 'xpath' in loc_item.keys() else '/'

            # make sure that web URLs end with "/"; if not, add the character
            if web_location and current_source_id_path[-1:] != '/':
                current_source_id_path += '/'

            # set default value for target_subfolder
            target_subfolder = ''
            # if target_subfolder value is provided in config, get it from there
            if 'target_subfolder' in loc_item.keys():
                target_subfolder = loc_item['target_subfolder'] if loc_item[
                    'target_subfolder'] else ''

            self.logger.info(
                'Current data source config details: '
                'source_type_id: "{}", '
                'web_location: "{}", '
                'search_by: "{}", '
                'search_deep_level_max: "{}", '
                'exclude_folders: "{}", '
                'file_ext: "{}", '
                'soft_comparison (loose comparison): "{}", '
                'aliquot_match: "{}", '
                'target_subfolder: "{}", '
                'xpath: "{}"'
                ''.format(current_source_type_id,
                          (web_location if web_location else False), search_by,
                          (search_deep_level if search_deep_level else
                           0 if web_location else 'No limit'), exclude_dirs,
                          (ext_match if ext_match else ''),
                          (soft_comparisons if soft_comparisons else ''),
                          aliquot_match, target_subfolder, xpath))
            self.logger.info(
                'Current data source path: {}'.format(current_source_id_path))

            # start processing current source
            items = []
            disqualify = None
            if search_by == 'folder_name':
                if not web_location:
                    items, disqualify = self.get_data_by_folder_name(
                        current_source_id_path, search_deep_level,
                        exclude_dirs)
                else:
                    items, disqualify = self.get_web_data(
                        current_source_id_path, xpath, exclude_dirs)
            elif search_by == 'file_name':
                if not web_location:
                    items, disqualify = self.get_data_by_file_name(
                        current_source_id_path, search_deep_level,
                        exclude_dirs, ext_match)
                else:
                    items, disqualify = self.get_web_data(
                        current_source_id_path, xpath, exclude_dirs, ext_match)
            elif search_by == 'map_file':
                if not web_location:
                    items, disqualify = self.get_data_by_map_file(
                        current_source_id_path, map_file)
                else:
                    _str = 'Web locations are not currently set to work with the "map_file" ' \
                           'search_by configuration parameter'
                    self.logger.warning(_str)
                    disqualify = (loc_item['path'], _str)
            else:
                _str = 'Unexpected "search_by" configuration parameter "{}" was provided.'.format(
                    search_by)
                _str2 = 'Skipping processing of the current source "{}"'.format(
                    current_source_id_path)
                self.logger.warning('{} {}'.format(_str, _str2))
                disqualify = (loc_item['path'], _str)

            if disqualify:
                # if disqualification was reported for current source location, disqualify it and skip to next location
                self.disqualify_source(current_source_type_id, disqualify[1],
                                       error_on_disqualification)
                continue

            if items and len(items) > 0:
                for item in items:

                    # this variable will be filled with a value if realpath file name does not match the name
                    # of the link provided as the file to be retrieved
                    target_copied_item_name = ''

                    if not web_location:
                        # identify real path of the item for cases when symlinks are part of the path
                        # this should be done for all not web locations

                        # get values to be searched in the item's path to adapt the provided path to the local mount point
                        path_to_replace = self.conf_process_entity.get_value(
                            'Location/path_to_replace')
                        path_local_mountpoint = self.conf_process_entity.get_value(
                            'Location/path_local_mountpoint')

                        real_path = os.path.realpath(
                            item['path'])  # real path of the current item

                        if path_to_replace and path_local_mountpoint and path_to_replace != path_local_mountpoint:
                            # check if the real path needs to be adapted to use the local mount point
                            while path_to_replace in real_path:
                                # loop until the final path no longer contains the string requiring the mount point adaptation
                                real_path = real_path.replace(
                                    path_to_replace, path_local_mountpoint)
                                real_path = os.path.realpath(real_path)

                        if real_path != item['path']:
                            # if realpath file/dir name not equal file/dir of the given location, save given name
                            # into a separate variable target_copied_item_name
                            target_copied_item_name = os.path.basename(
                                item['path'])

                    # identify the obj_type of the current item
                    if web_location:
                        # if the datasource is the web location
                        if search_by == 'folder_name':
                            obj_type = 'dir'
                        elif search_by == 'file_name':
                            obj_type = 'file'
                        else:
                            obj_type = 'UNKNOWN'
                    else:
                        # if the datasource is a network location
                        if os.path.isfile(real_path):
                            obj_type = 'file'
                        elif os.path.isdir(real_path):
                            obj_type = 'dir'
                        else:
                            obj_type = 'UNKNOWN'

                    item_details = {
                        # 'aliquot_search': item['aliquot_search'],
                        # 'path': item['path'] if not web_location else current_source_id_path + item['path'],
                        'path': real_path if not web_location else current_source_id_path + item['path'],
                        # 'name': os.path.basename(item['path']) if not web_location else item['path'],
                        'name': os.path.basename(item['aliquot_search'])
                                if not web_location else item['aliquot_search'],
                        'target_copied_item_name': target_copied_item_name,
                        'source_type_id': current_source_type_id,
                        'target_subfolder': target_subfolder,
                        'soft_comparisions': soft_comparisons,
                        'aliquot_match': aliquot_match,
                        'search_by': search_by,
                        'obj_type': obj_type,
                        'web_location': web_location
                    }
                    self.source_content_arr.append(item_details)
            else:
                self.logger.warning(
                    'No available files/folders were found in the current source. '
                    'Configuration settings of the source might need to be reviewed.'
                )

            self.logger.info(
                'Processing data source #{} was completed. '
                'Total number of files/folder available in the source = {}.'.
                format(ds_count,
                       len(items) if items else 0))
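
The per-source overrides above repeatedly fall back to the search_method_default values whenever a key is missing from a source's search_method block. A compact standalone sketch of that fallback pattern using dict.get; the default values shown are hypothetical and only mirror the key names used in the example:

def resolve_search_settings(loc_item, defaults):
    # per-source search_method values win; anything missing falls back to the defaults
    src_sm = loc_item.get('search_method') or {}
    return {key: src_sm.get(key, default) for key, default in defaults.items()}

# hypothetical defaults mirroring the search_method_default config section
defaults = {'search_by': 'file_name', 'search_deep_level_max': 2,
            'exclude_folders': [], 'file_ext': '.fastq.gz', 'aliquot_match': False}
# resolve_search_settings({'source_id': 'rnaseq',
#                          'search_method': {'search_by': 'folder_name'}}, defaults)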
Example #14
0
class ApiProcess():
    def __init__(self, api_cfg_file, log_obj):
        self.loaded = False
        # set logger object
        self.logger = log_obj
        self.dataset = None

        # set error object
        self.error = ApiError(self)

        self.logger.info(
            'Start processing API call for the following config file: {}'.
            format(api_cfg_file))

        # load config file for the current api process
        cfg_file_path = gc.CONFIGS_DIR + api_cfg_file
        self.api_cfg = ConfigData(cfg_file_path)

        if not self.api_cfg.loaded:
            _str = 'Cannot load the config file: "{}"'.format(cfg_file_path)
            self.logger.error(_str)
            self.error.add_error(_str)
            return

        # get values from the config file
        self.api_name = self.api_cfg.get_value('API/name')
        self.api_url = self.api_cfg.get_value('API/url')
        self.post_fields = self.api_cfg.get_value('API/post_fields')

        # verify if "eval" is present in any of the post fields and perform the evaluation, if needed
        if self.post_fields:
            for pf in self.post_fields:
                self.post_fields[pf] = cm.eval_cfg_value(
                    self.post_fields[pf], self.logger, self.error)

        # if no errors were generated during init, set loaded = True
        if not self.error.errors_exist():
            self.loaded = True

    def process_api_call(self):
        # perform the actual API call, collect output in api_output and status into errors_reported (T/F variable)
        api_output, errors_reported = cm.perform_api_call(
            self.api_url, self.post_fields, self.logger, self.error)

        # check if errors were reported
        if errors_reported:
            # stop processing the current API call if an error is reported
            self.logger.warning(
                'Aborting processing of the current API call, since errors were reported (see earlier entries).'
            )
            return
        # validate the returned dataset
        if api_output and len(api_output.strip()) != 0:
            # proceed with processing the API dataset
            self.dataset = ApiDataset(api_output, self.api_cfg, self.logger,
                                      self.error, self.api_name)
            if self.dataset.loaded:
                self.dataset.submit_rows_to_db()

            else:
                self.logger.warning(
                    'Application failed to process the API response. See previous log entries '
                    'for more details. Aborting processing the current API call.'
                )
                return
        else:
            # stop processing the API call if the returned dataset is empty
            self.logger.warning(
                'API call returned an empty dataset, aborting processing of the current API call.'
            )
            return

        pass
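
A minimal driver sketch for the ApiProcess class above, assuming a standard library logger is acceptable as the log object; the config file name 'api_example.yaml' is a hypothetical placeholder resolved relative to gc.CONFIGS_DIR:

import logging

logging.basicConfig(level=logging.INFO)
api_proc = ApiProcess('api_example.yaml', logging.getLogger('api'))
if api_proc.loaded:
    api_proc.process_api_call()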