Example #1
class Request(File):
    def __init__(self, filepath, main_cfg, file_type=2, sheet_name=''):

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        if main_cfg:
            self.conf_main = main_cfg
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # if cfg_path=='':
        #     self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # else:
        #     self.conf_main = ConfigData(cfg_path)

        self.error = RequestError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Submission request file {}'.format(filepath))

        # self.file_dict = OrderedDict()
        # self.rows = OrderedDict()

        self.columnlist = []
        self.samples = []
        self.sub_aliquots = []
        self.disqualified_sub_aliquots = {}
        self.aliquots_to_subaliquots_map = {
        }  # holds the map of aliquots to sub-aliquots for interpreting DB responses
        self.disqualified_request_path = ''  # will store path to a request file with disqualified sub-aliquots
        self.project = ''
        self.bulk_location = ''
        self.assay = ''
        self.center = ''
        self.center_id = None
        self.center_code = None
        self.experiment_id = ''
        self.data_source_objects = {
        }  # dictionary to store all collected data sources for the request

        self.aliquots = None
        self.qualified_aliquots = None
        self.raw_data = None
        self.assay_data = None
        self.attachments = None
        self.submission_forms = None
        self.submission_package = None
        self.data_source_names = None
        # will hold a value corresponding to the type of data source being used
        # (the 'attachment' source does not affect this value); possible values are
        # 'db' and 'file', set based on the first data source used
        self.data_source_forms_assignment = None

        # self.sheet_name = ''
        self.sheet_name = sheet_name.strip()
        if not self.sheet_name:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.REQUEST_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_assay = None

        self.get_file_content()

    def get_file_content(self):
        if not self.columnlist:
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(
                    self.filepath))

                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = (
                                'Given worksheet name "{}" was not found in the file "{}". '
                                'Verify that the worksheet name exists in the file.'
                            ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lineList = None
                            self.loaded = False
                            return self.lineList


                lines = [
                ]  # will hold content of the request file as an array of arrays (rows)
                for i in range(sheet.ncols):
                    column = []
                    for j in range(sheet.nrows):
                        if i == 0:
                            lines.append(
                                []
                            )  # adds an array for each new row in the request file

                        # print(sheet.cell_value(i, j))
                        cell = sheet.cell(j, i)
                        cell_value = cell.value
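                        # xlrd cell types (cell.ctype): 0=empty, 1=text, 2=number,
                        # 3=date, 4=boolean, 5=error, 6=blank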
                        # numbers and dates arrive from Excel as floats by default
                        if cell.ctype == 2 and int(cell_value) == cell_value:
                            # the value is an integer
                            cell_value = str(int(cell_value))
                        elif cell.ctype == 2:
                            # the value is a float
                            cell_value = str(cell_value)
                        # convert date back to human readable date format
                        # print ('cell_value = {}'.format(cell_value))
                        if cell.ctype == 3:
                            cell_value_date = xlrd.xldate_as_datetime(
                                cell_value, wb.datemode)
                            cell_value = cell_value_date.strftime("%Y-%m-%d")
                        column.append(
                            cell_value
                        )  # adds value to the current column array
                        lines[j].append(
                            '"' + str(cell_value) + '"'
                        )  # adds value in "csv" format for a current row

                    # self.columnlist.append(','.join(column))
                    self.columnlist.append(
                        column)  # adds a column to a list of columns

                # populate lineList property
                self.lineList = []
                for ln in lines:
                    self.lineList.append(','.join(ln))

                wb.unload_sheet(sheet.name)

                # load passed request parameters (by columns)
                self.get_request_parameters()

                # validate provided information
                self.logger.info(
                    'Validating provided request parameters. project: "{}", bulk location: "{}", '
                    'assay: "{}", db_center_code_or_id: "{}",'
                    'Sub-Aliquots: "{}"'.format(self.project,
                                                self.bulk_location, self.assay,
                                                self.center,
                                                self.sub_aliquots))
                self.validate_request_params()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    # print(self.error.count)
                    # print(self.error.get_errors_to_str())
                    _str = 'Errors ({}) were identified during validation of the request. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                else:
                    self.loaded = True
                    _str = 'Request parameters were successfully validated - no errors found.'
                self.logger.info(_str)

                # compose Experiment_id from the request parameters
                if self.center_code and len(self.center_code.strip()) > 0:
                    # use center code if available
                    self.experiment_id = "_".join(
                        [self.project, self.center_code, self.assay])
                else:
                    # use provided value for the center column from request, if center_code is not available
                    self.experiment_id = "_".join(
                        [self.project, self.center, self.assay])

            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist.'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columnlist = None
                self.lineList = None
                self.loaded = False
        return self.lineList

    # get all values provided in the request file
    def get_request_parameters(self):
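        # Apparent worksheet layout, inferred from the column indexes used below
        # (row 1 holds the headers, values start at row 2):
        #   col A: project, col B: bulk location, col C: assay,
        #   col D: center code or id, col E: sub-aliquot ids (one per row)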
        self.project = self.columnlist[0][1]
        self.bulk_location = self.columnlist[1][1]
        self.assay = self.columnlist[2][1].lower()
        self.center = self.columnlist[3][
            1]  # center code (if alpha numeric) or center id (if numeric)
        self.sub_aliquots = self.columnlist[4]
        if self.sub_aliquots:
            self.sub_aliquots.pop(0)  # get rid of the column header
        # self.samples = self.columnlist[5]
        # if self.samples and len(self.samples) > 0:
        #     self.samples.pop(0) # get rid of the column header

    # validates provided parameters (loaded from the submission request file)
    def validate_request_params(self):
        _str_err = ''
        _str_warn = ''
        if len(self.sub_aliquots) == 0:
            _str_err = '\n'.join([
                _str_err, 'List of provided sub-samples is empty. '
                'Aborting processing of the submission request.'
            ])
        # Check for blank sub-sample values and remove them. The list is rebuilt
        # with a comprehension: popping items from a list while iterating over it
        # would skip the element that follows each removed item.
        cleaned_cnt = len(self.sub_aliquots)
        self.sub_aliquots = [
            s for s in self.sub_aliquots if len(str(s).strip()) > 0
        ]
        cleaned_cnt -= len(self.sub_aliquots)
        if cleaned_cnt > 0:
            _str_warn = '\n'.join([
                _str_warn,
                'Empty sub-aliquots (count = {}) were removed from the list. '
                'Here is the list of sub-aliquots after cleaning (count = {}): "{}" '
                .format(cleaned_cnt, len(self.sub_aliquots),
                        self.sub_aliquots)
            ])
        # check for empty values
        if len(self.project) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Program name was provided. Aborting processing of the submission request.'
            ])
        if len(self.bulk_location) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Bulk Location was provided. Aborting processing of the submission request.'
            ])
        if len(self.assay) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Assay was provided. Aborting processing of the submission request.'
            ])
        if len(self.center) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No DB Center information was provided. Aborting processing of the submission request.'
            ])

        # check for values that should match some predefined values from a dictionary
        # check assay value
        if not cm2.key_exists_in_dict(self.assay, 'assay'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Assay name "{}" does not match the list of expected assay names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.assay, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided assay name is expected, convert it to the name expected by the Submission logic
            self.assay = cm2.get_dict_value(self.assay, 'assay')

        # check project value
        if not cm2.key_exists_in_dict(self.project.lower(), 'project'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Program name "{}" does not match the list of expected names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.project, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided project name is expected, convert it to the name expected by the Submission logic
            self.project = cm2.get_dict_value(self.project.lower(), 'project')

        # validate center_code or center_id value
        self.logger.info(
            'Start validation of center value "{}" provided in the request'.
            format(self.center))
        db = DBAccess(self.logger, self.error,
                      self.conf_main)  # create DBAccess object
        db.open_connection()
        # test center value assuming center code was provided
        dataset = db.validate_center_code(self.center, self.project, 'code',
                                          'code')
        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
            dataset, 'center_id', 'center_code')
        if center_id_out1:
            # center id was returned, meaning center was validated fine
            self.center_id = center_id_out1
            # get center code value from the current DB dataset
            _str_err_out3, center_code = self.get_field_value_from_dataset(
                dataset, 'center_code')
            if center_code:
                # center code retrieved OK
                self.center_code = center_code
            else:
                # report an error during retrieving center_code
                _str_err = '\n'.join([_str_err, _str_err_out3])
        else:
            # if center code was not validated at first attempt, validate it assuming the center id was given
            dataset = db.validate_center_code(self.center, self.project, 'id',
                                              'code')
            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                dataset, 'center_id', 'center_id')
            if center_id_out2:
                # center id was validated at the 2nd attempt, ignore the 1st failed center code validation
                self.center_id = center_id_out2
                # get center code value from the current DB dataset
                _str_err_out3, center_code = self.get_field_value_from_dataset(
                    dataset, 'center_code')
                if center_code:
                    # center code retrieved OK
                    self.center_code = center_code
                else:
                    # report an error during retrieving center_code
                    _str_err = '\n'.join([_str_err, _str_err_out3])
            else:
                # center validation attempts failed, report both failures
                _str_err = '\n'.join([_str_err, _str_err_out1, _str_err_out2])

        # get list of aliquots from list of sub-aliquots
        self.aliquots = [
            cm2.convert_sub_aliq_to_aliquot(al, self.assay)
            for al in self.sub_aliquots
        ]

        # create a map to convert aliquot value to sub_aliquot value (for processing DB responses given for aliquots)
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            self.aliquots_to_subaliquots_map[a] = sa
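        # the map now looks like e.g. {'A0001': 'A0001-01'} (hypothetical ids),
        # keyed by aliquot with the originating sub-aliquot as the value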

        if self.center_id:
            self.logger.info('Start validation of aliquot ids vs DB')
            # if center id was validated in the above code, validate received aliquots vs manifest dataset in DB
            dataset = db.validate_aliquot_ids(self.center_id, self.aliquots)
            if dataset:
                # create dictionary of received aliquots/sample ids
                aliquots_to_samples_map = {}
                for row in dataset:
                    if '_aliquot_id' in row and '_sample_id' in row:
                        aliquots_to_samples_map[
                            row['_aliquot_id']] = row['_sample_id']
                # check if each aliquot id was returned from a database and get the sample id from the dataset
                for sa, a in zip(self.sub_aliquots, self.aliquots):
                    if a in aliquots_to_samples_map:
                        if len(str(aliquots_to_samples_map[a]).strip()) > 0:
                            self.samples.append(aliquots_to_samples_map[a])
                        else:
                            _str = 'Blank Sample Id value was returned from DB for the sub-aliquot id "{}". ' \
                                   'The sub-aliquot was disqualified'.format(sa)
                            self.disqualify_sub_aliquot(sa, _str)
                            _str_warn = '\n'.join([_str_warn, _str])
                    else:
                        _str = 'Sub-aliquot id "{}" was not found in the database and was disqualified'.format(
                            sa)
                        self.disqualify_sub_aliquot(sa, _str)
                        _str_warn = '\n'.join([_str_warn, _str])
            else:
                _str_err = '\n'.join([
                    _str_err,
                    'Aliquot ids cannot be validated since no data was returned from DB for '
                    'center_id = "{}" and the following aliquot ids: {} '.
                    format(self.center_id, self.aliquots)
                ])
        db = None

        # report any collected errors
        if len(_str_err) > 0:
            _str_err = 'Validation of request parameters:' + _str_err
            self.error.add_error(_str_err)
            self.logger.error(_str_err)
        # report any collected warnings
        if len(_str_warn) > 0:
            _str_warn = 'Validation of request parameters:' + _str_warn
            self.logger.warning(_str_warn)

    def check_validation_dataset_outcome(self, dataset, validation_id_column,
                                         validation_id_name):
        _str_err = ''
        row_num = 1
        validation_id_out = None
        # defaults, in case the expected fields are missing from the dataset row
        status = None
        description = ''
        validation_id = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]  # get the first row of the dataset
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # e.g. center_id
                    validation_id = row[validation_id_column]
            if status == 'OK':  # validation was successful
                validation_id_out = validation_id
            elif status == 'Failed':  # validation has failed
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB has Failed, description: {}'
                    .format(validation_id_name, description)
                ])
            else:  # unexpected status value was returned
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB returned unexpected status {}'
                    .format(validation_id_name, status)
                ])
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported while validating {} in the DB. '
                'Check earlier entries in the log file.'.format(
                    validation_id_name)
            ])

        return _str_err, validation_id_out

    def get_field_value_from_dataset(self, dataset, field_name, row_num=None):
        # set default values
        if row_num is None:
            row_num = 1  # default row is #1

        _str_err = ''
        value_out = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]
                if field_name in row:
                    value_out = row[field_name]
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported while retrieving the value of "{}" (row #{}) from the dataset. '
                .format(field_name, row_num)
            ])

        return _str_err, value_out

    def setup_logger(self, wrkdir, filename):

        # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)

        log_folder_name = gc.REQ_LOG_DIR  # gc.LOG_FOLDER_NAME

        # m_logger_name = gc.MAIN_LOG_NAME
        # m_logger = logging.getLogger(m_logger_name)

        logger_name = gc.REQUEST_LOG_NAME
        logging_level = self.conf_main.get_value('Logging/request_log_level')

        # if a relative path provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(log_folder_name):
            log_folder_path = Path(wrkdir) / log_folder_name
        else:
            log_folder_path = Path(log_folder_name)

        lg = setup_logger_common(
            logger_name,
            logging_level,
            log_folder_path,  # Path(wrkdir) / log_folder_name,
            str(filename) + '_' +
            time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log')

        self.log_handler = lg['handler']
        return lg['logger']

    def load_request_configuration(self):
        # update main config file with the project/environment specific details from additional config files
        self.load_project_config_into_main(
            self.project
        )  # loads project specific config and merges it into main config
        # load project specific assay config file
        self.conf_assay = self.load_assay_conf(self.assay, self.project)
        if self.conf_assay:
            # update the loaded assay config with the project/environment specific assay location config (assay_location_config.yaml)
            self.conf_assay = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_ASSAY_LOCATION, self.project, self.conf_assay)

    def process_request(self):
        self.data_source_names = cm.get_value_from_dictionary(
            'data_sources', self.conf_assay)  # self.conf_assay['data_sources']

        # path to the folder where created submission packages will be located.
        # since this location can be provided in the project config file, this assignment is happening
        # after loading the project config
        gc.OUTPUT_PACKAGES_DIR = self.conf_main.get_value(
            'Submission_location/output_packages')

        for data_source_name in self.data_source_names:
            if isinstance(data_source_name, str):
                if data_source_name == 'attachment':
                    self.attachments = Attachment(self)
                elif data_source_name[-3:] == "_db":
                    self.data_source_objects[data_source_name] = DataSourceDB(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'db'
                else:
                    self.data_source_objects[data_source_name] = DataSource(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'file'
            elif isinstance(data_source_name, tuple):
                if data_source_name[0][-3:] == "_db":
                    self.data_source_objects[
                        data_source_name[0]] = DataSourceDB(
                            self, data_source_name[0], data_source_name[1])
                else:
                    self.data_source_objects[data_source_name[0]] = DataSource(
                        self, data_source_name[0], data_source_name[1])
            else:
                self.logger.error(
                    'Provided data source name ({}) is of unexpected format and cannot be processed.'
                    .format(data_source_name))

        # if data_source_forms_assignment was not assigned with any value in code before, assign a default to it
        # this is the case when an assay submits only attachments and does not use any assay or QC data
        if not self.data_source_forms_assignment:
            self.data_source_forms_assignment = gc.DEFAULT_DATA_SOURCE_FORMS_ASSIGNMENT

        self.submission_package = SubmissionPackage(self)

        self.create_request_for_disqualified_sub_aliquots()

        self.create_transfer_script_file()

        # check for errors and put final log entry for the request.
        if self.error.exist():
            _str = 'Processing of the current request was finished with the following errors: {}\n'.format(
                self.error.get_errors_to_str())
            self.logger.error(_str)
        else:
            _str = 'Processing of the current request was finished successfully.\n'
            self.logger.info(_str)

    def load_assay_conf(self, assay, project):
        assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project)
        cfg_assay = ConfigData(assay_cfg_path)
        assay_config = cfg_assay.get_value(assay.upper())
        if assay_config:
            self.logger.info(
                "Configuration for the {} assay was loaded from the assay config file: {}. "
                .format(assay.upper(), assay_cfg_path))
        else:
            _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \
                   "Aborting execution.".format(assay.upper(), assay_cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)

        return assay_config

    def update_cfg_dictionary_with_location_details(self, location_path,
                                                    project, cfg_to_update):
        cfg_location = ConfigData(location_path.replace('{project}', project))
        if cfg_location.loaded:
            self.logger.info(
                'Local config file "{}" was loaded and is being used.'.format(
                    cfg_location.cfg_path))
            cfg_to_update = cm.update_dictionary_matching_keys(
                cfg_to_update, cfg_location.get_whole_dictionary())
        else:
            _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
                .format(cfg_location.cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)
        return cfg_to_update

    def load_project_config_into_main(self, project):
        # load project specific "project_config" config file
        cfg_project = ConfigData(
            gc.CONFIG_FILE_PROJECT.replace('{project}', project))
        if cfg_project.loaded:
            # if cfg_project was loaded, update it with the environment specific settings (from project_location config)
            cfg_project_updated = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_PROJECT_LOCATION, self.project,
                cfg_project.get_whole_dictionary())
            # update main config with the outcome of the previous updates
            self.conf_main.update(cfg_project_updated)

    def create_transfer_script_file(self):
        self.logger.info("Start preparing transfer_script.sh file.")
        # path for the script file being created
        sf_path = Path(self.submission_package.submission_dir +
                       "/transfer_script.sh")

        # get script file template
        with open('scripts/' + self.project + '/transfer_script.sh',
                  'r') as ft:
            scr_tmpl = ft.read()

        # update placeholders in the script with the actual values
        smtp_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_env_name'))
        smtp_port = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_port_env_name'))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!smtp!}", smtp_server + ":" + str(smtp_port))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!to_email!}",
            ','.join(self.conf_main.get_value("Email/sent_to_emails")))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!from_email!}",
            self.conf_main.get_value("Email/default_from_email"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!send_email_flag!}",
            str(self.conf_main.get_value("Email/send_emails")))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!cmd!}",
            self.conf_main.get_value("DataTransfer/transfer_command"))

        # the following will be utilized if a mount point is being used by the transfer script (e.g. for Peerless)
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mp_cmd!}",
            self.conf_main.get_value("DataTransfer/mount_point_command"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mount_local_dir!}",
            self.conf_main.get_value("DataTransfer/mount_local_dir"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mount_remote_dir!}",
            self.conf_main.get_value("DataTransfer/mount_remote_dir"))

        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!source_dir!}", self.submission_package.submission_dir)
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!target_dir!}",
            self.conf_main.get_value("DataTransfer/remote_target_dir"))

        ssh_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_server_env_name'))
        scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_server!}",
                                              str(ssh_server))
        # apply user name as the very last replacement statement, since it can be used as part of previous replacements
        ssh_user = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_user_env_name'))
        scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_user!}",
                                              str(ssh_user))

        set_permissions = False
        set_perm_value = self.conf_main.get_value("DataTransfer/exec_permis")
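        # "DataTransfer/exec_permis" is expected to hold a python expression such as
        # "stat.S_IXUSR | stat.S_IXGRP" (hypothetical example) that eval() below turns
        # into permission bits OR-ed into the script file mode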
        if set_perm_value:
            try:
                exec_permission = eval(set_perm_value.strip())
                set_permissions = True
            except Exception as ex:
                _str = 'Unexpected error "{}" occurred during evaluation of the "DataTransfer/exec_permis" value ' \
                       '"{}" retrieved from the main config file. Permission setup operation will be skipped. \n{} '\
                    .format(ex, set_perm_value, traceback.format_exc())
                self.logger.warning(_str)
                # self.error.add_error(_str)
                set_permissions = False

        with open(sf_path, "w") as sf:
            sf.write(scr_tmpl)

        if set_permissions:
            try:
                # if permissions to be set were retrieved from config file, set them here
                st = os.stat(sf_path)
                os.chmod(sf_path, st.st_mode | exec_permission)  #stat.S_IXUSR
            except Exception as ex:
                _str = 'Unexpected error "{}" occurred during setting up permissions "{}" for the script file ' \
                       '"{}". \n{} '\
                    .format(ex, set_perm_value, sf_path, traceback.format_exc())
                self.logger.warning(_str)
                self.error.add_error(_str)
        else:
            _str = 'Permission setup was skipped for the transfer script file. ' \
                   'Note: value of "DataTransfer/exec_permis" from main config was set to "{}".'\
                                    .format(set_perm_value)
            self.logger.warning(_str)

        self.logger.info("Finish preparing '{}' file.".format(sf_path))

    def disqualify_sub_aliquot(self, sa, details):
        # adds a sub-aliquot to the dictionary of disqualified sub-aliquots
        # key = sub-aliquot, value = array of details for disqualification; 1 entry can have multiple detail reasons
        if sa in self.disqualified_sub_aliquots.keys():
            self.disqualified_sub_aliquots[sa].append(details)
        else:
            arr_details = [details]
            self.disqualified_sub_aliquots[sa] = arr_details
        self.logger.warning(
            'Sub-aliquot "{}" was disqualified with the following details: "{}"'
            .format(sa, details))

    def populate_qualified_aliquots(self):
        # reset self.qualified_aliquots array
        self.qualified_aliquots = []
        # select only aliquots that were not disqualified
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            if sa not in self.disqualified_sub_aliquots:
                self.qualified_aliquots.append(a)

    def create_request_for_disqualified_sub_aliquots(self):

        # proceed only if some disqualified sub-aliquots are present
        if self.disqualified_sub_aliquots:

            self.logger.info(
                "Start preparing a request file for disqualified sub-aliquots '{}'."
                .format(list(self.disqualified_sub_aliquots.keys())))

            wb = xlwt.Workbook()  # create empty workbook object
            sh = wb.add_sheet(
                'Submission_Request'
            )  # sheet name cannot be longer than 31 characters

            cur_row = 0  # first row for 0-based array
            cur_col = 0  # first col for 0-based array
            # write headers to the file
            headers = self.get_headers()
            for val in headers:
                sh.write(cur_row, cur_col, val)
                cur_col += 1

            cur_row += 1

            for sa in self.sub_aliquots:
                if sa in self.disqualified_sub_aliquots.keys():
                    sh.write(cur_row, 0, self.project)
                    sh.write(cur_row, 1, self.bulk_location)
                    sh.write(cur_row, 2, self.assay)
                    sh.write(cur_row, 3, self.center)
                    sh.write(cur_row, 4, sa)
                    cur_row += 1

            self.disqualified_request_path = Path(
                gc.DISQUALIFIED_REQUESTS + '/' +
                time.strftime("%Y%m%d_%H%M%S", time.localtime()) +
                '_reprocess_disqualified_' + Path(self.filename).stem +
                '.xls')

            # if DISQUALIFIED_REQUESTS folder does not exist, it will be created
            os.makedirs(gc.DISQUALIFIED_REQUESTS, exist_ok=True)

            wb.save(str(self.disqualified_request_path))

            self.logger.info(
                "Successfully prepared the request file for disqualified sub-aliquots and saved in '{}'."
                .format(str(self.disqualified_request_path)))

    def process_inquiry_sources(self):
        cur_row = 0
        for inq_line in self.lines_arr:
            if cur_row == self.header_row_num - 1:
                # skip the header row
                cur_row += 1
                continue

            # get program code assigned to the current row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)
            # get assay assigned to the current row
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            # get source id assigned to the current row
            source_id = self.get_inquiry_value_by_field_name(
                'source_id', inq_line)

            # get source config file
            # 2 values are saved in tuple: program name specific path and default one.
            # if program name specific path does not exist, the default will be used
            cfg_source_path = (
                # configuration path for the current program by name
                gc.CONFIG_FILE_SOURCE_PATH\
                    .replace('{program}', program_code)\
                    .replace('{assay}', assay)\
                    .replace('{source_id}', source_id),
                # configuration path for the default program (used if no program specific path is present)
                gc.CONFIG_FILE_SOURCE_PATH \
                    .replace('{program}', 'default') \
                    .replace('{assay}', assay) \
                    .replace('{source_id}', source_id)
            )
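            # hypothetical illustration: with program_code='pgmA', assay='rna' and
            # source_id='src1', the tuple holds the 'pgmA' specific path first and
            # the 'default' program path second; the first one that loads below wins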
            # get the source location config file path
            cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace(
                '{source_id}', source_id)

            # attempt to load configuration for the program specific path
            cfg_source = ConfigData(Path(cfg_source_path[0]))
            if not cfg_source.loaded:
                # if config was not loaded from the program specific path, load the default one
                cfg_source = ConfigData(Path(cfg_source_path[1]))

            if cfg_source.loaded:
                # proceed here if the source config was loaded
                # load source location config with location specific settings for the current source
                cfg_source_location = ConfigData(
                    Path(cfg_source_location_path))
                if cfg_source_location.loaded:
                    # if the source location config was loaded, update cfg_source config with the source location config
                    cfg_source.update(
                        cfg_source_location.get_whole_dictionary())

                # get unique id of the datasource; if the same id was already used, reuse the existing datasource object
                inq_line_datasource_id = self.get_inquiry_line_datasource_id(
                    inq_line)
                self.logger.info(
                    'Current inquiry row #{} was identified with the following data source id: {}'
                    .format(cur_row, inq_line_datasource_id))
                # assign source id (inq_line_datasource_id) to the current inquiry line
                self.inq_line_sources[cur_row] = inq_line_datasource_id
                if inq_line_datasource_id in self.inq_sources:
                    # reuse existing datasource
                    self.logger.info(
                        'Data source id for the current inquiry row #{} matches one retrieved '
                        'earlier (for this or another row); the existing data source will be '
                        're-used for the current row.'.format(cur_row))
                else:
                    # create a new datasource object
                    inq_line_datasource = DataSource(self, cfg_source,
                                                     inq_line,
                                                     inq_line_datasource_id)
                    self.inq_sources[
                        inq_line_datasource_id] = inq_line_datasource
            else:
                sub_al = self.get_inquiry_value_by_field_name(
                    'sub-aliquot', inq_line, False)
                _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \
                       'None of the expected config files is accessible: {}'\
                    .format(cur_row, sub_al, ' | '.join(cfg_source_path))
                self.logger.warning(_str)
                self.disqualify_inquiry_item(
                    sub_al, _str, cur_row
                )  # TODO: verify if inq_line should be used instead of cur_row
            cur_row += 1
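
# Minimal usage sketch for the Request class above (hypothetical request path;
# mirrors the flow that process_submission() in Example #3 below drives):
#
#     m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
#     req = Request('requests/submission_request.xlsx', m_cfg)
#     if req.loaded and not req.error.exist():
#         req.load_request_configuration()
#         req.process_request()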
Example #3
def process_submission():
    # load main config file
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1
    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())

    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new request files will be posted
    requests_loc = m_cfg.get_value('Location/requests')

    gc.DISQUALIFIED_REQUESTS = m_cfg.get_value(
        'Location/requests_disqualified')
    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing request files will be stored
    # (one file per request)
    gc.REQ_LOG_DIR = m_cfg.get_value('Location/request_logs')
    # path to the folder where all processed (and renamed) requests will be stored
    gc.REQ_PROCESSED_DIR = m_cfg.get_value('Location/requests_processed')
    # path to the folder where created submission packages will be located. One package sub_folder per request.
    # gc.OUTPUT_PACKAGES_DIR = m_cfg.get_value('Location/output_packages')
    # tarball approach to be used for the current deployment
    gc.TARBALL_APPROACH = m_cfg.get_value('Tar_ball/approach')
    # flag to save calculated md5sum to a physical file
    gc.TARBALL_SAVE_MD5SUM_FILE = m_cfg.get_value('Tar_ball/save_md5sum_file')
    # tarball ignore directories
    ignore_dirs = m_cfg.get_value('Tar_ball/ignore_dirs')
    if ignore_dirs:
        # update default ignore_dirs value with the value from a config file
        gc.TARBALL_IGNORE_DIRS = ignore_dirs

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME
    processed_folder_name = gc.REQ_PROCESSED_DIR  # gc.PROCESSED_FOLDER_NAME

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    email_attchms = []
    transfers = []

    # requests_loc = 'E:/MounSinai/MoTrPac_API/ProgrammaticConnectivity/MountSinai_metadata_file_loader/DataFiles'
    requests_path = Path(requests_loc)

    # get current location of the script and create Log folder
    # if a relative path provided, convert it to the absolute address based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    # logdir = Path(prj_wrkdir) / log_folder_name  # 'logs'
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)  # logging_level
    mlog = lg['logger']
    log_warnings = False

    mlog.info(
        'Start processing submission requests in "{}"'.format(requests_path))

    try:

        (_, _, requests) = next(walk(requests_path))
        # print('Study requests: {}'.format(requests))

        mlog.info(
            'Submission requests to be processed (count = {}): {}'.format(
                len(requests), requests))

        req_proc_cnt = 0
        errors_present = 'OK'
        req_path = ''

        # '~$' should filter out temp file created when excel is open
        requests = [file for file in requests if not file.startswith('~$')]

        for req_file in requests:
            if req_file.endswith(('xlsx', 'xls')):
                req_path = Path(requests_path) / req_file

                # transfer_path = ''  # set a default transfer path
                transfer_details = {
                    'transfer_path': '',
                    'request_file': req_file,
                    'process_handler': None,
                    'return_code': None,
                    'return_status': None
                }
                # email_msgs = []
                # email_attchms = []

                try:
                    # print('--------->Process file {}'.format(req_path))
                    mlog.info(
                        'Request file {} was selected for processing.'.format(
                            req_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    req_obj = Request(req_path, m_cfg)

                    if req_obj and req_obj.loaded:
                        # proceed processing request
                        mlog.info(
                            'Submission request loading status: Success. Submission request file: "{}".'
                            .format(req_path))
                        mlog.info(
                            'Loading local and project related configs for processing the request.'
                        )
                        req_obj.load_request_configuration()
                        if not req_obj.error.exist():
                            mlog.info(
                                'Local config files were loaded with no errors, proceeding to process '
                                'the request file.')
                            req_obj.process_request()
                        else:
                            mlog.info(
                                'Errors were reported during loading local config files. Aborting processing '
                                'this request.')

                        mlog.info(
                            'Processing of Submission request was finished for {}'
                            .format(req_path))

                    req_proc_cnt += 1

                    # print (req_obj.logger._cache)
                    # verify that the _cache attribute is present on the logger
                    if hasattr(req_obj.logger, '_cache'):
                        # check if any warnings (level 30 == logging.WARNING) were
                        # recorded to the log file and set the log_warnings flag
                        if 30 in req_obj.logger._cache and req_obj.logger._cache[
                                30]:
                            log_warnings = True
                        # else:
                        #     log_warnings = False
                    else:
                        mlog.warning(
                            'The current logger object has no "_cache" attribute - thus cannot determine '
                            'if any Warnings were reported during the process.'
                        )

                    # identify if any errors were identified and set status variable accordingly
                    if not req_obj.error.exist():
                        if not req_obj.disqualified_sub_aliquots:
                            # no disqualified sub-aliquots present
                            if not log_warnings:
                                fl_status = 'OK'
                                _str = 'Processing status: "{}". Submission Request: {}'.format(
                                    fl_status, req_path)
                                # errors_present = 'OK'
                            else:
                                fl_status = 'OK with Warnings'
                                _str = 'Processing status: "{}". Submission Request: {}'.format(
                                    fl_status, req_path)
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK with Disqualifications'
                            _str = 'Processing status: "{}". Submission Request: {}'.format(
                                fl_status, req_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check processing log file for this request: {}' \
                            .format(fl_status, req_obj.logger.handlers[0])
                        errors_present = 'ERROR'

                    if fl_status == "OK":
                        mlog.info(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            # update transfer details dictionary with the path to the transfer file
                            transfer_details['transfer_path'] = \
                                Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh'
                            transfers.append(
                                transfer_details
                            )  # add transfer details to transfers list
                            mlog.info(
                                'Since the last request was processed with "{}" status and transfer on '
                                'completion was requested ("--execute_transfer" argument was set to "yes"), '
                                'the following path was put in queue for execution: '
                                '{}'.format(fl_status,
                                            transfer_details['transfer_path']))

                    else:
                        mlog.warning(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            mlog.info(
                                'The transfer on completion request ("--execute_transfer" argument was set to '
                                '"yes") will be ignored since the last request was processed with "{}" status.'
                                .format(fl_status))

                    processed_dir = Path(processed_folder_name)
                    req_processed_name = ts + '_' + fl_status + '_' + req_file
                    file_name_new_path = cm.move_file_to_processed(
                        req_path, req_processed_name, processed_dir,
                        req_obj.logger, req_obj.error)
                    if file_name_new_path:
                        mlog.info(
                            'Processed Submission request "{}" was moved and renamed as: "{}"'
                            .format(req_path,
                                    processed_dir / req_processed_name))
                    else:
                        mlog.warning(
                            'Moving the processed request "{}" was not successful due to some errors '
                            'reported in the request\'s log file {}.'.format(
                                req_path, req_obj.log_handler.baseFilename))

                    # deactivate the current Request logger
                    deactivate_logger_common(req_obj.logger,
                                             req_obj.log_handler)

                    if req_obj.submission_package and req_obj.submission_package.submission_dir:
                        # save transfer path to a local variable
                        transfer_path = Path(
                            req_obj.submission_package.submission_dir
                        ) / 'transfer_script.sh'
                    else:
                        transfer_path = None

                    # preps for email notification
                    email_msgs.append((
                        '-------------------------------------<br/>'
                        'Requested project: {}'.format(req_obj.project) +
                        '<br/>Requested Experiment: {}.'.format(
                            req_obj.experiment_id) +
                        ('<br/>Request file <br/>{} <br/> was processed and moved/renamed to <br/> {}.'
                         .format(req_path, processed_dir /
                                 req_processed_name) if file_name_new_path else
                         '<br/> Request file <br/>{} <br/> was processed but <font color="red">NOT moved due '
                         'to some errors</font> reported in the request\'s log file.'
                         .format(req_path)) + '<br/> <b>Errors summary:</b> {}'
                        '<br/> <b>Warning(s) reported:</b> {}'
                        '<br/> <i>Log file location: <br/>{}</i>'
                        '<br/> Submission package location:<br/>{}'
                        '<br/> Data source location:<br/>{}'
                        '<br/> Processed Aliquots:<br/>{}'
                        '<br/> Disqualified Aliquots (if present, see the log file for more details):<br/>{}'
                        '<br/> A request file for re-processing Disqualified Aliquots was prepared in:<br/>{}'
                        '<br/> Automatic data transferring: {}'
                        '<br/> Command line to run data transferring manually: <br/> {}'
                        ''.format(
                            '<font color="red">Check Errors in the log file.</font>'
                            if req_obj.error.exist() else
                            '<font color="green">No Errors</font> ',
                            '<font color="red">Yes - check the log file.</font>'
                            if log_warnings else 'No',
                            req_obj.log_handler.baseFilename,
                            req_obj.submission_package.submission_dir
                            if req_obj.submission_package else 'N/A',
                            req_obj.attachments.data_loc if req_obj.attachments
                            else 'N/A', req_obj.qualified_aliquots
                            if req_obj.qualified_aliquots else 'None', [
                                val for val in
                                req_obj.disqualified_sub_aliquots.keys()
                            ] if req_obj.disqualified_sub_aliquots else 'None',
                            req_obj.disqualified_request_path,
                            '<font color="green">Performed.</font> '
                            'Additional email should be sent upon data transfer completion.'
                            if len(
                                str(transfer_details['transfer_path']).strip())
                            > 0 else 'Not performed.',
                            str(
                                Path(req_obj.submission_package.submission_dir)
                                / 'transfer_script.sh')
                            if req_obj.submission_package else 'N/A')))
                    email_attchms.append(req_obj.log_handler.baseFilename)

                    # print ('email_msgs = {}'.format(email_msgs))

                    req_obj = None

                except Exception as ex:
                    # report the error to the log file and re-raise it to abort further processing
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, req_path, traceback.format_exc()))
                    raise

        mlog.info('Number of processed Submission requests = {}'.format(
            req_proc_cnt))

        if req_proc_cnt > 0:
            # collect final details and send email about this study results
            email_subject = 'processing of Submission Requests '
            if errors_present == 'OK':
                if not log_warnings:
                    email_subject = 'SUCCESSFUL ' + email_subject
                else:
                    email_subject = 'SUCCESSFUL (with Warnings) ' + email_subject
            elif errors_present == 'DISQUALIFY':
                email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
            else:
                email_subject = 'ERROR(s) present during ' + email_subject

            email_body = (
                'Number of requests processed: {}.'.format(req_proc_cnt) +
                '<br/><br/>' + '<br/><br/>'.join(email_msgs))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=m_cfg.get_value('Email/sent_to_emails'),
                        subject=email_subject,
                        message=email_body,
                        main_conf=m_cfg
                        # attachments intentionally omitted: some log files exceed the email size limit (~25MB) and would make sending fail
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing request "{}" (script: {}):\n{} ' \
                    .format(ex, req_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            # perform transfers, if anything qualifies for it
            if transfers:
                transfer_status_checking_delay = m_cfg.get_value(
                    'General/transfer_status_checking_delay')
                # validate the configured delay: use it only if it is a positive
                # integer; otherwise fall back to None (no delay between checks)
                if transfer_status_checking_delay and str(
                        transfer_status_checking_delay).isnumeric():
                    transfer_status_checking_delay = int(
                        transfer_status_checking_delay)
                    if transfer_status_checking_delay <= 0:
                        transfer_status_checking_delay = None
                else:
                    transfer_status_checking_delay = None
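                # assumed config entry driving the delay, e.g. in the main config:
                #   General:
                #       transfer_status_checking_delay: 30   # seconds between status checks
                # any non-numeric or non-positive value disables the delay (None)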

                mlog.info(
                    'Starting processing requested transfers. Total count: {} transfers.'
                    .format(len(transfers)))
                # process all collected transfer requests
                cm.process_transfers(transfers, mlog,
                                     transfer_status_checking_delay)

                # assess results of the transfer processing
                transfer_ok = 0
                transfer_err = 0
                transfer_nd = 0
                for transfer in transfers:
                    if transfer['return_status']:
                        if transfer['return_status'][:2] == 'OK':
                            transfer_ok += 1
                        elif transfer['return_status'][:5] == 'ERROR':
                            transfer_err += 1
                        else:
                            transfer_nd += 1
                    else:
                        transfer_nd += 1
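                # 'return_status' values are assumed to start with 'OK' or 'ERROR'
                # (e.g. 'OK: completed'); anything else, or a missing status, is
                # counted as "ND" (not defined)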

                _str = 'Finish processing transfers with the following statuses: "OK" - {} transfer(s), "ERROR" - {} ' \
                       'transfer(s)'.format(transfer_ok, transfer_err)
                if transfer_nd > 0:
                    _str = _str + ', "ND" - {}'.format(transfer_nd)
                mlog.info(_str)

                # send email with the status of the transfers
                if transfers:  # always true inside this block; retained to scope the email section
                    if transfer_err > 0:
                        email_subject = 'Errors produced during automated transfer(s) of prepared Submission Request(s)'
                    else:
                        email_subject = 'Completion of automated transfer(s) of prepared Submission Request(s)'

                    email_transfer_msgs = []
                    for transfer in transfers:
                        email_transfer_msgs.append(
                            ('Transfer process for the request file: "{}" '
                             '<br/>Transfer script file:<br/>{}'
                             '<br/>Completion status:<br/>{}'.format(
                                 transfer['request_file'],
                                 transfer['transfer_path'],
                                 transfer['return_status'])))

                    # colorize non-zero counts so problems stand out in the summary
                    ok_str = ('<font color="green">' + str(transfer_ok) +
                              '</font>') if transfer_ok > 0 else str(transfer_ok)
                    err_str = ('<font color="red">' + str(transfer_err) +
                               '</font>') if transfer_err > 0 else str(transfer_err)
                    email_body = (
                        'Summary of transfer of prepared submissions:'
                        '<br/>Total count of completed transfers: {}. '
                        '<br/>Status "OK": {} transfer(s)'
                        '<br/>Status "ERROR": {} transfer(s)'
                        '<br/>Status "Not Defined": {} transfer(s)'
                        '<br/><br/>The following are details for each performed transfer:'
                        '<br/><br/>'.format(len(transfers), ok_str, err_str,
                                            transfer_nd) +
                        '<br/><br/>'.join(email_transfer_msgs))

                    try:
                        if m_cfg.get_value('Email/send_emails'):
                            email.send_yagmail(emails_to=m_cfg.get_value(
                                'Email/sent_to_emails'),
                                               subject=email_subject,
                                               message=email_body,
                                               main_conf=m_cfg)
                    except Exception as ex:
                        # report unexpected error during sending emails to a log file and continue
                        _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                               'finishing automated transfers. \n{} '\
                            .format(ex, traceback.format_exc())
                        mlog.critical(_str)

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()


def process_download_inquiries():

    # load main config file and get required values
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1

    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())
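    # assumed ConfigData.update() behavior: location-specific keys override/extend
    # the main config, so subsequent m_cfg.get_value() calls resolve both global
    # and location-specific settings from one object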
    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new inquiry files will be posted
    inquiries_loc = m_cfg.get_value('Location/inquiries')

    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing inquiry files will be stored
    # (one file per inquiry)
    gc.INQUIRY_LOG_DIR = m_cfg.get_value('Location/inquiry_logs_relative_path')
    # path to the folder where all processed (and renamed) inquiries will be stored
    gc.INQUIRY_PROCESSED_DIR = m_cfg.get_value(
        'Location/inquiries_processed_relative_path')
    # get config setting for the processed_add_datestamp and save it to global const module
    processed_add_datestamp = m_cfg.get_value(
        'Location/processed_add_datestamp')
    if processed_add_datestamp:
        gc.PROCESSED_ADD_DATESTAMP = processed_add_datestamp
    # path to the folder where created submission packages will be located. One package sub_folder per inquiry.
    gc.OUTPUT_REQUESTS_DIR = m_cfg.get_value('Location/output_requests')
    # path to dir with dynamically created inquiry files for disqualified aliquots
    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value(
        'Location/inquiries_disqualified_path')
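    # the Location section of the merged config is thus assumed to look roughly like:
    #   Location:
    #       inquiries: ...                          # folder polled for new inquiry files
    #       app_logs: ...                           # application-level logs (one per run)
    #       inquiry_logs_relative_path: ...         # per-inquiry log files
    #       inquiries_processed_relative_path: ...  # processed (renamed) inquiries
    #       processed_add_datestamp: True
    #       output_requests: ...                    # generated submission packages
    #       inquiries_disqualified_path: ...        # auto-created inquiries for disqualified aliquots
    # (key names taken from the get_value() calls above; values are site-specific)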

    log_folder_name = gc.APP_LOG_DIR

    # this variable defines whether the Data Downloader app will be executed at the end of processing inquiries
    run_data_download = m_cfg.get_value('Execute/run_data_downloader')
    # path to the Data Downloader tool
    gc.DATA_DOWNLOADER_PATH = m_cfg.get_value('Location/data_downloader_path')

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    # email_attchms = []

    inquiries_path = Path(inquiries_loc)

    # get current location of the script and create Log folder
    # if a relative path provided, convert it to the absolute address based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'
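    # e.g. '20240131_143055.log' -- date-stamped so each run gets its own log file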

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)
    mlog = lg['logger']

    mlog.info(
        'Start processing download inquiries in "{}"'.format(inquiries_path))

    try:

        (root, source_inq_dirs, _) = next(walk(inquiries_path))

        inq_proc_cnt = 0
        errors_present = 'OK'

        for inq_dir in source_inq_dirs:
            source_inquiry_path = Path(root) / inq_dir
            mlog.info(
                'Selected for processing inquiry source: "{}", full path: {}'.
                format(inq_dir, source_inquiry_path))

            (_, _, inq_files) = next(walk(source_inquiry_path))

            # filter only excel files for processing as inquiries
            inquiries = [
                fl for fl in inq_files if fl.endswith(('xlsx', 'xls'))
            ]
            # filter out temp files (starting with '~$') created when an excel file is open
            inquiries = [fl for fl in inquiries if not fl.startswith('~$')]
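            # illustrative example (hypothetical names):
            #   ['plasma_inq.xlsx', '~$plasma_inq.xlsx', 'notes.txt'] -> ['plasma_inq.xlsx']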

            mlog.info('Inquiry files presented (count = {}): "{}"'.format(
                len(inquiries), inquiries))

            for inq_file in inquiries:
                inq_path = Path(source_inquiry_path) / inq_file

                try:
                    mlog.info('The following Inquiry file was selected: "{}".'.
                              format(inq_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    inq_obj = Inquiry(inq_path, m_cfg)

                    if inq_obj and inq_obj.loaded:
                        # proceed processing inquiry
                        mlog.info('Inquiry file was successfully loaded.')
                        mlog.info(
                            'Starting processing Download Inquiry file: "{}".'.
                            format(inq_path))

                        inq_obj.process_inquiry()

                        mlog.info(
                            'Processing of Download Inquiry was finished for {}'
                            .format(inq_path))

                    inq_proc_cnt += 1

                    # determine whether any errors were reported and set the status variable accordingly
                    if not inq_obj.error.exist():
                        if not inq_obj.disqualified_items:
                            # no disqualified sub-aliquots present
                            fl_status = 'OK'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            # errors_present = 'OK'  # this variable is set to OK by default, no update needed
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK_with_Disqualifications'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check the processing log file for this inquiry: {}' \
                            .format(fl_status, inq_obj.log_handler.baseFilename)
                        errors_present = 'ERROR'

                    if fl_status == "OK":
                        mlog.info(_str)
                    else:
                        mlog.warning(_str)

                    processed_dir = inq_obj.processed_folder  # 'Processed'
                    # combine the name of the processed file
                    inq_processed_name = fl_status + '_' + str(
                        inq_file).replace(' ', '_').replace('__', '_')
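                    # illustrative example (hypothetical names): fl_status='OK' and
                    # inq_file='my inquiry.xlsx' -> 'OK_my_inquiry.xlsx'; with the
                    # datestamp option on, ts is prepended: '20240131_143055_OK_my_inquiry.xlsx'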
                    if gc.PROCESSED_ADD_DATESTAMP:
                        inq_processed_name = ts + '_' + inq_processed_name
                    # move processed files to Processed folder
                    fl_processed_name = cm.move_file_to_processed(
                        inq_path, inq_processed_name, processed_dir,
                        inq_obj.logger, inq_obj.error)
                    if fl_processed_name:
                        mlog.info(
                            'Processed file "{}" was moved (renamed) to: "{}"'.
                            format(inq_path,
                                   processed_dir / fl_processed_name))
                    else:
                        errors_present = errors_present + '|MoveProcessedError'
                        mlog.warning(
                            'Moving the processed file "{}" was not successful due to errors '
                            'reported in the inquiry\'s log file {}.'.format(
                                inq_path, inq_obj.log_handler.baseFilename))

                    # preps for email notification
                    # create a dictionary to feed into template for preparing an email body
                    template_feeder = {
                        'file_num':
                        inq_proc_cnt,
                        'file_path':
                        str(inq_path),
                        'file_path_new':
                        (str(processed_dir / fl_processed_name)
                         if processed_dir and fl_processed_name else None),
                        'inq_obj_errors_cnt':
                        inq_obj.error.count,
                        'log_file_path':
                        inq_obj.log_handler.baseFilename,
                        'dld_request_file_path':
                        str(inq_obj.download_request_path),
                        'inq_sources':
                        inq_obj.inq_sources,
                        'inq_match_aliquots':
                        inq_obj.inq_match_arr,
                        'inq_disqul_aliquots':
                        inq_obj.disqualified_items,
                        'inq_disqul_reprocess_path':
                        str(inq_obj.disqualified_inquiry_path)
                    }
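                    # the keys above are assumed to match the placeholders defined in
                    # the 'processed_inquiry.html' template consumed by
                    # cm.populate_email_template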
                    email_body_part = cm.populate_email_template(
                        'processed_inquiry.html', template_feeder)
                    email_msgs.append(email_body_part)

                    # deactivate the current Inquiry logger
                    deactivate_logger_common(inq_obj.logger,
                                             inq_obj.log_handler)
                    inq_obj = None

                except Exception as ex:
                    # report the error to the log file; re-raise so the outer handler can abort processing
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, inq_path, traceback.format_exc()))
                    raise

        mlog.info('Number of successfully processed Inquiries = {}'.format(
            inq_proc_cnt))

        # start Data Download request if proper config setting was provided
        dd_status = {'status': '', 'message': ''}
        if run_data_download:
            # start process
            mlog.info(
                'Starting the Data Downloader app asynchronously: "{}".'.format(
                    gc.DATA_DOWNLOADER_PATH))
            try:
                dd_process = cm.start_external_process_async(
                    gc.DATA_DOWNLOADER_PATH)
                # check if it is running
                dd_status = cm.check_external_process(dd_process)
                mlog.info(
                    'Status of running Data Downloader app: "{}".'.format(
                        dd_status))
            except Exception as ex:
                # report unexpected error during starting Data Downloader
                _str = 'Unexpected Error "{}" occurred during an attempt to start Data Downloader app ({})\n{} ' \
                    .format(ex, gc.DATA_DOWNLOADER_PATH, traceback.format_exc())
                mlog.critical(_str)
                dd_status = {'status': 'Error', 'message': _str}
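        # dd_status is assumed to stay a dict of the form {'status': ..., 'message': ...}
        # (see the default above); its 'status' value drives the email subject and
        # the template fields below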

        mlog.info('Preparing to send notification email.')

        email_to = m_cfg.get_value('Email/send_to_emails')
        email_subject = 'processing of download inquiries. '

        if inq_proc_cnt > 0:
            # collect final details and send email about this study results

            # get all statuses into an array; the 1st element is the main status
            err_present = errors_present.split('|')
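            # errors_present is a '|'-delimited composite status, e.g.
            # 'ERROR|MoveProcessedError'; the first token is the overall outcome,
            # later tokens flag secondary issues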
            if err_present:
                # set email subject based on the main status err_present[0]
                if err_present[0] == 'OK':
                    email_subject = 'SUCCESSFUL ' + email_subject
                elif err_present[0] == 'DISQUALIFY':
                    email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
                else:
                    email_subject = 'ERROR(s) present during ' + email_subject
            if len(err_present) > 1:
                if err_present[1] == 'MoveProcessedError':
                    email_subject = email_subject + ' Error moving inquiry to processed.'

            if dd_status and dd_status.get('status', '').lower() == 'error':
                email_subject = email_subject + ' Errors starting Data Downloader.'

            # create a dictionary to feed into template for preparing an email body
            template_feeder = {
                'inq_cnt': inq_proc_cnt,
                'run_data_download': run_data_download,
                'downloader_path': gc.DATA_DOWNLOADER_PATH,
                'downloader_start_status': dd_status['status'].lower(),
                'processed_details': '<br/>'.join(email_msgs)
            }
            email_body = cm.populate_email_template('processed_inquiries.html',
                                                    template_feeder)

            # remove return characters from the body of the email, to keep just clean html code
            email_body = email_body.replace("\r", "")
            email_body = email_body.replace("\n", "")

            mlog.info(
                'Sending a status email with subject "{}" to "{}".'.format(
                    email_subject, email_to))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=email_to,
                        subject=email_subject,
                        message=email_body
                        # attachments intentionally omitted: some log files exceed the email size limit (~25MB) and would make sending fail
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing inquiry "{}" (script: {}):\n{} ' \
                    .format(ex, inq_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            mlog.info(
                'End of processing of download inquiries in "{}".'.format(
                    inquiries_path))

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()