def _before(self):
     """
     Pre-pipeline hook: record the current session's context on the pipeline
     result object so it shows up in the final report.
     :return: True, this preparation step cannot fail
     """
     app_config = config_manager.get_app_config_manager()
     # Point the result object at this session's working directory
     self.__pipeline_result_object.file_path_pipeline_session = app_config.get_session_working_dir()
     # Add this pipeline session log files to the final report
     self.__pipeline_result_object.add_log_files(app_config.get_session_log_files())
     # TODO Check that the trackhub URL is valid
     return True
Пример #2
0
def modules_bootstrap():
    """
    Wire up per-module configuration files: point the Ensembl service and the
    Ensembl data downloader at their respective configuration files, logging
    each file as it is applied.
    """
    app_config = config_manager.get_app_config_manager()
    service_config_file = app_config.get_file_name_config_modules_ensembl_service()
    __logger.debug(
        "Setting Ensembl configuration file -- {}".format(service_config_file))
    ensembl.service.set_configuration_file(service_config_file)
    # TODO - Should I delegate this to a main entry point for every module?.
    # TODO - REFACTOR THIS IN THE FUTURE, WHEN MODULE FUNCTIONALITY HAS BEEN TESTED
    downloader_config_file = app_config.get_file_name_config_modules_ensembl_data_downloader()
    __logger.debug(
        "Setting Ensembl Data Downloader configuration file -- {}".format(downloader_config_file))
    ensembl.data_downloader.set_configuration_file(downloader_config_file)
Пример #3
0
 def __init__(self, configuration_object, configuration_file, pipeline_arguments):
     """
     Constructor.
     :param configuration_object: parsed configuration content
     :param configuration_file: path of the configuration file
     :param pipeline_arguments: raw pipeline arguments, parsed lazily later on
     """
     super(DirectorConfigurationManager, self).__init__(configuration_object, configuration_file)
     self.__pipeline_arguments = pipeline_arguments
     # Parsed view of the pipeline arguments, built on demand
     self.__pipeline_arguments_object = None
     # Logger namespaced with module and concrete class name
     self._logger = config_manager.get_app_config_manager() \
         .get_logger_for("{}.{}".format(__name__, type(self).__name__))
Пример #4
0
class TestCommandLineRunner(unittest.TestCase):
    """Unit tests for the command line runner and the parallel runner manager."""
    # Class-level logger shared by all tests in this case
    __logger = config_manager.get_app_config_manager().get_logger_for(__name__)

    def test_success_on_running_simple_command_without_timeout(self):
        """Run a single echo command to completion and check it succeeded."""
        command = "echo Successful_run"
        runner = CommandLineRunnerFactory.get_command_line_runner()
        runner.command = command
        runner.start()
        runner.wait()
        self.assertTrue(runner.command_success,
                        "Command finishes with success")
        self.__logger.debug(
            "Command '{}', STDOUT - '{}', STDERR - '{}'".format(
                command,
                runner.get_stdout().decode('utf8'),
                runner.get_stderr().decode('utf8')))

    def test_simple_commands_with_parallel_runner_manager(self):
        """Run a batch of echo commands through the parallel runner manager."""
        commands = ["echo Successful_run-{:03}".format(index)
                    for index in range(0, 16)]
        manager = ParallelRunnerManagerFactory.get_parallel_runner_manager()
        for command in commands:
            runner = CommandLineRunnerFactory.get_command_line_runner()
            runner.command = command
            manager.add_runner(runner)
        manager.start_runners()
        manager.wait_all()
        for runner in manager.get_finished_runners():
            self.assertTrue(runner.is_done(), "Runner is Done")
            self.assertTrue(runner.command_success,
                            "Run command was successful")
 def __init__(self, configuration_object, configuration_file):
     """
     Constructor.
     :param configuration_object: parsed configuration content
     :param configuration_file: path of the configuration file
     """
     super(ConfigurationManager, self).__init__(configuration_object, configuration_file)
     self.__logger = config_manager.get_app_config_manager().get_logger_for(__name__)
     # Local Ensembl repo parent folder name
     self.__local_folder_ensembl_repo = 'ensembl'
 def __runmode_test_run_cluster_file_exporter(self):
     """
     Helper for the "testing / development mode" of this pipeline: instead of
     running the (expensive) cluster-file-exporter, it rsyncs pre-canned dummy
     result files from the test resources into the configured destination
     folder. This code may not stay once the pipeline's test-mode strategy is
     settled, so it is deliberately kept simple.
     :return: True if success on preparing the dummy data, False otherwise
     """
     cluster_file_exporter_destination_folder = self \
         ._get_configuration_manager() \
         .get_cluster_file_exporter_destination_folder()
     rsync_source_folder = os.path.join(
         config_manager.get_app_config_manager().get_folder_resources(),
         os.path.join("tests", "cluster-file-exporter"))
     # Rsync the dummy data into the destination folder
     rsync_command = "rsync -vah --progress --stats {}/ {}/" \
         .format(rsync_source_folder, cluster_file_exporter_destination_folder)
     rsync_subprocess = subprocess.Popen(rsync_command, shell=True)
     try:
         # TODO - WARNING - OMG! Magic number there!
         stdout, stderr = rsync_subprocess.communicate(timeout=600)
     except subprocess.TimeoutExpired:
         self._get_logger().error(
             "TIMEOUT error while rsyncing dummy cluster-file-exporter data, KILLING subprocess"
         )
         rsync_subprocess.kill()
         # BUGFIX: Popen.wait() returns an int exit code, so the original
         # 'stdout, stderr = rsync_subprocess.wait()' raised TypeError here.
         # communicate() is the documented way to reap the killed process and
         # collect any remaining output (subprocess docs recommend exactly
         # this kill()+communicate() sequence after TimeoutExpired).
         stdout, stderr = rsync_subprocess.communicate()
         return False
     return True
Пример #7
0
 def __init__(self, username, password):
     """
     Constructor.
     :param username: Trackhub Registry account user name
     :param password: Trackhub Registry account password
     """
     self.logger = config_manager.get_app_config_manager() \
         .get_logger_for("{}.{}".format(__name__, type(self).__name__))
     self.username = username
     self.password = password
     # Base URL of the Trackhub Registry service
     self.trackhub_registry_base_url = 'https://www.trackhubregistry.org'
     # Authentication token, obtained lazily on first use
     self.__auth_token = None
Пример #8
0
 def test_gunzip_files(self):
     """
     Download a sample gzipped GTF file from Ensembl into the session working
     directory, gunzip it, and check that the uncompressed file exists and is
     non-empty.
     """
     file_url = 'ftp://ftp.ensembl.org/pub/release-89/gtf/homo_sapiens/Homo_sapiens.GRCh38.89.abinitio.gtf.gz'
     file_name = file_url[file_url.rfind('/') + 1:]
     # Drop the trailing '.gz' extension to get the uncompressed file name
     file_name_uncompressed = file_name[:file_name.rfind('.')]
     destination_folder = config_manager.get_app_config_manager().get_session_working_dir()
     destination_file_path = os.path.join(destination_folder, file_name)
     destination_file_path_uncompressed = os.path.join(destination_folder, file_name_uncompressed)
     self.__logger.info(
         "Using test file '{}', from '{}' for testing gunzip functionality at folder '{}'"
         .format(file_name, file_url, destination_folder))
     # Download the file to the session working directory
     download_manager = DownloadManager([file_url], destination_folder, self.__logger)
     download_manager.start_downloads()
     download_manager.wait_all()
     self.assertTrue(
         download_manager.is_success(),
         "Test files for gunzip unit test downloaded successfully")
     errors = general_toolbox.gunzip_files([destination_file_path])
     self.assertTrue(
         not errors,
         "No errors uncompressing test files for unit testing gunzip feature"
     )
     self.assertTrue(
         os.path.isfile(destination_file_path_uncompressed),
         "The test file has been uncompressed, '{}'".format(destination_file_path_uncompressed))
     self.assertTrue(
         os.path.getsize(destination_file_path_uncompressed) > 0,
         "The uncompressed test file '{}' is not empty".format(destination_file_path_uncompressed))
Пример #9
0
 def __init__(self):
     """
     Constructor: default the trackhub export destination to a 'track_hub'
     subfolder of the current session working directory.
     """
     super().__init__()
     session_working_dir = config_manager.get_app_config_manager().get_session_working_dir()
     self.track_hub_destination_folder = os.path.join(session_working_dir, 'track_hub')
Пример #10
0
 def __init__(self, species_data):
     """
     Constructor.
     :param species_data: raw Ensembl species data that this object will index
     """
     self.__logger = config_manager.get_app_config_manager().get_logger_for(__name__)
     # Keep the original species data as-is; the derived views below are built lazily
     self.__ensembl_species_data_raw = species_data
     self.__ensembl_species_data_dao = None
     self.__index_by_taxonomy_id = None
     self.__index_by_assembly = None
Пример #11
0
 def get_file_path_binary_bed_to_bigbed_conversion_tool(self):
     """
     Get absolute path to the binary tool to convert from 'bed' file to 'bigBed' file format
     :return: absolute path to 'bed to bigBed' conversion tool
     """
     # os.path.join is variadic, so the nested join is flattened here
     return os.path.join(config_manager.get_app_config_manager().get_folder_bin(),
                         self._CONFIG_UCSC_TOOLSUITE_SUBFOLDER_NAME,
                         self._CONFIG_UCSC_TOOLSUITE_BEDTOBIGBED_BINARY_FILE_NAME)
 def get_pogo_binary_file_path(self):
     """
     Delegates to the application wide configuration manager. Kept here as an
     extra level of abstraction so this parameter is easier to refactor later
     on in the software lifecycle.
     :return: absolute file path to the pogo binary
     """
     app_config = config_manager.get_app_config_manager()
     return app_config.get_pogo_binary_file_path()
Пример #13
0
 def __init__(self):
     """
     Constructor: default the trackhub export destination to a 'track_hub'
     subfolder of the current session working directory, and start off with an
     empty export summary.
     """
     super().__init__()
     session_working_dir = config_manager.get_app_config_manager().get_session_working_dir()
     self.track_hub_destination_folder = os.path.join(session_working_dir, 'track_hub')
     # By default we're working with an empty export summary
     self.export_summary = TrackHubExportSummary()
 def get_local_path_folder_ensembl_repo(self):
     """
     Get the absolute path to the local folder where we are going to store all the data from the different releases
     of Ensembl
     :return: absolute path of the local repository for Ensembl releases data
     """
     resources_folder = config_manager.get_app_config_manager().get_folder_resources()
     return os.path.abspath(os.path.join(resources_folder, self.__local_folder_ensembl_repo))
 def get_pogo_run_timeout(self):
     """
     Duplicates the application wide configuration manager's method at this
     stage of development, with the idea of eventually moving the parameter
     into the responsibility boundaries of the 'pogo' module. For now it just
     forwards the default value; later it could be made configurable.
     :return: configured timeout (seconds) for running PoGo
     """
     app_config = config_manager.get_app_config_manager()
     return app_config.get_pogo_run_timeout()
Пример #16
0
 def __init__(self):
     """
     Constructor: initialize trackhub defaults (no URL yet, public visibility,
     PROTEOMICS type).
     """
     self.logger = config_manager.get_app_config_manager() \
         .get_logger_for("{}.{}".format(__name__, type(self).__name__))
     # hub.txt URL
     self.url = None
     self.assembly_accession_map = {}
     # Trackhub is public by default
     self.public = '1'
     # Default type for trackhubs is PROTEOMICS
     self.type = 'PROTEOMICS'
Пример #17
0
 def get_local_path_root_ensembl_repo(self):
     """
     Lazily compute and cache the local root folder for Ensembl repository data.
     :return: absolute path of the local Ensembl repository root
     """
     if self.__local_path_ensembl_repo is None:
         # Computed once and cached for subsequent calls
         resources_folder = os.path.abspath(
             config_manager.get_app_config_manager().get_folder_resources())
         repo_folder_name = self._get_configuration_manager().get_local_path_folder_ensembl_repo()
         self.__local_path_ensembl_repo = os.path.join(resources_folder, repo_folder_name)
     return self.__local_path_ensembl_repo
Пример #18
0
 def __init__(self, configuration_object, configuration_file):
     """
     Constructor.
     :param configuration_object: parsed configuration content
     :param configuration_file: path of the configuration file
     """
     self.__logger = config_manager.get_app_config_manager().get_logger_for(__name__)
     self._get_logger().debug("Using configuration file '{}'".format(configuration_file))
     self.__config_manager = ConfigurationManager(configuration_object, configuration_file)
     # Ensembl Release Number
     self.__release_number = None
     # Ensembl Species Data
     self.__species_data_service = None
Пример #19
0
def app_bootstrap():
    """
    Application bootstrap: parse the command line, initialize the configuration
    module, request the main logger and announce the session start.
    """
    global __run_test_mode
    global __logger
    global __args
    __args = get_cmdl()
    # Initialize configuration module, falling back to the default config file
    config_file = __args.config_file if __args.config_file else __DEFAULT_CONFIG_FILE
    config_manager.set_application_config_file(config_file)
    if __args.testmode:
        __run_test_mode = True
    # Request the main logger
    __logger = config_manager.get_app_config_manager().get_logger_for(__name__)
    session_id = config_manager.get_app_config_manager().get_session_id()
    if __run_test_mode:
        __logger.info("Session '{}' STARTED, RUNNING UNIT TESTS".format(session_id))
    else:
        __logger.info("Session '{}' STARTED".format(session_id))
Пример #20
0
def get_browser_instance():
    """
    Build a headless Chrome WebDriver instance with a fresh, per-call browser
    profile folder under the current session working directory.
    :return: a configured selenium Chrome browser instance
    """
    logger = config_manager.get_app_config_manager().get_logger_for(
        "{}.{}".format(__name__, "get_browser_instance"))
    # Unique profile folder (uuid suffix) so concurrent instances never share state
    folder_prefix = os.path.join(
        config_manager.get_app_config_manager().get_session_working_dir(),
        "browser_profile_no")
    profile_folder = "{}{}".format(folder_prefix, uuid.uuid4())
    general_toolbox.check_create_folders([profile_folder])
    logger.debug("Creating Browser instance, profile folder at '{}'".format(
        profile_folder))
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument("user-data-dir={}".format(profile_folder))
    # NOTE(review): 'executable_path' and 'chrome_options' keywords are
    # deprecated/removed in newer selenium releases -- confirm the pinned
    # selenium version before upgrading.
    browser = webdriver.Chrome(
        executable_path=config_manager.get_app_config_manager().get_path_chrome_driver(),
        chrome_options=chrome_options)
    # Implicit wait: element lookups retry for up to 3 seconds
    browser.implicitly_wait(3)
    return browser
Пример #21
0
def app_bootstrap():
    """
    Application bootstrap: parse the command line, initialize configuration and
    pipeline name, request the main logger and announce the session start.
    """
    global __run_test_mode
    global __logger
    global __args
    __args = get_cmdl()
    # Initialize configuration module, falling back to the default config file
    config_file = __args.config_file if __args.config_file else __DEFAULT_CONFIG_FILE
    config_manager.set_application_config_file(config_file)
    if __args.pipeline_name:
        config_manager.set_pipeline_name(__args.pipeline_name)
        # The special pipeline name 'test' switches the application to test mode
        if __args.pipeline_name == 'test':
            __run_test_mode = True
    __logger = config_manager.get_app_config_manager().get_logger_for(__name__)
    session_id = config_manager.get_app_config_manager().get_session_id()
    if __run_test_mode:
        __logger.info("Session '{}' STARTED, RUNNING UNIT TESTS".format(session_id))
    else:
        __logger.info("Session '{}' STARTED, pipeline '{}'".format(
            session_id, __args.pipeline_name))
    def get_cluster_file_exporter_jar_path(self):
        """
        Get the path to cluster-file-exporter jar file for running the software

        Computed here just in case it becomes either a configuration parameter
        or a command line argument in the near future
        :return: cluster-file-exporter jar file path
        """
        bin_folder = config_manager.get_app_config_manager().get_folder_bin()
        # os.path.join is variadic, so the nested join is flattened here
        return os.path.join(bin_folder,
                            self._CONFIG_CLUSTER_FILE_EXPORTER_BIN_SUBFOLDER,
                            self._CONFIG_CLUSTER_FILE_EXPORTER_JAR_FILE_NAME)
Пример #23
0
 def _before(self):
     """
     Pre-pipeline hook: record session context on the pipeline result object,
     then load and validate the Project Trackhub Descriptor.
     :return: True if the project descriptor is present and valid, False otherwise
     """
     # Set Pipeline Session working directory
     self.__pipeline_result_object.file_path_pipeline_session = \
         config_manager.get_app_config_manager().get_session_working_dir()
     # Add this pipeline session log files to the final report
     self.__pipeline_result_object.add_log_files(
         config_manager.get_app_config_manager().get_session_log_files())
     # Add information about the Ensembl Release being used
     self.__pipeline_result_object.ensembl_release = str(
         ensembl.service.get_service().get_release_number())
     if self.__config_manager.get_project_data_file_path():
         self._get_logger().info(
             "Reading Project Trackhub Descriptor from file at '{}'".format(
                 self.__config_manager.get_project_data_file_path()))
         self.__project_trackhub_descriptor = \
             ProjectTrackhubDescriptor(self.__config_manager.get_project_data_file_path())
         # Check that the destination folder exists
         if not os.path.isdir(self.__project_trackhub_descriptor.
                              get_trackhub_destination_path()):
             error_message = "Trackhub destination path NOT VALID, '{}'" \
                 .format(self.__project_trackhub_descriptor.get_trackhub_destination_path())
             self._get_logger().error(error_message)
             self.__pipeline_result_object.add_error_message(error_message)
             self.set_pipeline_status_fail()
             return False
         # Check valid project tracks
         if not self.__get_valid_project_tracks():
             # It makes no sense to go ahead if this project has no valid tracks
             error_message = "Project Trackhub contains NO VALID TRACKS"
             self._get_logger().error(error_message)
             self.__pipeline_result_object.add_error_message(error_message)
             self.set_pipeline_status_fail()
             return False
         return True
     # No project descriptor file was configured: record the error and fail the pipeline
     error_message = "INVALID / MISSING Project Trackhub Descriptor file, '{}'" \
         .format(self.__config_manager.get_project_data_file_path())
     self._get_logger().error(error_message)
     self.__pipeline_result_object.add_error_message(error_message)
     self.set_pipeline_status_fail()
     return False
 def get_local_path_root_ensembl_repo(self):
     """
     Get the local root folder where all ensembl data releases are going to be made locally available
     :return: the local folder that will contain all Ensembl releases data, e.g. .../resources/ensembl
     """
     if self.__local_path_ensembl_repo is None:
         # Computed once and cached for subsequent calls
         resources_folder = os.path.abspath(
             config_manager.get_app_config_manager().get_folder_resources())
         repo_folder_name = self._get_configuration_manager().get_local_path_folder_ensembl_repo()
         self.__local_path_ensembl_repo = os.path.join(resources_folder, repo_folder_name)
     return self.__local_path_ensembl_repo
Пример #25
0
 def test_success_on_sample_files_download(self):
     """
     Download several sample files in parallel into the session working
     directory and check the download manager reports overall success.
     """
     urls = ['http://ipv4.download.thinkbroadband.com/{}MB.zip'.format(size)
             for size in (5, 10, 20, 50)]
     destination_folder = config_manager.get_app_config_manager().get_session_working_dir()
     # Log the test environment
     self.__logger.info("Sample file URLs to download: {}".format(",".join(urls)))
     self.__logger.info("Destination folder for the downloads, '{}'".format(destination_folder))
     # Get the download manager and start the downloads
     download_manager = DownloadManager(urls, destination_folder, self.__logger)
     download_manager.start_downloads()
     download_manager.wait_all()
     self.assertTrue(download_manager.is_success(), "Files downloaded successfully")
    def get_cluster_file_exporter_destination_folder(self):
        """
        Get the destination folder for the cluster file exporter result files, it will typically be a subfolder of the
        current running session working directory.

        Computed here just in case it becomes either a configuration parameter
        or a command line argument in the near future
        :return: destination folder for pride cluster-file-exporter result files
        """
        session_dir = config_manager.get_app_config_manager().get_session_working_dir()
        destination_folder = os.path.join(session_dir,
                                          self._CONFIG_CLUSTER_FILE_EXPORTER_WORKING_SUBDIR)
        # Make sure the folder is there
        general_toolbox.check_create_folders([destination_folder])
        return destination_folder
Пример #27
0
 def __init__(self, pogo_runner):
     """
     Constructor.

     NOTE(review): the previous docstring documented four parameters
     (ncbi_taxonomy_id, pogo_source_file_path, protein_sequence_file_path,
     gtf_file_path) that are not in this signature; it has been rewritten to
     match the actual parameter.
     :param pogo_runner: the PoGo runner whose result files this object tracks
                         (presumably a completed/configured run -- confirm with callers)
     """
     # Logging
     self.__logger = main_app_config_manager.get_app_config_manager(
     ).get_logger_for("{}.{}".format(__name__,
                                     type(self).__name__))
     # Map<pogo_result_file_extension, pogo_result_file_path>
     self.__pogo_result_file_paths = {}
     self.pogo_runner = pogo_runner
Пример #28
0
class TestEnsemblDataDownloader(unittest.TestCase):
    """Unit tests for the Ensembl data downloader service."""
    # Class-level logger shared by all tests in this case
    __logger = config_manager.get_app_config_manager().get_logger_for(__name__)

    def test_get_protein_sequences_for_human(self):
        """Fetch protein sequence files for human (NCBI taxonomy id 9606)."""
        human_ncbi_tax_id = '9606'
        downloader = ensembl.data_downloader.get_data_download_service()
        downloader.get_protein_sequences_for_species(human_ncbi_tax_id)

    def test_get_gtf_for_human(self):
        """Fetch the genome reference (GTF) for human (NCBI taxonomy id 9606)."""
        human_ncbi_tax_id = '9606'
        downloader = ensembl.data_downloader.get_data_download_service()
        downloader.get_genome_reference_for_species(human_ncbi_tax_id)
 def __init__(self, configuration_object, configuration_file):
     """
     Constructor.
     :param configuration_object: parsed configuration content
     :param configuration_file: path of the configuration file
     """
     self.__logger = config_manager.get_app_config_manager().get_logger_for(__name__)
     self._get_logger().debug("Using configuration file '{}'".format(configuration_file))
     self.__config_manager = ConfigurationManager(configuration_object, configuration_file)
     # Lazily-resolved Ensembl repository paths
     self.__local_path_ensembl_repo = None
     self.__local_path_ensembl_release = None
     self.__remote_path_ensembl_release = None
     # Name for the current release
     self.__ensembl_release_name = None
     # Name for the subfolder that contains per species fasta files
     self.__folder_name_fasta = None
     # Name for the subfolder of species folder that contains protein sequences files
     self.__folder_name_protein_sequences = None
 def __sync_filesystem(self, trackhub_exporter):
     """
     Run the external filesystem synchronization script for the exported
     trackhub, when synchronization is enabled in the configuration.
     :param trackhub_exporter: exporter holding the trackhub destination folder
     :raises pipeline_exceptions.PipelineDirectorException: on script timeout
             or non-zero exit code
     """
     if self._get_configuration_manager().is_do_sync():
         # Sync script parameters
         sync_script_launcher = self._get_configuration_manager(
         ).get_path_script_filesystem_sync()
         app_root_dir = config_manager.get_app_config_manager(
         ).get_application_root_folder()
         # The script receives both the trackhub folder and its parent folder
         source_trackhub_container_folder = os.path.dirname(
             trackhub_exporter.track_hub_destination_folder)
         source_trackhub_folder = trackhub_exporter.track_hub_destination_folder
         # Build the synchronization command
         sync_command = "{} {} {} {}".format(
             sync_script_launcher, app_root_dir,
             source_trackhub_container_folder, source_trackhub_folder)
         self._get_logger().info(
             "Filesystem synchronization command '{}'".format(sync_command))
         sync_subprocess = subprocess.Popen(sync_command, shell=True)
         # NOTE(review): no stdout/stderr pipes are requested on the Popen
         # above, so communicate() returns (None, None) and these defaults
         # are what actually appears in the error messages -- confirm whether
         # capturing the script output was intended.
         stdout = ''
         stderr = ''
         try:
             stdout, stderr = sync_subprocess \
                 .communicate(timeout=self._get_configuration_manager().get_filesystem_sync_run_timeout())
         except subprocess.TimeoutExpired as e:
             exception_message = "TIMEOUT ERROR while running Filesystem synchronization script '{}'," \
                                 " Command: '{}'\n" \
                                 "STDOUT: '{}'\n" \
                                 "STDERR: '{}'" \
                 .format(self._get_configuration_manager().get_path_script_filesystem_sync(),
                         sync_command,
                         stdout,
                         stderr)
             self._get_logger().error(exception_message)
             # Kill the runaway script and reap it before raising
             sync_subprocess.kill()
             stdout, stderr = sync_subprocess.communicate()
             raise pipeline_exceptions.PipelineDirectorException(
                 exception_message) from e
         # After communicate() the process has finished, so poll() yields its
         # return code; a non-zero code is treated as a synchronization failure
         if sync_subprocess.poll() and (sync_subprocess.returncode != 0):
             error_msg = "ERROR while running Filesystem synchronization script '{}'," \
                         " Command: '{}'\n" \
                         "STDOUT: '{}'\n" \
                         "STDERR: '{}'" \
                 .format(self._get_configuration_manager().get_path_script_filesystem_sync(),
                         sync_command,
                         stdout,
                         stderr)
             self._get_logger().error(error_msg)
             raise pipeline_exceptions.PipelineDirectorException(error_msg)