def test_locate_flowcell(self):
    """Check that locate_flowcell resolves flowcell names and explicit paths.

    Covers four cases, in order: an unknown flowcell name and a
    non-existent path must both raise ValueError; once the directory
    exists, both the explicit path and the bare name (looked up via the
    configured ``flowcell_inbox``) must resolve to the same directory.
    """
    # Function-scope import so the test does not depend on the module
    # header already importing shutil.
    import shutil

    flowcell_name = "temp_flowcell"
    tmp_dir = tempfile.mkdtemp()
    # mkdtemp() leaves removal to the caller; register the cleanup right
    # away so the directory is deleted even when an assertion below fails.
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
    config = {"environment": {"flowcell_inbox": tmp_dir}}

    with self.assertRaises(ValueError):
        # Should raise ValueError if flowcell can't be found
        locate_flowcell(flowcell=flowcell_name, config=config)

    tmp_flowcell_path = os.path.join(tmp_dir, flowcell_name)
    with self.assertRaises(ValueError):
        # Should raise ValueError as path given doesn't exist
        locate_flowcell(flowcell=tmp_flowcell_path, config=config)

    os.makedirs(tmp_flowcell_path)
    # Should return the path passed in
    self.assertEqual(locate_flowcell(flowcell=tmp_flowcell_path, config=config),
                     tmp_flowcell_path)
    # Should return the full path after searching flowcell_inbox
    self.assertEqual(locate_flowcell(flowcell=flowcell_name, config=config),
                     tmp_flowcell_path)
def test_locate_flowcell(self):
    """Check that locate_flowcell resolves flowcell names and explicit paths.

    The lookup must raise ValueError while the flowcell directory does not
    exist (whether it is given as a bare name or as a full path), and must
    return the full directory path for both forms once it has been created
    inside the configured ``flowcell_inbox``.
    """
    # Function-scope import so the test does not depend on the module
    # header already importing shutil.
    import shutil

    flowcell_name = "temp_flowcell"
    tmp_dir = tempfile.mkdtemp()
    # The temporary inbox is not removed automatically; schedule its
    # removal immediately so a failing assertion cannot leak it.
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
    config = {'environment': {'flowcell_inbox': tmp_dir}}

    with self.assertRaises(ValueError):
        # Should raise ValueError if flowcell can't be found
        locate_flowcell(flowcell=flowcell_name, config=config)

    tmp_flowcell_path = os.path.join(tmp_dir, flowcell_name)
    with self.assertRaises(ValueError):
        # Should raise ValueError as path given doesn't exist
        locate_flowcell(flowcell=tmp_flowcell_path, config=config)

    os.makedirs(tmp_flowcell_path)
    # Should return the path passed in
    self.assertEqual(
        locate_flowcell(flowcell=tmp_flowcell_path, config=config),
        tmp_flowcell_path)
    # Should return the full path after searching flowcell_inbox
    self.assertEqual(
        locate_flowcell(flowcell=flowcell_name, config=config),
        tmp_flowcell_path)
def organize_projects_from_flowcell(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    fallback_libprep=None, quiet=False,
                                    create_files=True, config=None,
                                    config_file_path=None):
    """Sort demultiplexed Illumina flowcells into projects and return a list of them,
    creating the project/sample/libprep/seqrun dir tree on disk via symlinks.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
                                      restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
                                     restricted to these. Optional.
    :param str fallback_libprep: If libprep cannot be determined, use this
                                 value if supplied (default None)
    :param bool quiet: Don't send notification emails
    :param bool create_files: Alter the filesystem (as opposed to just
                              parsing flowcells) (default True)
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file;
                                 optional. Not read by this function body.

    :returns: A list of NGIProject objects.
    :rtype: list
    :raises RuntimeError: If no (valid) projects are found in the flowcell dirs
    """
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    # De-duplicate so a flowcell passed in twice is only processed once.
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = dict()
    for demux_fcid_dir in demux_fcid_dirs_set:
        try:
            # Get the full path to the flowcell if it was passed in as just a name
            # NOTE(review): config is not forwarded to locate_flowcell here,
            # although it is forwarded to setup_analysis_directory_structure
            # below -- confirm this is intended.
            demux_fcid_dir = locate_flowcell(demux_fcid_dir)
        except ValueError as e:
            # Flowcell path couldn't be found/doesn't exist; skip it
            LOG.error('Skipping flowcell "{}": {}'.format(demux_fcid_dir, e))
            continue
        # These will be a bunch of Project objects each containing Samples,
        # FCIDs, lists of fastq files. The accumulator is passed back in so
        # that results from every flowcell end up in the same mapping.
        projects_to_analyze = \
            setup_analysis_directory_structure(fc_dir=demux_fcid_dir,
                                               projects_to_analyze=projects_to_analyze,
                                               restrict_to_projects=restrict_to_projects,
                                               restrict_to_samples=restrict_to_samples,
                                               create_files=create_files,
                                               fallback_libprep=fallback_libprep,
                                               config=config,
                                               quiet=quiet)
    if not projects_to_analyze:
        if restrict_to_projects:
            error_message = ("No projects found to process: the specified flowcells "
                             "({fcid_dirs}) do not contain the specified project(s) "
                             "({restrict_to_projects}) or there was an error "
                             "gathering required information.").format(
                                 fcid_dirs=",".join(demux_fcid_dirs_set),
                                 restrict_to_projects=",".join(restrict_to_projects))
        else:
            error_message = ("No projects found to process in flowcells {} "
                             "or there was an error gathering required "
                             "information.".format(",".join(demux_fcid_dirs_set)))
        raise RuntimeError(error_message)
    # Materialise the values: on Python 3 dict.values() is a view, not the
    # list that the documented return type promises (no indexing, and it
    # tracks later changes to the dict).
    return list(projects_to_analyze.values())
def organize_projects_from_flowcell(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    fallback_libprep=None, quiet=False,
                                    create_files=True, config=None,
                                    config_file_path=None):
    """Group the contents of demultiplexed Illumina flowcells by project.

    Each distinct flowcell is resolved with ``locate_flowcell`` and handed to
    ``setup_analysis_directory_structure``, which accumulates the projects it
    finds (creating the project/sample/libprep/seqrun dir tree on disk via
    symlinks). Flowcells that cannot be located are logged and skipped.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: Only these projects are considered. Optional.
    :param list restrict_to_samples: Only these samples are considered. Optional.
    :param str fallback_libprep: Value to use when the libprep cannot be
                                 determined (default None)
    :param bool quiet: Don't send notification emails
    :param bool create_files: Alter the filesystem (as opposed to just
                              parsing flowcells) (default True)
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.

    :returns: A list of NGIProject objects.
    :rtype: list
    :raises RuntimeError: If no (valid) projects are found in the flowcell dirs
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    unique_fc_dirs = set(demux_fcid_dirs)

    # Maps accumulated across all flowcells; every call below receives the
    # current state and returns the updated one.
    collected = {}
    for fc_name in unique_fc_dirs:
        try:
            # A bare flowcell name is expanded to its full path here
            fc_path = locate_flowcell(fc_name)
        except ValueError as err:
            # Unknown or missing flowcell: report it and move on to the next
            LOG.error('Skipping flowcell "{}": {}'.format(fc_name, err))
            continue
        collected = setup_analysis_directory_structure(
            fc_dir=fc_path,
            projects_to_analyze=collected,
            restrict_to_projects=restrict_to_projects,
            restrict_to_samples=restrict_to_samples,
            create_files=create_files,
            fallback_libprep=fallback_libprep,
            config=config,
            quiet=quiet)

    if collected:
        return list(collected.values())

    # Nothing usable was found; say which filter (if any) was in effect.
    if restrict_to_projects:
        template = ("No projects found to process: the specified flowcells "
                    "({fcid_dirs}) do not contain the specified project(s) "
                    "({restrict_to_projects}) or there was an error "
                    "gathering required information.")
        error_message = template.format(
            fcid_dirs=",".join(unique_fc_dirs),
            restrict_to_projects=",".join(restrict_to_projects))
    else:
        template = ("No projects found to process in flowcells {} "
                    "or there was an error gathering required "
                    "information.")
        error_message = template.format(",".join(unique_fc_dirs))
    raise RuntimeError(error_message)