Пример #1
0
    def get_sample_sheet(directory):
        """
        Get the sample sheet file path from a given run directory.

        :param directory: path of the run directory to search
        :return: the directory joined with the sample sheet file name (os.path.join)
        :raises exceptions.DirectoryError: if the directory fails the os.W_OK access check,
            or if no file named Parser.SAMPLE_SHEET_FILE_NAME is listed in it
        """
        logging.info("Looking for sample sheet in {}".format(directory))

        # Refuse to continue when the access check fails: log the problem, then raise.
        if not os.access(directory, os.W_OK):
            error_message = (
                "The directory is not accessible, "
                "can not parse samples from this directory {}".format(directory))
            # Log the message string itself; passing a (message, directory) tuple
            # would make the log record show the tuple's repr.
            logging.error(error_message)
            raise exceptions.DirectoryError(error_message, directory)

        sample_sheet_file_name = Parser.SAMPLE_SHEET_FILE_NAME
        if sample_sheet_file_name not in common.get_file_list(directory):
            logging.error(
                "No sample sheet file in the Directory Upload format found")
            raise exceptions.DirectoryError(
                "The directory {} has no sample sheet file in the Directory Upload format "
                "with the name {}"
                "".format(directory, sample_sheet_file_name), directory)

        logging.debug("Sample sheet found")
        return os.path.join(directory, sample_sheet_file_name)
Пример #2
0
def get_file_list(directory):
    """
    Get the list of file names in the data directory

    Only the top level of the directory is listed, sub directories are not searched.

    :param directory: directory to search for files
    :return: list of file names in data directory
    :raises exceptions.DirectoryError: if the directory does not exist, or if it cannot be listed
    """
    # verify that directory exists
    if not os.path.exists(directory):
        raise exceptions.DirectoryError("Could not list files, as directory does not exist.", directory)
    # Create a file list of the directory, only hit the os once.
    # os.walk yields nothing when the path cannot be scanned (e.g. it is a regular file),
    # so give next() a default instead of letting it raise StopIteration.
    top_level = next(os.walk(directory), None)
    if top_level is None:
        raise exceptions.DirectoryError("Could not list files, as the path is not a readable directory.", directory)
    # os.walk yields (dirpath, dirnames, filenames); index 2 is the file names
    return top_level[2]
Пример #3
0
def find_directory_list(directory):
    """Find and return all directories in the specified directory.

    Only immediate sub directories are returned; the search is not recursive
    and the specified directory itself is not part of the result.

    Arguments:
    directory -- the directory to find directories in

    Returns: a list of paths, each one the given directory joined with the name of a sub directory

    Raises: exceptions.DirectoryError if the directory fails the write access check
    or cannot be listed
    """

    # Fail early when the write access check does not pass.
    if not os.access(directory, os.W_OK):
        raise exceptions.DirectoryError("The directory is not writeable, "
                                        "can not upload samples from this directory {}".format(directory),
                                        directory)

    # os.walk yields nothing when the path cannot be scanned (e.g. it is a regular file),
    # so give next() a default instead of letting it raise StopIteration.
    top_level = next(os.walk(directory), None)
    if top_level is None:
        raise exceptions.DirectoryError("The directory could not be listed, "
                                        "can not upload samples from this directory {}".format(directory),
                                        directory)

    # os.walk yields (dirpath, dirnames, filenames); index 1 is the sub directory names
    return [os.path.join(directory, name) for name in top_level[1]]
Пример #4
0
    def get_full_data_directory(sample_sheet):
        """
        Returns the path to where the sequence data files can be found, including the sample_sheet directory

        The result has the form <sample sheet directory>/Alignment_1/<first sub directory>/Fastq

        Note, this hits the os, and as such is not to be used with cloud solutions.
        For cloud solutions, use get_relative_data_directory() and solve the actual path for your cloud environment

        :param sample_sheet: Sample sheet acts as the starting point for the data directory
        :return: a string which represents the concatenated path components, as per os.path.join
        :raises exceptions.DirectoryError: if the "Alignment_1" directory does not exist next to the
            sample sheet, or if no sub directory can be found inside it
        """
        sample_sheet_dir = os.path.dirname(sample_sheet)
        partial_data_dir = os.path.join(sample_sheet_dir, "Alignment_1")
        # Verify the partial path exists, path could not exist if there was a sequencing error
        # Also, if someone runs the miniseq parser on a miseq directory, this is the failure point
        if not os.path.exists(partial_data_dir):
            raise exceptions.DirectoryError((
                "The uploader was unable to find the data directory, Verify that the run directory is "
                "undamaged, and that it is a MiniSeq sequencing run."),
                                            partial_data_dir)

        # os.walk yields (dirpath, dirnames, filenames); index 1 is the sub directory names.
        # It yields nothing when the path cannot be scanned, so give next() a default
        # instead of letting it raise StopIteration.
        sub_directories = next(os.walk(partial_data_dir), (partial_data_dir, [], []))[1]
        # Without this guard an empty "Alignment_1" directory would surface as a bare IndexError
        if not sub_directories:
            raise exceptions.DirectoryError((
                "The uploader was unable to find a sub directory inside the data directory, Verify that "
                "the run directory is undamaged, and that it is a MiniSeq sequencing run."),
                                            partial_data_dir)

        # The first sub directory in the order reported by os.walk is used
        return os.path.join(partial_data_dir, sub_directories[0], "Fastq")