示例#1
0
def produce_analysis_piper(ngi_config, project_id):
    """Create a mock ``piper_ngi`` analysis folder tree for a project.

    The tree is rooted at ``<base_root>/[<sthlm_root>/]<top_dir>``. Below
    ``ANALYSIS/<project_id>/piper_ngi`` the numbered Piper folders are
    created together with ``sbatch``, ``setup_xml_files`` and ``logs``.
    For every entry listed in ``DATA/<project_id>`` an empty BAM file is
    touched in ``05_processed_alignments`` and an empty VCF file in
    ``07_variant_calls``. Finally ``create_version_report`` is called on the
    ``logs`` folder.

    :param dict ngi_config: configuration with ``base_root``, ``sthlm_root``
        and ``top_dir`` under the ``analysis`` key
    :param str project_id: name of the project folder
    :raises KeyError: if a required configuration key is missing
    """
    analysis_config = ngi_config["analysis"]
    root_parts = [analysis_config["base_root"]]
    sthlm_root = analysis_config["sthlm_root"]
    # sthlm_root may be None (create_uppmax_env builds the tree without it in
    # that case); os.path.join would crash on a None component.
    if sthlm_root is not None:
        root_parts.append(sthlm_root)
    root_parts.append(analysis_config["top_dir"])
    top_level = os.path.join(*root_parts)
    analysis_dir = os.path.join(top_level, "ANALYSIS", project_id)
    data_dir = os.path.join(top_level, "DATA", project_id)

    # create piper_ngi
    piper_ngi_dir = os.path.join(analysis_dir, "piper_ngi")
    fs.create_folder(piper_ngi_dir)
    piper_dirs = ["01_raw_alignments", "02_preliminary_alignment_qc", "03_genotype_concordance",
                  "04_merged_aligments", "05_processed_alignments", "06_final_alignment_qc",
                  "07_variant_calls", "08_misc"]
    # Folders that receive one placeholder file per entry of data_dir
    sample_file_templates = {
        "05_processed_alignments": "{}.clean.dedup.bam",
        "07_variant_calls": "{}.clean.dedup.recal.bam.raw.indel.vcf.gz",
    }
    for piper_dir in piper_dirs:
        current_dir = os.path.join(piper_ngi_dir, piper_dir)
        fs.create_folder(current_dir)
        template = sample_file_templates.get(piper_dir)
        if template is not None:
            for sample_id in os.listdir(data_dir):
                fs.touch(os.path.join(current_dir, template.format(sample_id)))
    # "logs" must stay last: the version report is written into it
    for extra_dir in ("sbatch", "setup_xml_files", "logs"):
        current_dir = os.path.join(piper_ngi_dir, extra_dir)
        fs.create_folder(current_dir)
    create_version_report(current_dir)
示例#2
0
def create_uppmax_env(ngi_config):
    """Create the folder layout described by ``ngi_config``.

    Reads ``base_root``, ``sthlm_root`` and ``top_dir`` from the ``analysis``
    section and the first entry of ``flowcell_inbox`` from the
    ``environment`` section, then creates the flowcell inbox and the
    analysis top folder (``base_root/[sthlm_root/]top_dir``).

    :param dict ngi_config: parsed NGI configuration
    :returns: dict with the keys ``base_root``, ``sthlm_root``, ``top_dir``
        and ``flowcell_inbox``
    :raises SystemExit: if a required section or key is missing, if
        ``flowcell_inbox`` is empty, or if ``base_root`` does not exist
    """
    paths = {}
    if 'analysis' not in ngi_config:
        sys.exit('ERROR: analysis must be a field of NGI_CONFIG.')
    try:
        base_root = ngi_config['analysis']['base_root']
        paths['base_root'] = base_root
        sthlm_root = ngi_config['analysis']['sthlm_root']
        paths['sthlm_root'] = sthlm_root
        top_dir = ngi_config['analysis']['top_dir']
        paths['top_dir'] = top_dir
    except KeyError as e:
        raise SystemExit('Config file is missing the key {}, make sure it have all required information'.format(str(e)))
    if 'environment' not in ngi_config:
        sys.exit('ERROR: environment must be a field of NGI_CONFIG.')
    try:
        flowcell_inboxes = ngi_config['environment']['flowcell_inbox']
        flowcell_inbox = flowcell_inboxes[0]  # I assume there is only one
        paths['flowcell_inbox'] = flowcell_inbox
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A missing key raises KeyError, an empty list IndexError and a None
        # value TypeError; catching only ValueError let all of them escape.
        sys.exit('key error, flowcell_inbox not found in "{}": {}'.format(ngi_config, e))
    # Now I need to create the folders for this
    if not os.path.exists(base_root):
        sys.exit('base_root needs to exists: {}'.format(base_root))
    fs.create_folder(flowcell_inbox)
    # sthlm_root is optional: leave it out of the path when it is None
    if sthlm_root is None:
        path_to_analysis = os.path.join(base_root, top_dir)
    else:
        path_to_analysis = os.path.join(base_root, sthlm_root, top_dir)
    fs.create_folder(path_to_analysis)
    return paths
示例#3
0
def create_uppmax_env(ngi_config):
    """Create the folder layout described by ``ngi_config``.

    Reads ``base_root``, ``sthlm_root`` and ``top_dir`` from the ``analysis``
    section and the first entry of ``flowcell_inbox`` from the
    ``environment`` section, then creates the flowcell inbox and the
    analysis top folder (``base_root/[sthlm_root/]top_dir``).

    :param dict ngi_config: parsed NGI configuration
    :returns: dict with the keys ``base_root``, ``sthlm_root``, ``top_dir``
        and ``flowcell_inbox``
    :raises SystemExit: if a required section or key is missing, if
        ``flowcell_inbox`` is empty, or if ``base_root`` does not exist
    """
    paths = {}
    if "analysis" not in ngi_config:
        sys.exit("ERROR: analysis must be a field of NGI_CONFIG.")
    try:
        base_root = ngi_config["analysis"]["base_root"]
        paths["base_root"] = base_root
        sthlm_root = ngi_config["analysis"]["sthlm_root"]
        paths["sthlm_root"] = sthlm_root
        top_dir = ngi_config["analysis"]["top_dir"]
        paths["top_dir"] = top_dir
    except KeyError as e:
        raise SystemExit("Config file is missing the key {}, make sure it have all required information".format(str(e)))
    if "environment" not in ngi_config:
        sys.exit("ERROR: environment must be a field of NGI_CONFIG.")
    try:
        flowcell_inboxes = ngi_config["environment"]["flowcell_inbox"]
        flowcell_inbox = flowcell_inboxes[0]  # I assume there is only one
        paths["flowcell_inbox"] = flowcell_inbox
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A missing key raises KeyError, an empty list IndexError and a None
        # value TypeError; catching only ValueError let all of them escape.
        sys.exit('key error, flowcell_inbox not found in "{}": {}'.format(ngi_config, e))
    # now I need to create the folders for this
    if not os.path.exists(base_root):
        sys.exit('base_root needs to exists: {}'.format(base_root))
    fs.create_folder(flowcell_inbox)
    # sthlm_root is optional: leave it out of the path when it is None
    if sthlm_root is None:
        path_to_analysis = os.path.join(base_root, top_dir)
    else:
        path_to_analysis = os.path.join(base_root, sthlm_root, top_dir)
    fs.create_folder(path_to_analysis)
    return paths
 def do_delivery(self):
     """ Rsync the staged delivery folder to the delivery path.

         The transfer is restricted to the files named in the staging file
         list, and the folder holding the transfer log is created first.

         :returns: the result of the rsync agent's transfer (True if delivery
             was successful, False if unsuccessful)
         :raises DelivererRsyncError: if an exception occurred during
             transfer
     """
     source_path = self.expand_path(self.stagingpath)
     destination_path = self.expand_path(self.deliverypath)
     digest_file = self.delivered_digestfile()
     # remote_host / remote_user are optional attributes; default to None
     host = getattr(self, 'remote_host', None)
     user = getattr(self, 'remote_user', None)
     rsync_opts = {
         '--files-from': [self.staging_filelist()],
         '--copy-links': None,
         '--recursive': None,
         '--perms': None,
         '--chmod': 'ug+rwX,o-rwx',
         '--verbose': None,
         '--exclude': ["*rsync.out", "*rsync.err"]
     }
     agent = transfer.RsyncAgent(
         source_path,
         dest_path=destination_path,
         digestfile=digest_file,
         remote_host=host,
         remote_user=user,
         log=logger,
         opts=rsync_opts)
     create_folder(os.path.dirname(self.transfer_log()))
     try:
         return agent.transfer(transfer_log=self.transfer_log())
     except transfer.TransferError as err:
         raise DelivererRsyncError(err)
示例#5
0
    def stage_delivery(self):
        """ Symlink every (source, destination, digest) tuple yielded by
            gather_files into the staging area.

            Each destination, relative to the staging path, is appended to
            the staging file list; when a digest is given it is also written
            to the digest file. The digest file itself is appended to the
            file list at the end. A file that fails to stage is logged as a
            warning and does not stop the staging.

            :returns: True
            :raises DelivererError: if an unexpected error occurred
        """
        digest_file = self.staging_digestfile()
        list_file = self.staging_filelist()
        create_folder(os.path.dirname(digest_file))
        try:
            with open(digest_file, 'w') as digest_out:
                with open(list_file, 'w') as list_out:
                    # one agent is reused; only its paths change per file
                    linker = transfer.SymlinkAgent(None, None, relative=True)
                    for source, target, checksum in self.gather_files():
                        linker.src_path = source
                        linker.dest_path = target
                        try:
                            linker.transfer()
                        except (transfer.TransferError, transfer.SymlinkError) as err:
                            logger.warning(
                                "failed to stage file '{}' when "
                                "delivering {} - reason: {}".format(source, str(self), err))

                        staged_name = os.path.relpath(
                            target, self.expand_path(self.stagingpath))
                        list_out.write("{}\n".format(staged_name))
                        if checksum is not None:
                            digest_out.write("{}  {}\n".format(checksum, staged_name))
                    # the digest file is delivered along with the data
                    list_out.write("{}\n".format(os.path.basename(digest_file)))
        except (IOError, fs.FileNotFoundException, fs.PatternNotMatchedException) as err:
            raise DelivererError(
                "failed to stage delivery - reason: {}".format(err))
        return True
示例#6
0
    def deliver_run_folder(self):
        '''Hard stages run folder and initiates delivery

        Asks the operator to confirm the sensitivity flag, then copies
        ``<fcid>.tar.gz`` and its ``.md5`` file from the data path into the
        hard staging folder.

        :returns: False if the operator does not confirm the sensitivity
            level

        NOTE(review): nothing is returned after the copy step and ``status``
        is never read in this block - confirm whether the method is complete.
        '''
        #stage the data
        dst = self.expand_path(self.stagingpathhard)
        path_to_data = self.expand_path(self.datapath)
        runfolder_archive = os.path.join(path_to_data, self.fcid + ".tar.gz")
        runfolder_md5file = runfolder_archive + ".md5"

        # the confirmation question depends on how the project was flagged
        question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
        if not self.sensitive:
            question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
        if proceed_or_not(question):
            logger.info(
                "Delivering {} to GRUS with mover. Project marked as SENSITIVE={}"
                .format(str(self), self.sensitive))
        else:
            logger.error(
                "{} delivery has been aborted. Sensitive level was WRONG.".
                format(str(self)))
            return False

        status = True

        create_folder(dst)
        try:
            shutil.copy(runfolder_archive, dst)
            shutil.copy(runfolder_md5file, dst)
            logger.info("Copying files {} and {} to {}".format(
                runfolder_archive, runfolder_md5file, dst))
        # "except X as e" is valid on Python 2.6+ and 3; "except X, e" is a
        # syntax error on Python 3
        except IOError as e:
            logger.error(
                "Unable to copy files to {}. Please check that the files exist and that the filenames match the flowcell ID."
                .format(dst))
示例#7
0
 def do_delivery(self):
     """ Rsync the staged delivery folder to the delivery path.

         The transfer is restricted to the files named in the staging file
         list, and the folder holding the transfer log is created first.

         :returns: the result of the rsync agent's transfer (True if delivery
             was successful, False if unsuccessful)
         :raises DelivererRsyncError: if an exception occurred during
             transfer
     """
     source_path = self.expand_path(self.stagingpath)
     destination_path = self.expand_path(self.deliverypath)
     digest_file = self.delivered_digestfile()
     # remote_host / remote_user are optional attributes; default to None
     host = getattr(self, 'remote_host', None)
     user = getattr(self, 'remote_user', None)
     rsync_opts = {
         '--files-from': [self.staging_filelist()],
         '--copy-links': None,
         '--recursive': None,
         '--perms': None,
         '--chmod': 'ug+rwX,o-rwx',
         '--verbose': None,
         '--exclude': ["*rsync.out", "*rsync.err"]
     }
     agent = transfer.RsyncAgent(
         source_path,
         dest_path=destination_path,
         digestfile=digest_file,
         remote_host=host,
         remote_user=user,
         log=logger,
         opts=rsync_opts)
     create_folder(os.path.dirname(self.transfer_log()))
     try:
         return agent.transfer(transfer_log=self.transfer_log())
     except transfer.TransferError as err:
         raise DelivererRsyncError(err)
    def deliver_project(self):
        """ Deliver all samples in a project to grus
            :returns: True if all samples were delivered successfully, False if
                any sample was not properly delivered or ready to be delivered
            :raises DelivererInterruptedError: if the hard staging folder
                already exists
        """

        # moved this part from constructor, as we can create an object without running the delivery (e.g. to check_delivery_status)

        # check if the project directory already exists, if so abort
        hard_stagepath = self.expand_path(self.stagingpathhard)
        if os.path.exists(hard_stagepath):
            logger.error("In {} found already folder {}. No multiple mover deliveries are allowed".format(
                    hard_stagepath, self.projectid))
            raise DelivererInterruptedError("Hard Staged Folder already present")
        else:
            # otherwise lock the delivery by creating the folder
            # NOTE(review): the lock folder is created before the DELIVERED
            # check below, so the early "already delivered" return leaves it
            # behind - confirm this is intended.
            create_folder(hard_stagepath)
        logger.info("Delivering {} to GRUS".format(str(self)))
        if self.get_delivery_status() == 'DELIVERED' \
                and not self.force:
            logger.info("{} has already been delivered".format(str(self)))
            return True
        status = True
        try:
            # connect to charon, return list of sample objects
            samples_to_deliver = self.get_staged_samples_from_charon()
        # "except X as e" is valid on Python 2.6+ and 3; "except X, e" is a
        # syntax error on Python 3
        except Exception as e:
            logger.error("Cannot get samples from Charon. Error says: {}".format(str(e)))
            logger.exception(e)
            exit(1)
示例#9
0
    def stage_delivery(self):
        """ Symlink every (source, destination, digest) tuple yielded by
            gather_files into the staging area.

            Each destination, relative to the staging path, is appended to
            the staging file list; when a digest is given it is also written
            to the digest file. The digest file itself is appended to the
            file list at the end. A file that fails to stage is logged as a
            warning and does not stop the staging.

            :returns: True
            :raises DelivererError: if an unexpected error occurred
        """
        digest_file = self.staging_digestfile()
        list_file = self.staging_filelist()
        create_folder(os.path.dirname(digest_file))
        try:
            with open(digest_file, 'w') as digest_out:
                with open(list_file, 'w') as list_out:
                    # one agent is reused; only its paths change per file
                    linker = transfer.SymlinkAgent(None, None, relative=True)
                    for source, target, checksum in self.gather_files():
                        linker.src_path = source
                        linker.dest_path = target
                        try:
                            linker.transfer()
                        except (transfer.TransferError, transfer.SymlinkError) as err:
                            logger.warning(
                                "failed to stage file '{}' when "
                                "delivering {} - reason: {}".format(source, str(self), err))

                        staged_name = os.path.relpath(
                            target, self.expand_path(self.stagingpath))
                        list_out.write("{}\n".format(staged_name))
                        if checksum is not None:
                            digest_out.write("{}  {}\n".format(checksum, staged_name))
                    # the digest file is delivered along with the data
                    list_out.write("{}\n".format(os.path.basename(digest_file)))
        except (IOError, fs.FileNotFoundException, fs.PatternNotMatchedException) as err:
            raise DelivererError(
                "failed to stage delivery - reason: {}".format(err))
        return True
示例#10
0
def create_FC(incoming_dir, run_name, samplesheet, fastq_1 = None, fastq_2=None ):
    """Build a mock demultiplexed flowcell folder below ``incoming_dir``.

    Nothing is done when ``incoming_dir/run_name`` already exists. Otherwise
    the run folder gets an ``RTAComplete.txt`` marker, a ``Demultiplexing``
    tree with ``Reports``, ``Stats`` and one ``<project>/<sample id>`` folder
    per sample sheet row holding an R1/R2 fastq pair, and a
    ``SampleSheet.csv`` rebuilt from ``samplesheet``.

    :param str incoming_dir: folder in which the run folder is created
    :param str run_name: run folder name, e.g. 160217_ST-E00201_0063_AHJHNYCCXX
    :param list samplesheet: rows as dicts; the keys of the first row become
        the column header and every row must provide ``Lane``
    :param fastq_1: file every R1 fastq is symlinked to; when None empty
        R1/R2 files are touched instead
    :param fastq_2: file every R2 fastq is symlinked to (used only when
        ``fastq_1`` is given)
    """
    path_to_fc = os.path.join(incoming_dir, run_name)
    if os.path.exists(path_to_fc):
        # An existing flowcell is left untouched
        return
    fs.create_folder(path_to_fc)
    fs.touch(os.path.join(path_to_fc, 'RTAComplete.txt'))
    demux_dir = os.path.join(path_to_fc, 'Demultiplexing')
    fs.create_folder(demux_dir)
    for subfolder in ('Reports', 'Stats'):
        fs.create_folder(os.path.join(demux_dir, subfolder))
    # Column names are taken from the first row
    header = list(samplesheet[0])
    sample_number = 1
    previous_lane = ''
    for row in samplesheet:
        project_name = row.get('Sample_Project', row.get('Project', ''))
        lane = row['Lane']
        if previous_lane != lane:
            # The sample counter restarts on every lane change, but not on
            # the very first row
            if previous_lane != '':
                sample_number = 1
            previous_lane = lane
        sample_id = row.get('SampleID', row.get('Sample_ID', ''))
        sample_name = row.get('SampleName', row.get('Sample_Name', ''))
        sample_dir = os.path.join(demux_dir, project_name, sample_id)
        fs.create_folder(sample_dir)
        read_1_path = os.path.join(
            sample_dir,
            '{}_S{}_L00{}_R1_001.fastq.gz'.format(sample_name, sample_number, lane))
        read_2_path = os.path.join(
            sample_dir,
            '{}_S{}_L00{}_R2_001.fastq.gz'.format(sample_name, sample_number, lane))
        sample_number += 1
        if fastq_1 is None:
            fs.touch(read_1_path)
            fs.touch(read_2_path)
        else:
            fs.do_symlink(fastq_1, read_1_path)
            fs.do_symlink(fastq_2, read_2_path)

    with open(os.path.join(path_to_fc, 'SampleSheet.csv'), 'w') as sheet:
        preamble = (u'[Header]\n',
                    u'Date,2016-03-29\n',
                    u'Investigator Name,Christian Natanaelsson\n',
                    u'[Data]\n')
        for preamble_line in preamble:
            sheet.write(preamble_line)
        # Every cell, including the last one of a row, is followed by a comma
        for column in header:
            sheet.write(u'{},'.format(column))
        sheet.write(u'\n')
        for row in samplesheet:
            for column in header:
                sheet.write(u'{},'.format(row[column]))
            sheet.write(u'\n')
示例#11
0
 def deliver_project(self):
     """ Deliver all samples in a project to grus
         :returns: True if the project was already delivered (and delivery is
             not forced), False if the mover version is wrong, the operator
             declines a confirmation or the PI email cannot be fetched
         :raises DelivererInterruptedError: if the hard staging folder
             already exists or the project is already under delivery

         NOTE(review): nothing is returned once the PI email is available -
         confirm whether the method is complete.
     """
     # first thing check that we are using mover 1.0.0
     if not check_mover_version():
         logger.error("Not delivering becouse wrong mover version detected")
         return False
     # moved this part from constructor, as we can create an object without running the delivery (e.g. to check_delivery_status)
     # check if the project directory already exists, if so abort
     soft_stagepath = self.expand_path(self.stagingpath)
     hard_stagepath = self.expand_path(self.stagingpathhard)
     if os.path.exists(hard_stagepath):
         logger.error("In {} found already folder {}. No multiple mover deliveries are allowed".format(
                 hard_stagepath, self.projectid))
         raise DelivererInterruptedError("Hard Staged Folder already present")
     # check that this project is not under delivery with mover already in this case stop delivery
     # query the status once so every branch below sees the same value
     delivery_status = self.get_delivery_status()
     if delivery_status == 'DELIVERED' and not self.force:
         logger.info("{} has already been delivered. This project will not be delivered again this time.".format(str(self)))
         return True
     elif delivery_status == 'IN_PROGRESS':
         logger.error("Project {} is already under delivery. No multiple mover deliveries are allowed".format(
                 self.projectid))
         raise DelivererInterruptedError("Proejct already under delivery with Mover")
     elif delivery_status == 'PARTIAL':
         logger.warning("{} has already been partially delivered. Please confirm you want to proceed.".format(str(self)))
         if proceed_or_not("Do you want to proceed (yes/no): "):
             logger.info("{} has already been partially delivered. User confirmed to proceed.".format(str(self)))
         else:
             logger.error("{} has already been partially delivered. User decided to not proceed.".format(str(self)))
             return False
     # now check if the sensitive flag has been set in the correct way
     question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
     if not self.sensitive:
         question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
     if proceed_or_not(question):
         logger.info("Delivering {} to GRUS with mover. Project marked as SENSITIVE={}".format(str(self), self.sensitive))
     else:
         logger.error("{} delivery has been aborted. Sensitive level was WRONG.".format(str(self)))
         return False
     # now start with the real work
     status = True
     # otherwise lock the delivery by creating the folder
     create_folder(hard_stagepath)
     # now find the PI mail which is needed to create the delivery project
     if self.pi_email is None:
         try:
             self.pi_email = self._get_pi_email()
             logger.info("email for PI for project {} found: {}".format(self.projectid, self.pi_email))
         # "except X as e" is valid on Python 2.6+ and 3; "except X, e" is a
         # syntax error on Python 3
         except Exception as e:
             logger.error("Cannot fetch pi_email from StatusDB. Error says: {}".format(str(e)))
             # print the traceback, not only error message -> isn't it something more useful?
             logger.exception(e)
             status = False
             return status
示例#12
0
 def deliver_project(self):
     """ Deliver all samples in a project to grus
         :returns: True if the project was already delivered (and delivery is
             not forced), False if the mover version is wrong, the operator
             declines a confirmation or the PI email cannot be fetched
         :raises DelivererInterruptedError: if the hard staging folder
             already exists or the project is already under delivery

         NOTE(review): nothing is returned once the PI email is available -
         confirm whether the method is complete.
     """
     # first thing check that we are using mover 1.0.0
     if not check_mover_version():
         logger.error("Not delivering becouse wrong mover version detected")
         return False
     # moved this part from constructor, as we can create an object without running the delivery (e.g. to check_delivery_status)
     # check if the project directory already exists, if so abort
     hard_stagepath = self.expand_path(self.stagingpathhard)
     if os.path.exists(hard_stagepath):
         logger.error("In {} found already folder {}. No multiple mover deliveries are allowed".format(
                 hard_stagepath, self.projectid))
         raise DelivererInterruptedError("Hard Staged Folder already present")
     # check that this project is not under delivery with mover already in this case stop delivery
     # query the status once so every branch below sees the same value
     delivery_status = self.get_delivery_status()
     if delivery_status == 'DELIVERED' and not self.force:
         logger.info("{} has already been delivered. This project will not be delivered again this time.".format(str(self)))
         return True
     elif delivery_status == 'IN_PROGRESS':
         logger.error("Project {} is already under delivery. No multiple mover deliveries are allowed".format(
                 self.projectid))
         raise DelivererInterruptedError("Proejct already under delivery with Mover")
     elif delivery_status == 'PARTIAL':
         logger.warning("{} has already been partially delivered. Please confirm you want to proceed.".format(str(self)))
         if proceed_or_not("Do you want to proceed (yes/no): "):
             logger.info("{} has already been partially delivered. User confirmed to proceed.".format(str(self)))
         else:
             logger.error("{} has already been partially delivered. User decided to not proceed.".format(str(self)))
             return False
     # now check if the sensitive flag has been set in the correct way
     question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
     if not self.sensitive:
         question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
     if proceed_or_not(question):
         logger.info("Delivering {} to GRUS with mover. Project marked as SENSITIVE={}".format(str(self), self.sensitive))
     else:
         logger.error("{} delivery has been aborted. Sensitive level was WRONG.".format(str(self)))
         return False
     # now start with the real work
     status = True
     # otherwise lock the delivery by creating the folder
     create_folder(hard_stagepath)
     # now find the PI mail which is needed to create the delivery project
     if self.pi_email is None:
         try:
             self.pi_email = self._get_pi_email()
             logger.info("email for PI for project {} found: {}".format(self.projectid, self.pi_email))
         # "except X as e" is valid on Python 2.6+ and 3; "except X, e" is a
         # syntax error on Python 3
         except Exception as e:
             logger.error("Cannot fetch pi_email from StatusDB. Error says: {}".format(str(e)))
             # print the traceback, not only error message -> isn't it something more useful?
             logger.exception(e)
             status = False
             return status
示例#13
0
def merge_demux_results(fc_dir):
    """Merge results of demultiplexing from different demultiplexing folders

    The demultiplexing folder name is taken from the ``output-dir`` entry of
    the bcl2fastq options in CONFIG. All folders matching ``<name>_*`` in the
    flowcell directory are merged into ``<fc_dir>/<name>``: the statistics
    files of the first folder are copied and those of the remaining folders
    are merged in one by one.

    :param str fc_dir: Path to the flowcell directory.
    :raises RuntimeError: if no bcl2fastq option defines ``output-dir``
    :raises IndexError: if no demultiplexing folder is found in ``fc_dir``
    """
    # The last option carrying 'output-dir' wins
    _demux_folder = None
    for option in CONFIG['analysis']['bcl2fastq']['options']:
        if isinstance(option, dict) and option.get('output-dir'):
            _demux_folder = option.get('output-dir')
    if _demux_folder is None:
        # Previously this surfaced as an UnboundLocalError on the next line
        raise RuntimeError("No 'output-dir' found in the bcl2fastq options of the configuration")
    unaligned_dirs = glob.glob(os.path.join(fc_dir, '{}_*'.format(_demux_folder)))
    if not unaligned_dirs:
        # Same exception type as the former unaligned_dirs[0] failure, raised
        # before anything is created on disk
        raise IndexError("No demultiplexing folders matching '{}_*' found in {}".format(
            _demux_folder, fc_dir))
    run_name = os.path.basename(fc_dir)
    #If it is a MiSeq run, the fc_id is the whole last '_'-separated field
    if '-' in run_name:
        fc_id = run_name.split('_')[-1]
    #If it is a HiSeq run, we only want the flowcell id (without A/B)
    else:
        fc_id = run_name.split('_')[-1][1:]
    basecall_dir = 'Basecall_Stats_{fc_id}'.format(fc_id=fc_id)
    merged_dir = os.path.join(fc_dir, _demux_folder)
    merged_basecall_dir = os.path.join(merged_dir, basecall_dir)
    first_basecall_dir = os.path.join(unaligned_dirs[0], basecall_dir)
    #Create the final Unaligned folder and copy there all configuration files
    filesystem.create_folder(merged_basecall_dir)
    shutil.copy(os.path.join(first_basecall_dir, 'Flowcell_demux_summary.xml'),
                merged_basecall_dir)
    shutil.copy(os.path.join(first_basecall_dir, 'Demultiplex_Stats.htm'),
                merged_basecall_dir)
    #The file Undemultiplexed_stats.metrics may not always be there.
    #u_s_file is a flag (True/False), not a path
    u_s_file = os.path.exists(os.path.join(first_basecall_dir,
                                           'Undemultiplexed_stats.metrics'))
    if u_s_file:
        shutil.copy(os.path.join(first_basecall_dir, 'Undemultiplexed_stats.metrics'),
                    merged_basecall_dir)
        #And it is possible that it is empty, in which case we have to add
        #the header
        # NOTE(review): as written, an empty file is left untouched, while a
        # non-empty file whose first line is not the header is replaced by
        # the header alone - confirm this matches the intent above.
        u_s_file_final = os.path.join(merged_basecall_dir, 'Undemultiplexed_stats.metrics')
        with open(u_s_file_final, 'r') as f:
            content = f.readlines()
            header = ['lane', 'sequence', 'count', 'index_name']
            if content and content[0].split() != header:
                with open(u_s_file_final, 'w') as final:
                    final.writelines('\t'.join(header) + '\n')
    # Fold every remaining demultiplexing folder into the merged one
    for u in unaligned_dirs[1:]:
        #Merge Flowcell_demux_summary.xml
        m_flowcell_demux = merge_flowcell_demux_summary(merged_dir, u, fc_id)
        m_flowcell_demux.write(os.path.join(merged_basecall_dir,
                                            'Flowcell_demux_summary.xml'))

        #Merge Demultiplex_Stats.htm
        m_demultiplex_stats = merge_demultiplex_stats(merged_dir, u, fc_id)
        with open(os.path.join(merged_basecall_dir, 'Demultiplex_Stats.htm'), 'w+') as f:
            f.writelines(re.sub(r"Unaligned_[0-9]{1,2}bp", 'Unaligned',
                                m_demultiplex_stats.renderContents()))

        #Merge Undemultiplexed_stats.metrics
        if u_s_file:
            merge_undemultiplexed_stats_metrics(merged_dir, u, fc_id)
示例#14
0
def create_FC(incoming_dir, run_name, samplesheet, fastq_1=None, fastq_2=None):
    """Build a flowcell folder named run_name inside incoming_dir.

    Nothing happens when the flowcell folder already exists. Otherwise the
    folder receives an RTAComplete.txt marker, a Demultiplexing tree with one
    <project>/<sample> folder per samplesheet row holding an R1 and an R2
    fastq file, and a SampleSheet.csv written from the samplesheet rows.

    :param incoming_dir: folder in which the flowcell folder is created
    :param run_name: name of the flowcell folder, something like
        160217_ST-E00201_0063_AHJHNYCCXX
    :param samplesheet: non-empty sequence of rows; rows are read with
        .get() and [] so they are assumed to be dicts -- each row must
        provide "Lane" and every key of the first row
    :param fastq_1: source for the R1 symlinks; when None, empty R1 and R2
        files are touched instead
    :param fastq_2: source for the R2 symlinks (only used when fastq_1 is
        not None)
    """
    path_to_fc = os.path.join(incoming_dir, run_name)
    if os.path.exists(path_to_fc):
        # The flowcell is already in place, leave it alone
        return
    demux_dir = os.path.join(path_to_fc, "Demultiplexing")
    fs.create_folder(path_to_fc)
    fs.touch(os.path.join(path_to_fc, "RTAComplete.txt"))
    # Demultiplexing, Demultiplexing/Reports and Demultiplexing/Stats
    fs.create_folder(demux_dir)
    fs.create_folder(os.path.join(demux_dir, "Reports"))
    fs.create_folder(os.path.join(demux_dir, "Stats"))
    # The keys of the first row define the SampleSheet columns
    header = list(samplesheet[0])
    # The S<n> counter in the fastq names restarts whenever the lane changes
    counter = 1
    current_lane = ""
    for row in samplesheet:
        project_name = row.get("Sample_Project", row.get("Project", ""))
        lane = row["Lane"]
        if current_lane == "":
            current_lane = lane
        elif current_lane != lane:
            counter = 1
            current_lane = lane
        sample_id = row.get("SampleID", row.get("Sample_ID", ""))
        sample_name = row.get("SampleName", row.get("Sample_Name", ""))
        # Folder holding the fastq files of this sample
        sample_dir = os.path.join(demux_dir, project_name, sample_id)
        fs.create_folder(sample_dir)
        read_1_path = os.path.join(
            sample_dir,
            "{}_S{}_L00{}_R1_001.fastq.gz".format(sample_name, counter, lane))
        read_2_path = os.path.join(
            sample_dir,
            "{}_S{}_L00{}_R2_001.fastq.gz".format(sample_name, counter, lane))
        counter += 1
        if fastq_1 is not None:
            fs.do_symlink(fastq_1, read_1_path)
            fs.do_symlink(fastq_2, read_2_path)
        else:
            fs.touch(read_1_path)
            fs.touch(read_2_path)

    with open(os.path.join(path_to_fc, "SampleSheet.csv"), "w") as sheet:
        for fixed_line in ("[Header]\n",
                           "Date,2016-03-29\n",
                           "Investigator Name,Christian Natanaelsson\n",
                           "[Data]\n"):
            sheet.write(fixed_line)
        # Every cell, including the last one of a line, is followed by a comma
        sheet.writelines("{},".format(key) for key in header)
        sheet.write("\n")
        for row in samplesheet:
            sheet.writelines("{},".format(row[key]) for key in header)
            sheet.write("\n")
示例#15
0
 def acknowledge_delivery(self, tstamp=None):
     """ Write a delivery acknowledgement file containing a timestamp.

     The file is named "<sampleid or projectid>_delivered.ack" and is
     placed under self.deliverystatuspath (passed through
     self.expand_path). A failure to write the file is logged as a
     warning and not raised.

     :param tstamp: the timestamp to write. When None (the default) the
         timestamp is obtained from _timestamp() at call time
     """
     if tstamp is None:
         # Resolve the default per call: a `tstamp=_timestamp()` default is
         # evaluated only once, when the function is defined
         tstamp = _timestamp()
     try:
         ackfile = self.expand_path(
             os.path.join(self.deliverystatuspath, "{}_delivered.ack".format(
                 self.sampleid or self.projectid)))
         create_folder(os.path.dirname(ackfile))
         with open(ackfile, 'w') as fh:
             fh.write("{}\n".format(tstamp))
     except (AttributeError, IOError) as e:
         logger.warning(
             "could not write delivery acknowledgement, reason: {}".format(
                 e))
示例#16
0
 def acknowledge_delivery(self, tstamp=None):
     """ Record a delivery by writing a timestamped acknowledgement file.

     The acknowledgement goes to
     "<deliverystatuspath>/<sampleid or projectid>_delivered.ack", with the
     path expanded by self.expand_path. Problems writing the file only
     produce a logged warning.

     :param tstamp: timestamp to store in the file. If None (the default),
         _timestamp() is called when the method runs
     """
     # A default of `_timestamp()` in the signature would be computed a single
     # time at definition, so the fallback is computed here instead
     if tstamp is None:
         tstamp = _timestamp()
     try:
         ackfile = self.expand_path(
             os.path.join(self.deliverystatuspath, "{}_delivered.ack".format(
                 self.sampleid or self.projectid)))
         create_folder(os.path.dirname(ackfile))
         with open(ackfile, 'w') as fh:
             fh.write("{}\n".format(tstamp))
     except (AttributeError, IOError) as e:
         logger.warning(
             "could not write delivery acknowledgement, reason: {}".format(
                 e))
示例#17
0
    def deliver_run_folder(self):
        """ Symlink run folder to stage path, create DDS delivery project and upload data.

        The run folder archive (<fcid>.tar.gz under the expanded datapath)
        and its .md5 file are symlinked into the expanded stagingpath. The
        user is asked to confirm the sensitivity level before anything is
        staged.

        :returns: True if the upload was reported as successful, False if
            the user aborted the delivery or the upload failed
        :raises AssertionError: re-raised when it is thrown by
            self._create_delivery_project()
        """
        # Stage the data
        dst = self.expand_path(self.stagingpath)
        path_to_data = self.expand_path(self.datapath)
        runfolder_archive = os.path.join(path_to_data, self.fcid + ".tar.gz")
        runfolder_md5file = runfolder_archive + ".md5"

        question = "This project has been marked as SENSITIVE (option --sensitive). Do you want to proceed with delivery? "
        if not self.sensitive:
            question = "This project has been marked as NON-SENSITIVE (option --no-sensitive). Do you want to proceed with delivery? "
        if proceed_or_not(question):
            logger.info("Delivering {} with DDS. Project marked as SENSITIVE={}".format(str(self), self.sensitive))
        else:
            logger.error("{} delivery has been aborted. Sensitive level was WRONG.".format(str(self)))
            return False

        create_folder(dst)
        try:
            # os.symlink needs the full path of the link to create: passing
            # the (already existing) staging folder itself fails with EEXIST,
            # so each link is created inside dst under the file's own name
            for staged_file in (runfolder_archive, runfolder_md5file):
                os.symlink(staged_file,
                           os.path.join(dst, os.path.basename(staged_file)))
            logger.info("Symlinking files {} and {} to {}".format(runfolder_archive, runfolder_md5file, dst))
        except (IOError, OSError):
            # os.symlink reports failures as OSError, which is not an IOError
            # on Python 2. As before, a staging failure is only logged and
            # the delivery carries on
            logger.error("Unable to symlink files to {}. Please check that the files "
                         "exist and that the filenames match the flowcell ID.".format(dst))

        delivery_id = ''
        try:
            delivery_id = self._create_delivery_project()
            logger.info("Delivery project for project {} has been created. "
                        "Delivery ID is {}".format(self.projectid, delivery_id))
        except AssertionError:
            logger.exception('Unable to detect DDS delivery project.')
            # bare raise keeps the original traceback
            raise

        # Upload with DDS
        dds_delivery_status = self.upload_data(delivery_id)

        if not dds_delivery_status:
            logger.error('Something when wrong when uploading {} '
                         'to DDS project {}'.format(self.projectid, delivery_id))
            return False
        logger.info("DDS upload for project {} to "
                    "delivery project {} was sucessful".format(self.projectid,
                                                               delivery_id))
        return True
 def create_report(self):
     """ Run the sample report command followed by the aggregate report command.

     Both commands are run through call_external_command from within the
     expanded self.reportpath. Log files are requested only when the folder
     of the log prefix ("<projectid>-<sampleid>" under self.logpath) could
     be created.
     """
     logprefix = os.path.abspath(self.expand_path(os.path.join(
         self.logpath, "{}-{}".format(self.projectid, self.sampleid))))
     try:
         log_folder_ready = create_folder(os.path.dirname(logprefix))
     except AttributeError:
         log_folder_ready = False
     if not log_folder_ready:
         # without a log folder the commands run without log files
         logprefix = None
     with chdir(self.expand_path(self.reportpath)):
         # the sample report covers just this sample
         sample_cmd = self.report_sample.split(' ') + [
             "--samples", self.sampleid]
         call_external_command(sample_cmd,
                               with_log_files=(logprefix is not None),
                               prefix="{}_sample".format(logprefix))
         # the aggregate report is told to expect the delivery of this
         # sample 0.5 days ahead
         aggregate_cmd = self.report_aggregate.split(' ')
         samples_extra = json.dumps({
             self.sampleid: {
                 "delivered": "{}(expected)".format(_timestamp(days=0.5))
             }
         })
         aggregate_cmd.extend(["--samples_extra", samples_extra])
         call_external_command(aggregate_cmd,
                               with_log_files=(logprefix is not None),
                               prefix="{}_aggregate".format(logprefix))
示例#19
0
 def test_crete_folder3(self):
     """ create_folder must return a falsy value instead of raising when os.makedirs fails """
     failing_makedirs = mock.patch.object(
         filesystem.os, 'makedirs', side_effect=OSError)
     with failing_makedirs:
         target_folder = os.path.join(self.rootdir, "target-non-existing")
         outcome = filesystem.create_folder(target_folder)
         self.assertFalse(
             outcome, "A raised exception was not handled properly")
示例#20
0
 def test_crete_folder3(self):
     """ An OSError raised by os.makedirs must make create_folder return a falsy value """
     with mock.patch.object(filesystem.os, 'makedirs', side_effect=OSError):
         missing_folder = os.path.join(self.rootdir, "target-non-existing")
         created = filesystem.create_folder(missing_folder)
         self.assertFalse(
             created,
             "A raised exception was not handled properly")
示例#21
0
 def create_report(self):
     """ Produce the sample report and the aggregate report for this sample.

     The two report commands (self.report_sample and self.report_aggregate)
     are executed with call_external_command while the working directory is
     the expanded self.reportpath. If the folder for the log files cannot be
     created, the commands are run without log files.
     """
     logprefix = os.path.abspath(
         self.expand_path(
             os.path.join(
                 self.logpath,
                 "{}-{}".format(self.projectid, self.sampleid))))
     try:
         if not create_folder(os.path.dirname(logprefix)):
             logprefix = None
     except AttributeError:
         logprefix = None
     with chdir(self.expand_path(self.reportpath)):
         # first the report for this sample only
         sample_cl = self.report_sample.split(' ')
         sample_cl += ["--samples", self.sampleid]
         call_external_command(
             sample_cl,
             with_log_files=(logprefix is not None),
             prefix="{}_sample".format(logprefix))
         # then the aggregate report, with the delivery of this sample
         # marked as expected 0.5 days ahead
         aggregate_cl = self.report_aggregate.split(' ')
         delivery_note = {
             self.sampleid: {
                 "delivered": "{}(expected)".format(_timestamp(days=0.5))}}
         aggregate_cl += ["--samples_extra", json.dumps(delivery_note)]
         call_external_command(
             aggregate_cl,
             with_log_files=(logprefix is not None),
             prefix="{}_aggregate".format(logprefix))
示例#22
0
    def transfer(self):
        """Create the symlink described by this SymlinkAgent instance.

        An existing target is kept when it already validates or when
        overwriting is disabled. Otherwise it is removed first, except for
        mount points, which are never touched.

        :returns: True if the symlink was created successfully, False otherwise
        :raises transfer.TransferError:
            if src_path or dest_path were not valid
        :raises transfer.SymlinkError:
            if an error occurred when creating the symlink
        """
        self.validate_src_path()
        self.validate_dest_path()
        if os.path.exists(self.dest_path):
            # An existing target that already points to the source needs
            # no further work
            if self.validate_transfer():
                logger.debug('target exists and points to the correct '
                             'source path: "{}"'.format(self.src_path))
                return True
            # Without permission to overwrite there is nothing more to do
            if not self.overwrite:
                logger.debug('target "{}" exists and will not be '
                             'overwritten'.format(self.dest_path))
                return False
            # Mount points are never removed
            if os.path.ismount(self.dest_path):
                raise SymlinkError('target exists and is a mount')
            # Pick how to get rid of the target: links and files are
            # unlinked, folders are removed with everything underneath,
            # anything else makes us bail out
            if os.path.islink(self.dest_path) or \
                    os.path.isfile(self.dest_path):
                remove_target = os.unlink
                removal_note = 'removing existing target file "{}"'
            elif os.path.isdir(self.dest_path):
                remove_target = shutil.rmtree
                removal_note = 'removing existing target folder "{}"'
            else:
                raise SymlinkError('target exists and will not be overwritten')
            logger.debug(removal_note.format(self.dest_path))
            try:
                remove_target(self.dest_path)
            except OSError as e:
                raise SymlinkError(e)
        target_parent = os.path.dirname(self.dest_path)
        if not create_folder(target_parent):
            raise SymlinkError('failed to create target folder hierarchy')
        try:
            # A relative symlink is expressed relative to the folder that
            # will contain the link
            if self.relative:
                link_source = os.path.relpath(self.src_path, target_parent)
            else:
                link_source = self.src_path
            os.symlink(link_source, self.dest_path)
        except OSError as e:
            raise SymlinkError(e)
        if not self.validate:
            return True
        return self.validate_transfer()
示例#23
0
 def test_crete_folder1(self):
     """ create_folder must create a folder that does not exist yet """
     target_folder = os.path.join(self.rootdir, "target-non-existing")
     created = filesystem.create_folder(target_folder)
     self.assertTrue(
         created, "A non-existing target folder could not be created")
     # the reported success must match what is found on disk
     self.assertTrue(
         os.path.exists(target_folder),
         "A non-existing target folder was not created \
         but method returned True")
示例#24
0
 def transfer(self):
     """ Set up the symlink this SymlinkAgent instance was configured for.

         A target that exists is either accepted as it is (it validates),
         kept (overwriting is off), refused (it is a mount) or removed
         before the new link is made.

         :returns: True if the symlink was created successfully, False otherwise
         :raises transfer.TransferError:
             if src_path or dest_path were not valid
         :raises transfer.SymlinkError:
             if an error occurred when creating the symlink
     """
     self.validate_src_path()
     self.validate_dest_path()
     if os.path.exists(self.dest_path):
         # A target already pointing to the source is a success
         if self.validate_transfer():
             logger.debug("target exists and points to the correct "
                          "source path: '{}'".format(self.src_path))
             return True
         # Overwriting has to be allowed explicitly
         if not self.overwrite:
             logger.debug("target '{}' exists and will not be "
                          "overwritten".format(self.dest_path))
             return False
         # Never tamper with a mount
         if os.path.ismount(self.dest_path):
             raise SymlinkError("target exists and is a mount")
         # Decide on the removal: unlink for a link or a file, a recursive
         # removal for a folder, and no removal at all for anything else
         target_is_file = os.path.islink(self.dest_path) or \
             os.path.isfile(self.dest_path)
         if target_is_file:
             message, removal = "removing existing target file '{}'", os.unlink
         elif os.path.isdir(self.dest_path):
             message, removal = \
                 "removing existing target folder '{}'", shutil.rmtree
         else:
             raise SymlinkError("target exists and will not be overwritten")
         logger.debug(message.format(self.dest_path))
         try:
             removal(self.dest_path)
         except OSError as e:
             raise SymlinkError(e)
     link_folder = os.path.dirname(self.dest_path)
     if not create_folder(link_folder):
         raise SymlinkError("failed to create target folder hierarchy")
     try:
         # The source of a relative symlink is given relative to the folder
         # holding the link
         pointed_path = os.path.relpath(self.src_path, link_folder) \
             if self.relative else self.src_path
         os.symlink(pointed_path, self.dest_path)
     except OSError as e:
         raise SymlinkError(e)
     if self.validate:
         return self.validate_transfer()
     return True
示例#25
0
 def test_crete_folder_parent_non_existing(self):
     """create_folder must also create a missing parent of the target."""
     missing_parent = os.path.join(self.rootdir, 'parent-non-existing')
     target_folder = os.path.join(missing_parent, 'target-non-existing')
     created = filesystem.create_folder(target_folder)
     self.assertTrue(
         created,
         'A non-existing parent and target folder could not be created')
     # the whole hierarchy has to be present afterwards
     self.assertTrue(
         os.path.exists(target_folder),
         'A non-existing parent folder was not created \
         but method returned True')
示例#26
0
 def test_crete_folder1(self):
     """ A folder that is missing must be created by create_folder """
     target_folder = os.path.join(self.rootdir, "target-non-existing")
     was_created = filesystem.create_folder(target_folder)
     self.assertTrue(
         was_created,
         "A non-existing target folder could not be created")
     # a reported success requires the folder to be on disk
     self.assertTrue(
         os.path.exists(target_folder),
         "A non-existing target folder was not created \
         but method returned True")
 def test_deliver_sample1(self):
     """ Deliver staged content and check that exactly the listed files arrive

     Five files are created in the staging folder and in each of three
     nested subfolders, but only the first three of every folder (plus the
     digest file itself) are written to the file list.
     """
     # stage some content
     digestfile = self.deliverer.staging_digestfile()
     filelist = self.deliverer.staging_filelist()
     basedir = os.path.dirname(digestfile)
     create_folder(basedir)
     expected = []
     with open(digestfile, 'w') as dh, open(filelist, 'w') as fh:
         curdir = basedir
         for depth in xrange(4):
             # every round after the first goes one folder deeper
             if depth > 0:
                 curdir = os.path.join(curdir, "folder{}".format(depth))
                 create_folder(curdir)
             for index in xrange(5):
                 fpath = os.path.join(curdir, "file{}".format(index))
                 open(fpath, 'w').close()
                 rpath = os.path.relpath(fpath, basedir)
                 digest = hashfile(fpath, hasher=self.deliverer.hash_algorithm)
                 if index >= 3:
                     # the last two files of a folder are left unlisted
                     continue
                 expected.append(rpath)
                 fh.write("{}\n".format(rpath))
                 dh.write("{}  {}\n".format(digest, rpath))
         # the digest file is part of the delivery as well
         rpath = os.path.basename(digestfile)
         expected.append(rpath)
         fh.write("{}\n".format(rpath))
     # deliver what was listed
     destination = self.deliverer.expand_path(self.deliverer.deliverypath)
     create_folder(os.path.dirname(destination))
     self.assertTrue(self.deliverer.do_delivery(), "failed to deliver sample")
     # collect the delivered files, relative to the destination
     observed = []
     for folder, _, files in os.walk(destination):
         for name in files:
             observed.append(
                 os.path.relpath(os.path.join(folder, name), destination))
     self.assertItemsEqual(observed, expected)
示例#28
0
 def test_deliver_sample1(self):
     """ Stage files, deliver them and compare the delivery with the file list

     The staging folder and three nested subfolders get five files each.
     The first three files of each folder and the digest file are listed
     for delivery, and only those are expected at the destination.
     """
     # prepare the staged content
     digestfile = self.deliverer.staging_digestfile()
     filelist = self.deliverer.staging_filelist()
     basedir = os.path.dirname(digestfile)
     create_folder(basedir)
     expected = []
     with open(digestfile, 'w') as dh, open(filelist, 'w') as fh:
         curdir = basedir
         for level in range(4):
             if level > 0:
                 # descend into a new subfolder of the previous folder
                 curdir = os.path.join(curdir, "folder{}".format(level))
                 create_folder(curdir)
             for number in range(5):
                 fpath = os.path.join(curdir, "file{}".format(number))
                 open(fpath, 'w').close()
                 rpath = os.path.relpath(fpath, basedir)
                 digest = hashfile(fpath, hasher=self.deliverer.hash_algorithm)
                 if number >= 3:
                     # files 3 and 4 exist but are not listed
                     continue
                 expected.append(rpath)
                 fh.write(u"{}\n".format(rpath))
                 dh.write(u"{}  {}\n".format(digest, rpath))
         # finally list the digest file itself
         rpath = os.path.basename(digestfile)
         expected.append(rpath)
         fh.write(u"{}\n".format(rpath))
     # run the delivery of the listed content
     destination = self.deliverer.expand_path(self.deliverer.deliverypath)
     create_folder(os.path.dirname(destination))
     self.assertTrue(self.deliverer.do_delivery(), "failed to deliver sample")
     # gather the delivered files, relative to the destination
     observed = []
     for folder, _, files in os.walk(destination):
         observed.extend(
             os.path.relpath(os.path.join(folder, name), destination)
             for name in files)
     self.assertEqual(sorted(observed), sorted(expected))
示例#29
0
def produce_analysis_piper(ngi_config, project_id):
    """Create the piper_ngi analysis folder structure for a project.

    The numbered piper folders plus sbatch, setup_xml_files and logs are
    created under ANALYSIS/<project_id>/piper_ngi. For every entry of the
    project's DATA folder an empty bam file is touched in
    05_processed_alignments and an empty vcf file in 07_variant_calls.
    Finally create_version_report is called on the logs folder.

    :param ngi_config: configuration holding base_root, sthlm_root and
        top_dir under the 'analysis' key
    :param project_id: name of the project folder under ANALYSIS and DATA
    """
    analysis_conf = ngi_config['analysis']
    top_level = os.path.join(analysis_conf['base_root'],
                             analysis_conf['sthlm_root'],
                             analysis_conf['top_dir'])
    data_dir = os.path.join(top_level, 'DATA', project_id)
    piper_ngi_dir = os.path.join(top_level, 'ANALYSIS', project_id,
                                 'piper_ngi')
    fs.create_folder(piper_ngi_dir)

    # Folders that receive one empty file per entry of the DATA folder,
    # mapped to the name pattern of that file
    per_sample_files = {
        '05_processed_alignments': '{}.clean.dedup.bam',
        '07_variant_calls': '{}.clean.dedup.recal.bam.raw.indel.vcf.gz',
    }
    for piper_dir in ('01_raw_alignments',
                      '02_preliminary_alignment_qc',
                      '03_genotype_concordance',
                      '04_merged_aligments',
                      '05_processed_alignments',
                      '06_final_alignment_qc',
                      '07_variant_calls',
                      '08_misc'):
        current_dir = os.path.join(piper_ngi_dir, piper_dir)
        fs.create_folder(current_dir)
        name_pattern = per_sample_files.get(piper_dir)
        if name_pattern is None:
            continue
        for sample_id in os.listdir(data_dir):
            fs.touch(os.path.join(current_dir, name_pattern.format(sample_id)))

    # Unnumbered folders; logs comes last as it receives the version report
    for extra_dir in ('sbatch', 'setup_xml_files', 'logs'):
        current_dir = os.path.join(piper_ngi_dir, extra_dir)
        fs.create_folder(current_dir)
    create_version_report(current_dir)
 def create_report(self):
     """ Run the aggregate report command for the project.

     The command (self.report_aggregate) is executed with
     call_external_command from within the expanded self.reportpath. Log
     files, prefixed with the project id under self.logpath, are requested
     only when the folder for them could be created.
     """
     logprefix = os.path.abspath(self.expand_path(
         os.path.join(self.logpath, self.projectid)))
     try:
         log_folder_ready = create_folder(os.path.dirname(logprefix))
     except AttributeError:
         log_folder_ready = False
     if not log_folder_ready:
         # no log folder, so the command runs without log files
         logprefix = None
     with chdir(self.expand_path(self.reportpath)):
         call_external_command(self.report_aggregate.split(' '),
                               with_log_files=(logprefix is not None),
                               prefix="{}_aggregate".format(logprefix))
示例#31
0
 def create_report(self):
     """ Produce the final aggregate report of the project.

     self.report_aggregate is split on spaces and handed to
     call_external_command while the working directory is the expanded
     self.reportpath. When the folder for the log files cannot be created
     the command is run without log files.
     """
     logprefix = os.path.abspath(
         self.expand_path(
             os.path.join(self.logpath, self.projectid)))
     try:
         if not create_folder(os.path.dirname(logprefix)):
             logprefix = None
     except AttributeError:
         logprefix = None
     with chdir(self.expand_path(self.reportpath)):
         aggregate_cl = self.report_aggregate.split(' ')
         call_external_command(
             aggregate_cl,
             with_log_files=(logprefix is not None),
             prefix="{}_aggregate".format(logprefix))
示例#32
0
def produce_analysis_qc_ngi(ngi_config, project_id):
    """Create the qc_ngi analysis folder structure for a project.

    Every entry of the project's DATA folder gets a folder of the same name
    under ANALYSIS/<project_id>/qc_ngi, containing a fastqc and a
    fastq_screen subfolder.

    :param ngi_config: configuration holding base_root, sthlm_root and
        top_dir under the 'analysis' key
    :param project_id: name of the project folder under ANALYSIS and DATA
    """
    analysis_conf = ngi_config['analysis']
    top_level = os.path.join(analysis_conf['base_root'],
                             analysis_conf['sthlm_root'],
                             analysis_conf['top_dir'])
    data_dir = os.path.join(top_level, 'DATA', project_id)
    qc_ngi_dir = os.path.join(top_level, 'ANALYSIS', project_id, 'qc_ngi')

    fs.create_folder(qc_ngi_dir)
    for sample_id in os.listdir(data_dir):
        sample_dir_qc = os.path.join(qc_ngi_dir, sample_id)
        fs.create_folder(sample_dir_qc)
        for tool_dir in ('fastqc', 'fastq_screen'):
            fs.create_folder(os.path.join(sample_dir_qc, tool_dir))
示例#33
0
def produce_analysis_qc_ngi(ngi_config, project_id):
    """Set up ANALYSIS/<project_id>/qc_ngi with one folder per DATA entry.

    The names found in the project's DATA folder are used as sample folder
    names; each sample folder receives the subfolders fastqc and
    fastq_screen.

    :param ngi_config: configuration with the keys base_root, sthlm_root
        and top_dir below "analysis"
    :param project_id: project folder name used under ANALYSIS and DATA
    """
    root_parts = [ngi_config["analysis"][key]
                  for key in ("base_root", "sthlm_root", "top_dir")]
    project_root = os.path.join(*root_parts)
    analysis_dir = os.path.join(project_root, "ANALYSIS", project_id)
    data_dir = os.path.join(project_root, "DATA", project_id)

    qc_ngi_dir = os.path.join(analysis_dir, "qc_ngi")
    fs.create_folder(qc_ngi_dir)
    for sample_id in os.listdir(data_dir):
        # the sample folder first, then the folder of each qc tool inside it
        sample_dir_qc = os.path.join(qc_ngi_dir, sample_id)
        wanted_dirs = [sample_dir_qc,
                       os.path.join(sample_dir_qc, "fastqc"),
                       os.path.join(sample_dir_qc, "fastq_screen")]
        for wanted_dir in wanted_dirs:
            fs.create_folder(wanted_dir)
示例#34
0
            raise AssertionError('No staged samples found in Charon')

        # collect other files (not samples) if any to include in the hard staging
        misc_to_deliver = [
            itm for itm in os.listdir(soft_stagepath)
            if os.path.splitext(itm)[0] not in samples_to_deliver
        ]

        question = "\nProject stagepath: {}\nSamples: {}\nMiscellaneous: {}\n\nProceed with delivery ? "
        question = question.format(soft_stagepath,
                                   ", ".join(samples_to_deliver),
                                   ", ".join(misc_to_deliver))
        if proceed_or_not(question):
            logger.info("Proceeding with delivery of {}".format(str(self)))
            #lock the delivery by creating the folder
            create_folder(hard_stagepath)
        else:
            logger.error(
                "Aborting delivery for {}, remove unwanted files and try again"
                .format(str(self)))
            return False

        hard_staged_samples = []
        for sample_id in samples_to_deliver:
            try:
                sample_deliverer = GrusSampleDeliverer(self.projectid,
                                                       sample_id)
                sample_deliverer.deliver_sample()
            except Exception, e:
                logger.error(
                    'Sample {} has not been hard staged. Error says: {}'.
示例#35
0
 def test_crete_folder2(self):
     """ create_folder must report success for self.rootdir, a folder expected to exist """
     already_there = filesystem.create_folder(self.rootdir)
     self.assertTrue(
         already_there, "A pre-existing target folder was not detected")
示例#36
0
 def test_crete_folder2(self):
     """ Calling create_folder on self.rootdir (expected to exist) must return a truthy value """
     outcome = filesystem.create_folder(self.rootdir)
     self.assertTrue(outcome,
                     "A pre-existing target folder was not detected")