def __linkToFile(self, source, target, relative=True, dry_run=False):
    """Create symbolic link to a file

    Internal function to make symbolic links to primary data.

    If something already exists at 'target' then no link is made:
    a warning is logged, and an error is additionally logged when
    the existing target does not refer to the same file as 'source'.

    Arguments:
      source: the file to be linked to
      target: the name of the link pointing to source
      relative: passed through to bcf_utils.mklink; if True (the
        default) then make a relative link (if possible), otherwise
        link to the source as given
      dry_run: if True then only report the actions that would be
        performed (default is False, perform the actions)

    """
    # lexists (rather than exists) also catches dangling symlinks, which
    # would otherwise fall through to an attempt to create the link again
    if os.path.lexists(target):
        logging.warning("Target file %s already exists" % target)
        # Cheap check first: the stored link text is exactly 'source'.
        # readlink is only legal on symlinks, so guard it with islink.
        same_source = (os.path.islink(target) and
                       os.readlink(target) == source)
        if not same_source:
            # Relative links store a path relative to the link's own
            # directory, so compare the fully resolved locations instead
            same_source = (os.path.realpath(target) ==
                           os.path.realpath(source))
        if not same_source:
            logging.error("Different sources for %s" % target)
        return
    if dry_run:
        # Report what would have been done
        # (single-argument form prints identically on Python 2 and 3)
        print("ln -s %s %s" % (source, target))
        return
    # Make symbolic link
    bcf_utils.mklink(source, target, relative=relative)
def __linkToFile(self, source, target, relative=True, dry_run=False):
    """Create symbolic link to a file

    Internal function to make symbolic links to primary data.

    If something already exists at 'target' then no link is made:
    a warning is logged, and an error is additionally logged when
    the existing target does not refer to the same file as 'source'.

    Arguments:
      source: the file to be linked to
      target: the name of the link pointing to source
      relative: passed through to bcf_utils.mklink; if True (the
        default) then make a relative link (if possible), otherwise
        link to the source as given
      dry_run: if True then only report the actions that would be
        performed (default is False, perform the actions)

    """
    # lexists (rather than exists) also catches dangling symlinks, which
    # would otherwise fall through to an attempt to create the link again
    if os.path.lexists(target):
        logging.warning("Target file %s already exists" % target)
        # Cheap check first: the stored link text is exactly 'source'.
        # readlink is only legal on symlinks, so guard it with islink.
        same_source = (os.path.islink(target) and
                       os.readlink(target) == source)
        if not same_source:
            # Relative links store a path relative to the link's own
            # directory, so compare the fully resolved locations instead
            same_source = (os.path.realpath(target) ==
                           os.path.realpath(source))
        if not same_source:
            logging.error("Different sources for %s" % target)
        return
    if dry_run:
        # Report what would have been done
        # (single-argument form prints identically on Python 2 and 3)
        print("ln -s %s %s" % (source, target))
        return
    # Make symbolic link
    bcf_utils.mklink(source, target, relative=relative)
def create_analysis_dir(project,
                        top_dir=None,
                        merge_replicates=False,
                        keep_names=False,
                        dry_run=False):
    """Create and populate analysis directory for an IlluminaProject

    Creates a new directory and populates either with links to FASTQ
    files, or with 'merged' FASTQ files created by concatenating
    multiple FASTQs for each sample (which can happen for multiplexed
    runs where samples are split across multiple lanes).

    The directory is named after 'project.full_name' (documented as
    the project name followed by the experiment type, or just the
    project name if the experiment type is not set). An empty
    'ScriptCode' subdirectory is also made inside it.

    Arguments:
      project   : populated IlluminaProject object
      top_dir   : parent directory to create analysis subdirectory
                  under. Defaults to cwd if not explicitly specified
      merge_replicates: if True then creates a single FASTQ file for
                  each sample/barcode/read combination by merging
                  multiple FASTQs together
      keep_names: if True then links to FASTQ files will have the same
                  names as the original files; by default links use
                  the names from IlluminaData.get_unique_fastq_names
      dry_run   : if True then report what would be done but don't
                  actually perform any action

    Returns:
      Name of the project directory.

    """
    # Honour the documented default: os.path.join cannot take None
    if top_dir is None:
        top_dir = os.getcwd()
    project_dir = os.path.join(top_dir, project.full_name)
    print("Creating analysis directory for project '%s'..."
          % project.full_name)
    # Check for & create directory
    if os.path.exists(project_dir):
        print("-> %s already exists" % project_dir)
    else:
        print("Making analysis directory for %s" % project.name)
        if not dry_run:
            bcf_utils.mkdir(project_dir, mode=0o775)
    # Make an empty ScriptCode directory
    scriptcode_dir = os.path.join(project_dir, "ScriptCode")
    if os.path.exists(scriptcode_dir):
        print("'ScriptCode' directory %s already exists" % scriptcode_dir)
    else:
        print("Making 'ScriptCode' directory for %s" % project.name)
        if not dry_run:
            bcf_utils.mkdir(scriptcode_dir, mode=0o775)
    if not merge_replicates:
        # Check for & create links to fastq files
        for sample in project.samples:
            fastq_names = IlluminaData.get_unique_fastq_names(sample.fastq)
            for fastq in sample.fastq:
                fastq_file = os.path.join(sample.dirn, fastq)
                link_name = fastq if keep_names else fastq_names[fastq]
                fastq_ln = os.path.join(project_dir, link_name)
                # lexists also detects a dangling link of the same name,
                # which would otherwise make the link creation fail
                if os.path.lexists(fastq_ln):
                    logging.error("Failed to link to %s: %s already exists" %
                                  (fastq_file, os.path.basename(fastq_ln)))
                    continue
                print("Linking to %s" % fastq)
                if not dry_run:
                    bcf_utils.mklink(fastq_file, fastq_ln, relative=True)
    else:
        # Merge files for replicates within each sample
        for sample in project.samples:
            # Gather replicates to be merged, keyed on the name of the
            # merged file (sample name, barcode and read number)
            replicates = {}
            for fastq in sample.fastq:
                fastq_data = IlluminaData.IlluminaFastq(fastq)
                name = "%s_%s_R%d" % (fastq_data.sample_name,
                                      fastq_data.barcode_sequence,
                                      fastq_data.read_number)
                replicates.setdefault(name, []).append(
                    os.path.join(sample.dirn, fastq))
            # Sort each group once, after everything has been gathered
            for fastqs in replicates.values():
                fastqs.sort()
            # Fixed ordering so the report and the merges are reproducible
            names = sorted(replicates)
            # Report detected replicates
            print("Sample %s" % sample.name)
            for name in names:
                print("\tReplicate '%s'" % name)
                for fastq in replicates[name]:
                    print("\t\t%s" % fastq)
            # Do the merge (the report above is all a dry run produces)
            if dry_run:
                continue
            for name in names:
                merged_fastq = os.path.join(project_dir, name + '.fastq')
                bcf_utils.concatenate_fastq_files(merged_fastq,
                                                  replicates[name])
    # Return directory name
    return project_dir
if not options.merge_replicates: for sample in project.samples: fastq_names = get_unique_fastqs(sample) for fastq in sample.fastq: fastq_file = os.path.join(sample.dirn,fastq) if options.keep_names: fastq_ln = os.path.join(project_dir,fastq) else: fastq_ln = os.path.join(project_dir,fastq_names[fastq]) if os.path.exists(fastq_ln): logging.error("Failed to link to %s: %s already exists" % (fastq_file,os.path.basename(fastq_ln))) else: print "Linking to %s" % fastq if not options.dry_run: bcf_utils.mklink(fastq_file,fastq_ln,relative=True) else: # Merge files for replicates within each sample for sample in project.samples: replicates = {} # Gather replicates to be merged for fastq in sample.fastq: fastq_data = IlluminaData.IlluminaFastq(fastq) name = "%s_%s_R%d" % (fastq_data.sample_name, fastq_data.barcode_sequence, fastq_data.read_number) if name not in replicates: replicates[name] = [] replicates[name].append(os.path.join(sample.dirn,fastq)) # Sort into order replicates[name].sort()