def get_library_names_and_timestamps(name):
    """Extract sample/library names and timestamps

    Given a name from 'get_generic_name', extract the sample, library
    and timestamp elements. If no timestamp is present then it is
    returned as the string 'unknown'.

    For 'standard' SOLiD data directories, the top-level is a 'sample',
    which may be split into multiple 'libraries', and then again in
    multiple timestamped filesets.

    Some examples:

    * LH_POOL/results.F1B1/libraries/LH1/primary.20111208144829752/reads/solid0127_20111207_FRAG_BC_LH_POOL_BC_LH1
    * ZD_hu/results.F1B1/primary.20091220022109452/reads/solid0424_20091214_ZD_hu_F3
    * SH_JC1_pool/results.F1B1/libraries_MM2/JC_SEQ30/primary.20120125063517232/reads/solid0127_20120117_PE_BC_SH_JC1_pool_F5-BC_JC_SEQ30

    Arguments:
      name: path-like name, with elements separated by os.sep; the
        final element is ignored when looking for sample and library.

    Returns:
      Tuple (sample, library, timestamp). 'library' is None if the path
      holds only one sample/library element; both 'sample' and
      'library' are None if it holds none at all.

    """
    # Extract timestamp
    timestamp = SolidData.extract_library_timestamp(name)
    if timestamp is None:
        timestamp = 'unknown'
    # Attempt to extract sample and library from the path: drop the
    # trailing element, then discard the structural directories that
    # carry no sample/library information
    structural_names = ("results.F1B1", "reads")
    structural_prefixes = ("libraries", "primary.", "secondary.")
    new_path = [field for field in name.split(os.sep)[:-1]
                if field not in structural_names
                and not field.startswith(structural_prefixes)]
    # Pad with None so that a missing library (or a missing sample,
    # e.g. when 'name' has no leading directories) does not raise
    # an IndexError
    sample, library = (new_path + [None, None])[:2]
    # Return
    return (sample, library, timestamp)
def get_library_names_and_timestamps(name):
    """Extract sample/library names and timestamps

    Given a name from 'get_generic_name', extract the sample, library
    and timestamp elements. If no timestamp is present then it is
    returned as the string 'unknown'.

    For 'standard' SOLiD data directories, the top-level is a 'sample',
    which may be split into multiple 'libraries', and then again in
    multiple timestamped filesets.

    Some examples:

    * LH_POOL/results.F1B1/libraries/LH1/primary.20111208144829752/reads/solid0127_20111207_FRAG_BC_LH_POOL_BC_LH1
    * ZD_hu/results.F1B1/primary.20091220022109452/reads/solid0424_20091214_ZD_hu_F3
    * SH_JC1_pool/results.F1B1/libraries_MM2/JC_SEQ30/primary.20120125063517232/reads/solid0127_20120117_PE_BC_SH_JC1_pool_F5-BC_JC_SEQ30

    Arguments:
      name: path-like name, with elements separated by os.sep; the
        final element is ignored when looking for sample and library.

    Returns:
      Tuple (sample, library, timestamp). 'library' is None if the path
      holds only one sample/library element; both 'sample' and
      'library' are None if it holds none at all.

    """
    # Extract timestamp
    timestamp = SolidData.extract_library_timestamp(name)
    if timestamp is None:
        timestamp = 'unknown'
    # Attempt to extract sample and library from the path: drop the
    # trailing element, then discard the structural directories that
    # carry no sample/library information
    structural_names = ("results.F1B1", "reads")
    structural_prefixes = ("libraries", "primary.", "secondary.")
    new_path = [field for field in name.split(os.sep)[:-1]
                if field not in structural_names
                and not field.startswith(structural_prefixes)]
    # Pad with None so that a missing library (or a missing sample,
    # e.g. when 'name' has no leading directories) does not raise
    # an IndexError
    sample, library = (new_path + [None, None])[:2]
    # Return
    return (sample, library, timestamp)
def verify_runs(solid_dirs): """Do basic verification checks on SOLiD run directories For each SOLiD run directory, create a SolidRun object and check for the expected sample and library directories, and that primary data files (csfasta and qual) have been assigned and exist. Returns a UNIX-like status code: 0 indicates that the checks passed, 1 indicates that they failed. Arguments: solid_dirs: a list of SOLiD sequencing directory names. Returns: 0 if the run is verified, 1 if there is a problem. """ print "Performing verification" status = 0 for solid_dir in solid_dirs: # Initialise run_status = 0 run = SolidData.SolidRun(solid_dir) if not run.verify(): run_status = 1 print "%s:" % run.run_name, if run_status == 0: print " [PASSED]" else: print " [FAILED]" status = 1 # Completed print "Overall status:", if status == 0: print " [PASSED]" else: print " [FAILED]" return status
logging.getLogger().setLevel(logging.DEBUG) elif options.no_warnings: logging.getLogger().setLevel(logging.ERROR) # Solid run directories for arg in args: if not os.path.isdir(arg): logging.error("'%s' not found or not a directory" % arg) sys.exit(1) if len(args) == 1: # Single directory supplied if options.only: solid_dirs = [args[0]] else: # Add associated directories solid_dirs = SolidData.list_run_directories(args[0]) else: # Use all supplied arguments solid_dirs = args # Output spreadsheet name if options.xls: spreadsheet = os.path.splitext(os.path.basename( solid_dirs[0]))[0] + ".xls" print "Writing spreadsheet %s" % spreadsheet # Check there's at least one thing to do if not (options.report or options.layout or options.xls or options.verify or options.rsync or options.md5sum or options.copy_pattern or options.gzip_pattern or options.md5_pattern): options.report = True
logging.getLogger().setLevel(logging.DEBUG) elif options.no_warnings: logging.getLogger().setLevel(logging.ERROR) # Solid run directories for arg in args: if not os.path.isdir(arg): logging.error("'%s' not found or not a directory" % arg) sys.exit(1) if len(args) == 1: # Single directory supplied if options.only: solid_dirs = [args[0]] else: # Add associated directories solid_dirs = SolidData.list_run_directories(args[0]) else: # Use all supplied arguments solid_dirs = args # Output spreadsheet name if options.xls: spreadsheet = os.path.splitext(os.path.basename(solid_dirs[0]))[0] + ".xls" print "Writing spreadsheet %s" % spreadsheet # Check there's at least one thing to do if not (options.report or options.layout or options.xls or options.verify or options.rsync or