def add_file(fn, root):
    """Vet a single file and, if it passes all filters, append it to ``files``.

    Skips files matching ``config.fn_ignore_files`` patterns, encoded
    (likely binary) files, image files, and files larger than
    ``config.log_filesize_limit``.

    :param fn: filename (basename only)
    :param root: directory containing the file
    :return: None in all cases; appends to module-level ``files`` on success
    """
    # Check that we don't want to ignore this file
    i_matches = [ n for n in config.fn_ignore_files if fnmatch.fnmatch(fn, n) ]
    if len(i_matches) > 0:
        logger.debug(
            "Ignoring file as matched an ignore pattern: {}".format(fn))
        return None

    # Use mimetypes to exclude binary files where possible
    (ftype, encoding) = mimetypes.guess_type(os.path.join(root, fn))
    if encoding is not None:
        logger.debug("Ignoring file as is encoded: {}".format(fn))
        return None
    if ftype is not None and ftype.startswith('image'):
        if config.report_imgskips:
            logger.debug("Ignoring file as has filetype '{}': {}".format(
                ftype, fn))
        # NOTE(review): image files appear to be skipped regardless; the
        # flag above presumably only gates the log message — confirm.
        return None

    # Limit search to files under 5MB to avoid 30GB FastQ files etc.
    try:
        filesize = os.path.getsize(os.path.join(root, fn))
    except (IOError, OSError, ValueError, UnicodeDecodeError):
        # Size check is best-effort: an unreadable size does not exclude the file
        logger.debug(
            "Couldn't read file when checking filesize: {}".format(fn))
    else:
        if filesize > config.log_filesize_limit:
            logger.debug("Ignoring file as too large: {}".format(fn))
            return None

    # Looks good! Remember this file
    files.append({'root': root, 'fn': fn})
def add_file(fn, root):
    """Vet a single file and record it in ``files`` if it passes every filter.

    Rejects ignore-pattern matches, encoded (probably binary) files,
    images, and files over the configured size limit.
    """
    full_path = os.path.join(root, fn)

    # Reject anything matching a configured ignore pattern
    if any(fnmatch.fnmatch(fn, pattern) for pattern in config.fn_ignore_files):
        logger.debug("Ignoring file as matched an ignore pattern: {}".format(fn))
        return None

    # Guess the MIME type to weed out binary and image files
    ftype, encoding = mimetypes.guess_type(full_path)
    if encoding is not None:
        logger.debug("Ignoring file as is encoded: {}".format(fn))
        return None
    if ftype is not None and ftype.startswith('image'):
        logger.debug("Ignoring file as has filetype '{}': {}".format(ftype, fn))
        return None

    # Reject very large files (e.g. multi-GB FastQ); size check is best-effort
    try:
        filesize = os.path.getsize(full_path)
    except (IOError, OSError, ValueError, UnicodeDecodeError):
        logger.debug("Couldn't read file when checking filesize: {}".format(fn))
    else:
        if filesize > config.log_filesize_limit:
            logger.debug("Ignoring file as too large: {}".format(fn))
            return None

    # All filters passed - remember this file
    files.append({ 'root': root, 'fn': fn })
def get_filelist():
    """Scan ``config.analysis_dir`` and collect candidate log files into ``files``.

    Directories matching ``config.fn_ignore_dirs`` (by basename) or
    ``config.fn_ignore_paths`` (by full path) are skipped; each remaining
    file is vetted by the nested ``add_file`` helper.
    """
    def add_file(fn, root):
        # Filter one file; appends {'root': root, 'fn': fn} to ``files`` if it passes.
        # Check that we don't want to ignore this file
        i_matches = [n for n in config.fn_ignore_files if fnmatch.fnmatch(fn, n)]
        if len(i_matches) > 0:
            logger.debug("Ignoring file as matched an ignore pattern: {}".format(fn))
            return None
        # Use mimetypes to exclude binary files where possible
        (ftype, encoding) = mimetypes.guess_type(os.path.join(root, fn))
        if encoding is not None:
            logger.debug("Ignoring file as is encoded: {}".format(fn))
            return None
        if ftype is not None and ftype.startswith('image'):
            if config.report_imgskips:
                logger.debug("Ignoring file as has filetype '{}': {}".format(ftype, fn))
            # NOTE(review): images appear to be skipped regardless; the flag
            # above presumably only gates the log message — confirm.
            return None
        # Limit search to files under 5MB to avoid 30GB FastQ files etc.
        try:
            filesize = os.path.getsize(os.path.join(root,fn))
        except (IOError, OSError, ValueError, UnicodeDecodeError):
            # Best-effort: an unreadable size does not exclude the file
            logger.debug("Couldn't read file when checking filesize: {}".format(fn))
        else:
            if filesize > config.log_filesize_limit:
                logger.debug("Ignoring file as too large: {}".format(fn))
                return None
        # Looks good! Remember this file
        files.append({ 'root': root, 'fn': fn })

    # Go through the analysis directories
    for path in config.analysis_dir:
        if os.path.isdir(path):
            for root, dirnames, filenames in os.walk(path, followlinks=True, topdown=True):
                bname = os.path.basename(root)
                # Skip if this directory name matches config.fn_ignore_dirs
                d_matches = [n for n in config.fn_ignore_dirs if fnmatch.fnmatch(bname, n.rstrip(os.sep))]
                if len(d_matches) > 0:
                    logger.debug("Ignoring directory as matched fn_ignore_dirs: {}".format(bname))
                    continue
                # Skip if this directory path matches config.fn_ignore_paths
                p_matches = [n for n in config.fn_ignore_paths if fnmatch.fnmatch(root, n.rstrip(os.sep))]
                if len(p_matches) > 0:
                    logger.debug("Ignoring directory as matched fn_ignore_paths: {}".format(root))
                    continue
                # Search filenames in this directory
                for fn in filenames:
                    add_file(fn, root)
        # A single file was supplied as an analysis path
        elif os.path.isfile(path):
            add_file(os.path.basename(path), os.path.dirname(path))
def get_filelist():
    """Scan ``config.analysis_dir`` and collect candidate log files into ``files``.

    Directories matching an exclusion pattern are pruned from the walk
    in-place (``topdown=True`` makes the ``dirnames[:]`` assignment
    effective); each remaining file is vetted by ``add_file``.
    """
    def add_file(fn, root):
        """Vet one file; append ``{'root': root, 'fn': fn}`` to ``files`` if it passes."""
        # Check that we don't want to ignore this file
        i_matches = [n for n in config.fn_ignore_files if fnmatch.fnmatch(fn, n)]
        if len(i_matches) > 0:
            logger.debug("Ignoring file as matched an ignore pattern: {}".format(fn))
            return None
        # Use mimetypes to exclude binary files where possible
        (ftype, encoding) = mimetypes.guess_type(os.path.join(root, fn))
        if encoding is not None:
            logger.debug("Ignoring file as is encoded: {}".format(fn))
            return None
        if ftype is not None and ftype.startswith('image'):
            logger.debug("Ignoring file as has filetype '{}': {}".format(ftype, fn))
            return None
        # Limit search to files under 5MB to avoid 30GB FastQ files etc.
        try:
            filesize = os.path.getsize(os.path.join(root,fn))
        except (IOError, OSError, ValueError, UnicodeDecodeError):
            # Best-effort: an unreadable size does not exclude the file
            logger.debug("Couldn't read file when checking filesize: {}".format(fn))
        else:
            if filesize > config.log_filesize_limit:
                logger.debug("Ignoring file as too large: {}".format(fn))
                return None
        # Looks good! Remember this file
        files.append({ 'root': root, 'fn': fn })

    # Go through the analysis directories
    for directory in config.analysis_dir:
        if os.path.isdir(directory):
            for root, dirnames, filenames in os.walk(directory, followlinks=True, topdown=True):
                # Exclude any directories that match exclusion filters
                # NOTE(review): directory paths are matched against
                # config.fn_ignore_files — verify this is intended rather
                # than a dedicated directory-ignore list.
                skip_dirs = []
                for n in config.fn_ignore_files:
                    for d in dirnames:
                        # Bug fix: os.pathsep is the $PATH entry separator
                        # (':'), not the directory separator; strip trailing
                        # directory separators with os.sep instead.
                        if fnmatch.fnmatch(os.path.join(root, d).rstrip(os.sep), n.rstrip(os.sep)):
                            skip_dirs.append(d)
                if len(skip_dirs) > 0:
                    # Prune matched directories in-place so os.walk skips them
                    dirnames[:] = [d for d in dirnames if d not in skip_dirs]
                    for s in skip_dirs:
                        logger.debug("Ignoring directory as matched an ignore pattern: {}".format(s))
                # Search filenames in this directory
                for fn in filenames:
                    add_file(fn, root)
        # A single file was supplied as an analysis path
        elif os.path.isfile(directory):
            add_file(os.path.basename(directory), os.path.dirname(directory))
def general_stats_build_html():
    """ Build the general stats HTML, be that a beeswarm plot or a table. """
    # First - collect settings for shared keys, so that columns sharing a key
    # use a common colour scale and common min/max bounds
    shared_keys = defaultdict(lambda: dict())
    for mod in general_stats.keys():
        headers = general_stats[mod]['headers']
        for k in headers.keys():
            sk = headers[k].get('shared_key', None)
            if sk is not None:
                shared_keys[sk]['scale'] = headers[k]['scale']
                shared_keys[sk]['dmax'] = max(headers[k]['dmax'], shared_keys[sk].get('dmax', headers[k]['dmax']))
                # Bug fix: the shared lower bound must be the smallest dmin
                # seen — previously max() was used, inflating the minimum.
                shared_keys[sk]['dmin'] = min(headers[k]['dmin'], shared_keys[sk].get('dmin', headers[k]['dmin']))

    # Cycle of RGB colours used to tint each module's columns
    modcols = ['55,126,184', '77,175,74', '152,78,163', '255,127,0',
               '228,26,28', '255,255,51', '166,86,40', '247,129,191',
               '153,153,153']
    midx = 0
    sample_names = set()
    for mod in general_stats.keys():
        headers = general_stats[mod]['headers']
        for k in headers.keys():
            # Overwrite config with shared key settings
            sk = headers[k].get('shared_key', None)
            if sk is not None:
                headers[k]['scale'] = shared_keys[sk]['scale']
                headers[k]['dmax'] = shared_keys[sk]['dmax']
                headers[k]['dmin'] = shared_keys[sk]['dmin']
            # Module colour
            headers[k]['modcol'] = modcols[midx]
            # Count data points (unique sample names across all modules)
            for (sname, samp) in general_stats[mod]['data'].items():
                sample_names.add(sname)
        # Increment module colour
        midx += 1
        if midx > (len(modcols) - 1):
            midx = 0

    # Make a beeswarm plot if we have lots of samples
    if len(sample_names) >= config.genstats_beeswarm_numseries:
        logger.debug('Plotting general statistics beeswarm - {} samples'.format(len(sample_names)))
        general_stats_build_beeswarm()
    else:
        logger.debug('Making general statistics table - {} samples'.format(len(sample_names)))
        general_stats_build_table()
def mqc_load_config(yaml_config):
    """ Load and parse a config file if we find it """
    try:
        # NOTE(review): yaml.load() without an explicit Loader is unsafe on
        # untrusted input — consider yaml.safe_load().
        with open(yaml_config) as f:
            new_config = yaml.load(f)
            logger.debug(
                "Loading config settings from: {}".format(yaml_config))
            for c, v in new_config.items():
                # Bug fix: these were two independent `if` statements, so for
                # c == 'sp' the second if's `else` branch still ran and
                # globals()['sp'] = v clobbered the merged patterns.
                # An elif chain makes the three cases mutually exclusive.
                if c == 'sp':
                    # Merge filename patterns instead of replacing
                    sp.update(v)
                elif c == 'extra_fn_clean_exts':
                    # Merge filename cleaning patterns instead of replacing
                    # NOTE(review): sibling versions treat fn_clean_exts as a
                    # list (slice-assignment) — verify .update() is valid here.
                    fn_clean_exts.update(v)
                else:
                    logger.debug("New config '{}': {}".format(c, v))
                    globals()[c] = v
    except (IOError, AttributeError):
        # Missing or unparseable config is not an error - just skip it
        logger.debug("No MultiQC user config found: {}".format(yaml_config))
# Pre-alignment QC 'cutadapt', 'fastq_screen', 'fastqc' ] # Get all modules, including those from other extension packages all_avail_modules = {} avail_modules = OrderedDict() for entry_point in pkg_resources.iter_entry_points('multiqc.modules.v1'): nicename = str(entry_point).split('=')[0].strip() all_avail_modules[nicename] = entry_point # Start with modules not described above - probably plugins for m in all_avail_modules.keys(): if m not in module_order: avail_modules[m] = all_avail_modules[m] logger.debug("Module missing from order declaration: {}".format(m)) # Add known modules, in order defined above for m in module_order: if m in all_avail_modules.keys(): avail_modules[m] = all_avail_modules[m] ####################### # Available templates ####################### # Templates must be listed in setup.py under entry_points['multiqc.templates.v1'] # Get all templates, including those from other extension packages avail_templates = {} for entry_point in pkg_resources.iter_entry_points('multiqc.templates.v1'):
def mqc_load_config(yaml_config):
    """ Load and parse a config file if we find it """
    if not os.path.isfile(yaml_config):
        logger.debug("No MultiQC config found: {}".format(yaml_config))
        return
    try:
        with open(yaml_config) as fh:
            # NOTE(review): yaml.load() without an explicit Loader is unsafe
            # on untrusted input — consider yaml.safe_load().
            new_config = yaml.load(fh)
            logger.debug("Loading config settings from: {}".format(yaml_config))
            for key, value in new_config.items():
                if key == 'sp':
                    # Merge filename patterns instead of replacing
                    sp.update(value)
                    logger.debug("Added to filename patterns: {}".format(sp))
                elif key == 'extra_fn_clean_exts':
                    # Prepend to filename cleaning patterns instead of replacing
                    fn_clean_exts[0:0] = value
                    logger.debug("Added to filename clean extensions. Now looking for: {}".format(fn_clean_exts))
                else:
                    logger.debug("New config '{}': {}".format(key, value))
                    globals()[key] = value
    except (IOError, AttributeError) as e:
        logger.debug("Config error: {}".format(e))
def mqc_load_config(yaml_config):
    """ Load and parse a config file if we find it """
    if os.path.isfile(yaml_config):
        try:
            # NOTE(review): yaml.load() without an explicit Loader is unsafe
            # on untrusted input — consider yaml.safe_load().
            with open(yaml_config) as f:
                new_config = yaml.load(f)
                logger.debug("Loading config settings from: {}".format(yaml_config))
                for c, v in new_config.items():
                    if c == 'sp':
                        # Merge filename patterns instead of replacing
                        # NOTE(review): a sibling version uses sp.update(v) —
                        # verify whether sp is a list or a dict here.
                        sp.extend(v)
                        logger.debug("Added to filename patterns: {}".format(sp))
                    # Bug fix: this was a second independent `if`, so for
                    # c == 'sp' its `else` branch still ran and
                    # globals()['sp'] = v clobbered the merged patterns.
                    elif c == 'extra_fn_clean_exts':
                        # Prepend to filename cleaning patterns instead of replacing
                        fn_clean_exts[0:0] = v
                        logger.debug("Added to filename clean extensions. Now looking for: {}".format(fn_clean_exts))
                    else:
                        logger.debug("New config '{}': {}".format(c, v))
                        globals()[c] = v
        except (IOError, AttributeError) as e:
            logger.debug("Config error: {}".format(e))
    else:
        logger.debug("No MultiQC config found: {}".format(yaml_config))
# Pre-alignment QC 'cutadapt', 'trimmomatic', 'skewer', 'fastq_screen', 'fastqc', ] # Get all modules, including those from other extension packages all_avail_modules = {} avail_modules = OrderedDict() for entry_point in pkg_resources.iter_entry_points('multiqc.modules.v1'): nicename = str(entry_point).split('=')[0].strip() all_avail_modules[nicename] = entry_point # Start with modules not described above - probably plugins for m in all_avail_modules.keys(): if m not in module_order: avail_modules[m] = all_avail_modules[m] logger.debug("Module missing from order declaration: {}".format(m)) # Add known modules, in order defined above for m in module_order: if m in all_avail_modules.keys(): avail_modules[m] = all_avail_modules[m] ####################### # Available templates ####################### # Templates must be listed in setup.py under entry_points['multiqc.templates.v1'] # Get all templates, including those from other extension packages avail_templates = {} for entry_point in pkg_resources.iter_entry_points('multiqc.templates.v1'):
def get_filelist():
    """Scan the analysis directories and collect candidate files into ``files``."""

    def add_file(fn, root):
        # Reject files matching a configured ignore pattern
        if any(fnmatch.fnmatch(fn, pat) for pat in config.fn_ignore_files):
            logger.debug(
                "Ignoring file as matched an ignore pattern: {}".format(fn))
            return None
        # Reject encoded (probably binary) files and images
        ftype, encoding = mimetypes.guess_type(os.path.join(root, fn))
        if encoding is not None:
            logger.debug("Ignoring file as is encoded: {}".format(fn))
            return None
        if ftype is not None and ftype.startswith('image'):
            if config.report_imgskips:
                logger.debug("Ignoring file as has filetype '{}': {}".format(
                    ftype, fn))
            return None
        # Reject files over the configured size limit (best-effort check)
        try:
            filesize = os.path.getsize(os.path.join(root, fn))
        except (IOError, OSError, ValueError, UnicodeDecodeError):
            logger.debug(
                "Couldn't read file when checking filesize: {}".format(fn))
        else:
            if filesize > config.log_filesize_limit:
                logger.debug("Ignoring file as too large: {}".format(fn))
                return None
        # Passed all filters - keep it
        files.append({'root': root, 'fn': fn})

    # Walk each analysis path; a path may also be a single file
    for path in config.analysis_dir:
        if os.path.isfile(path):
            add_file(os.path.basename(path), os.path.dirname(path))
        elif os.path.isdir(path):
            for root, dirnames, filenames in os.walk(
                    path, followlinks=True, topdown=True):
                bname = os.path.basename(root)
                # Skip directories whose basename matches fn_ignore_dirs
                if any(fnmatch.fnmatch(bname, n.rstrip(os.sep))
                       for n in config.fn_ignore_dirs):
                    logger.debug(
                        "Ignoring directory as matched fn_ignore_dirs: {}".
                        format(bname))
                    continue
                # Skip directories whose full path matches fn_ignore_paths
                if any(fnmatch.fnmatch(root, n.rstrip(os.sep))
                       for n in config.fn_ignore_paths):
                    logger.debug(
                        "Ignoring directory as matched fn_ignore_paths: {}".
                        format(root))
                    continue
                for fn in filenames:
                    add_file(fn, root)
def general_stats_build_table():
    """ Helper function to add to the General Statistics table.
    Parses report.general_stats and returns HTML for general stats table.
    Also creates report.general_stats_raw for multiqc_general_stats.txt
    :param data: A dict with the data. First key should be sample name,
                 then the data key, then the data.
    :param headers: Dict / OrderedDict with information for the headers,
                    such as colour scales, min and max values etc.
                    See docs/writing_python.md for more information.
    :return: None
    """
    for mod in general_stats.keys():
        headers = general_stats[mod]['headers']
        for k in headers.keys():
            rid = headers[k]['rid']
            # Bug fix: ``sk`` was referenced before assignment
            # (`sk = ' data-shared-key={}'.format(sk)`), raising NameError
            # whenever a column defined a shared_key. Fetch the key first.
            sk = headers[k].get('shared_key', None)
            if sk is not None:
                sk = ' data-shared-key={}'.format(sk)
            else:
                sk = ''
            # Build the <th> header cell with chroma colour-scale attributes
            general_stats_html['headers'][rid] = '<th \
                id="header_{rid}" \
                class="chroma-col {rid}" \
                data-chroma-scale="{scale}" \
                data-chroma-max="{max}" \
                data-chroma-min="{min}" \
                {sk}><span data-toggle="tooltip" title="{mod}: {descrip}">{title}</span></th>' \
                .format(rid=rid, scale=headers[k]['scale'], max=headers[k]['dmax'],
                        min=headers[k]['dmin'], sk=sk, mod=mod,
                        descrip=headers[k]['description'], title=headers[k]['title'])
            # Add the data table cells
            nrows = 0
            for (sname, samp) in general_stats[mod]['data'].items():
                if k in samp:
                    val = samp[k]
                    # Raw (unmodified) value goes to the flat-file output
                    general_stats_raw[sname][rid] = val
                    if 'modify' in headers[k] and callable(headers[k]['modify']):
                        val = headers[k]['modify'](val)
                    # Percentage for the coloured background bar, clamped 0-100
                    try:
                        dmin = headers[k]['dmin']
                        dmax = headers[k]['dmax']
                        percentage = ((float(val) - dmin) / (dmax - dmin)) * 100
                        percentage = min(percentage, 100)
                        percentage = max(percentage, 0)
                    except (ZeroDivisionError, ValueError):
                        percentage = 0
                    # Apply the number format, falling back to the raw value
                    try:
                        val = headers[k]['format'].format(val)
                    except ValueError:
                        try:
                            val = headers[k]['format'].format(float(samp[k]))
                        except ValueError:
                            val = samp[k]
                    except Exception:
                        # Was a bare except: - narrowed so e.g. KeyboardInterrupt escapes
                        val = samp[k]
                    general_stats_html['rows'][sname][rid] = \
                        '<td class="data-coloured {rid}" >\
                        <div class="wrapper">\
                        <span class="bar" style="width:{percentage}%;"></span>\
                        <span class="val">{val}</span>\
                        </div>\
                        </td>'.format(rid=rid, percentage=percentage, val=val)
                    nrows += 1
            # Remove header if we don't have any filled cells for it
            if nrows == 0:
                general_stats_html['headers'].pop(rid, None)
                logger.debug('Removing header {} from general stats table, as no data'.format(k))
    return None
def general_stats_build_html():
    """ Helper function to add to the General Statistics table.
    Parses report.general_stats and returns HTML for general stats table.
    :param data: A dict with the data. First key should be sample name,
                 then the data key, then the data.
    :param headers: Dict / OrderedDict with information for the headers,
                    such as colour scales, min and max values etc.
                    See docs/writing_python.md for more information.
    :return: None
    """
    # First - collect settings for shared keys, so columns sharing a key use
    # a common colour scale and common min/max bounds
    shared_keys = defaultdict(lambda: dict())
    for mod in general_stats.keys():
        headers = general_stats[mod]['headers']
        for k in headers.keys():
            sk = headers[k].get('shared_key', None)
            if sk is not None:
                shared_keys[sk]['scale'] = headers[k]['scale']
                shared_keys[sk]['dmax'] = max(
                    headers[k]['dmax'],
                    shared_keys[sk].get('dmax', headers[k]['dmax']))
                # Bug fix: the shared lower bound must be the smallest dmin
                # seen — previously max() was used, inflating the minimum.
                shared_keys[sk]['dmin'] = min(
                    headers[k]['dmin'],
                    shared_keys[sk].get('dmin', headers[k]['dmin']))

    # Now build required HTML
    # Cycle of RGB colours used to tint each module's columns
    modcols = [
        '228,26,28', '55,126,184', '77,175,74', '152,78,163', '255,127,0',
        '255,255,51', '166,86,40', '247,129,191', '153,153,153'
    ]
    midx = 0
    for mod in general_stats.keys():
        headers = general_stats[mod]['headers']
        for k in headers.keys():
            rid = headers[k]['rid']
            headers[k]['modcol'] = modcols[midx]
            # Overwrite config with shared key settings
            sk = headers[k].get('shared_key', None)
            if sk is not None:
                headers[k]['scale'] = shared_keys[sk]['scale']
                headers[k]['dmax'] = shared_keys[sk]['dmax']
                headers[k]['dmin'] = shared_keys[sk]['dmin']
                sk = ' data-shared-key={}'.format(sk)
            else:
                sk = ''
            # Build the <th> header cell with chroma colour-scale attributes
            general_stats_html['headers'][rid] = '<th \
                id="header_{rid}" \
                class="chroma-col {rid}" \
                data-chroma-scale="{scale}" \
                data-chroma-max="{max}" \
                data-chroma-min="{min}" \
                {sk}><span data-toggle="tooltip" title="{mod}: {descrip}">{title}</span></th>' \
                .format(rid=rid, scale=headers[k]['scale'], max=headers[k]['dmax'],
                        min=headers[k]['dmin'], sk=sk, mod=mod,
                        descrip=headers[k]['description'], title=headers[k]['title'])
            # Add the data table cells
            nrows = 0
            for (sname, samp) in general_stats[mod]['data'].items():
                if k in samp:
                    val = samp[k]
                    if 'modify' in headers[k] and callable(
                            headers[k]['modify']):
                        val = headers[k]['modify'](val)
                    # Percentage for the coloured background bar, clamped 0-100.
                    # Bug fix: also catch ValueError so a non-numeric value
                    # falls back to 0% instead of crashing (matches the
                    # table-building variant of this function).
                    try:
                        percentage = (
                            (float(val) - headers[k]['dmin']) /
                            (headers[k]['dmax'] - headers[k]['dmin'])) * 100
                        percentage = min(percentage, 100)
                        percentage = max(percentage, 0)
                    except (ZeroDivisionError, ValueError):
                        percentage = 0
                    # Apply the number format, falling back to the raw value.
                    # Bug fix: guard the float() fallback too - it can raise
                    # ValueError and abort the whole report otherwise.
                    try:
                        val = headers[k]['format'].format(val)
                    except ValueError:
                        try:
                            val = headers[k]['format'].format(float(samp[k]))
                        except ValueError:
                            val = samp[k]
                    except Exception:
                        # Was a bare except: - narrowed so e.g. KeyboardInterrupt escapes
                        val = samp[k]
                    general_stats_html['rows'][sname][rid] = \
                        '<td class="data-coloured {rid}" >\
                        <div class="wrapper">\
                        <span class="bar" style="width:{percentage}%;"></span>\
                        <span class="val">{val}</span>\
                        </div>\
                        </td>'.format(rid=rid, percentage=percentage, val=val)
                    nrows += 1
            # Remove header if we don't have any filled cells for it
            if nrows == 0:
                general_stats_html['headers'].pop(rid, None)
                logger.debug(
                    'Removing header {} from general stats table, as no data'.
                    format(k))
        # Index for colouring by module
        midx += 1
        if midx > (len(modcols) - 1):
            midx = 0
    return None
def general_stats_build_table():
    """ Helper function to add to the General Statistics table.
    Parses report.general_stats and returns HTML for general stats table.
    Also creates report.general_stats_raw for multiqc_general_stats.txt
    :param data: A dict with the data. First key should be sample name,
                 then the data key, then the data.
    :param headers: Dict / OrderedDict with information for the headers,
                    such as colour scales, min and max values etc.
                    See docs/writing_python.md for more information.
    :return: None
    """
    # First - collect settings for shared keys, so columns sharing a key use
    # a common colour scale and common min/max bounds
    shared_keys = defaultdict(lambda: dict())
    for mod in general_stats.keys():
        headers = general_stats[mod]['headers']
        for k in headers.keys():
            sk = headers[k].get('shared_key', None)
            if sk is not None:
                shared_keys[sk]['scale'] = headers[k]['scale']
                shared_keys[sk]['dmax'] = max(headers[k]['dmax'], shared_keys[sk].get('dmax', headers[k]['dmax']))
                # Bug fix: the shared lower bound must be the smallest dmin
                # seen — previously max() was used, inflating the minimum.
                shared_keys[sk]['dmin'] = min(headers[k]['dmin'], shared_keys[sk].get('dmin', headers[k]['dmin']))

    # Now build required HTML
    # Cycle of RGB colours used to tint each module's columns
    modcols = ['228,26,28', '55,126,184', '77,175,74', '152,78,163', '255,127,0',
               '255,255,51', '166,86,40', '247,129,191', '153,153,153']
    midx = 0
    for mod in general_stats.keys():
        headers = general_stats[mod]['headers']
        for k in headers.keys():
            rid = headers[k]['rid']
            headers[k]['modcol'] = modcols[midx]
            # Overwrite config with shared key settings
            sk = headers[k].get('shared_key', None)
            if sk is not None:
                headers[k]['scale'] = shared_keys[sk]['scale']
                headers[k]['dmax'] = shared_keys[sk]['dmax']
                headers[k]['dmin'] = shared_keys[sk]['dmin']
                sk = ' data-shared-key={}'.format(sk)
            else:
                sk = ''
            # Build the <th> header cell with chroma colour-scale attributes
            general_stats_html['headers'][rid] = '<th \
                id="header_{rid}" \
                class="chroma-col {rid}" \
                data-chroma-scale="{scale}" \
                data-chroma-max="{max}" \
                data-chroma-min="{min}" \
                {sk}><span data-toggle="tooltip" title="{mod}: {descrip}">{title}</span></th>' \
                .format(rid=rid, scale=headers[k]['scale'], max=headers[k]['dmax'],
                        min=headers[k]['dmin'], sk=sk, mod=mod,
                        descrip=headers[k]['description'], title=headers[k]['title'])
            # Add the data table cells
            nrows = 0
            for (sname, samp) in general_stats[mod]['data'].items():
                if k in samp:
                    val = samp[k]
                    # Raw (unmodified) value goes to the flat-file output
                    general_stats_raw[sname][rid] = val
                    if 'modify' in headers[k] and callable(headers[k]['modify']):
                        val = headers[k]['modify'](val)
                    # Percentage for the coloured background bar, clamped 0-100
                    try:
                        dmin = headers[k]['dmin']
                        dmax = headers[k]['dmax']
                        percentage = ((float(val) - dmin) / (dmax - dmin)) * 100
                        percentage = min(percentage, 100)
                        percentage = max(percentage, 0)
                    except (ZeroDivisionError, ValueError):
                        percentage = 0
                    # Apply the number format, falling back to the raw value
                    try:
                        val = headers[k]['format'].format(val)
                    except ValueError:
                        try:
                            val = headers[k]['format'].format(float(samp[k]))
                        except ValueError:
                            val = samp[k]
                    except Exception:
                        # Was a bare except: - narrowed so e.g. KeyboardInterrupt escapes
                        val = samp[k]
                    general_stats_html['rows'][sname][rid] = \
                        '<td class="data-coloured {rid}" >\
                        <div class="wrapper">\
                        <span class="bar" style="width:{percentage}%;"></span>\
                        <span class="val">{val}</span>\
                        </div>\
                        </td>'.format(rid=rid, percentage=percentage, val=val)
                    nrows += 1
            # Remove header if we don't have any filled cells for it
            if nrows == 0:
                general_stats_html['headers'].pop(rid, None)
                logger.debug('Removing header {} from general stats table, as no data'.format(k))
        # Index for colouring by module
        midx += 1
        if midx > (len(modcols) - 1):
            midx = 0
    return None