def __init__(self, libcode, lanenum, flowcell, rundate, runnumber=None,
             url=None, keepfastq=False, genome=None, facility='EXT',
             machine='Unknown'):
    '''
    Look up the library and facility records in the repository and
    store the sequencing-run metadata on the instance.
    '''
    self.config    = Config()
    self.library   = Library.objects.get(code=libcode)
    self.facility  = Facility.objects.get(code=facility)
    self.lanenum   = lanenum
    self.keepfastq = keepfastq
    self.machine   = machine
    self.flowcell  = flowcell
    self.rundate   = rundate
    self.url       = url
    self.runnumber = runnumber
    # Default to the genome recorded against the library itself unless
    # an explicit override code was supplied.
    self.genome = (self.library.genome if genome is None
                   else Genome.objects.get(code=genome))
def build_genome_index_path(cls, genome, *args, **kwargs):
    '''
    Construct the genome index directory name for the configured
    default aligner, first confirming that the detected aligner
    version is registered as current in the repository.
    '''
    # Import here rather than main file as otherwise cluster operations fail.
    from ..models import Program
    conf = Config()
    # Get information about default aligner, check that the program is
    # in path and try to predict its version.
    alignerinfo = ProgramSummary(conf.aligner,
                                 ssh_host=conf.althost,
                                 ssh_user=conf.althostuser,
                                 ssh_path=conf.althostpath,
                                 ssh_port=conf.althostport)
    # Check that the version of aligner has been registered in
    # repository.
    try:
        Program.objects.get(program=alignerinfo.program,
                            version=alignerinfo.version,
                            current=True)
        indexdir = "%s-%s" % (alignerinfo.program, alignerinfo.version)
        # NOTE(review): indexdir is built but not visibly returned here —
        # confirm the remainder of this method if it continues elsewhere.

    # If aligner version is missing, try to insert it into the database
    # (FIXME not yet implemented while we see how this works).
    # FIX: 'except X, err' is Python-2-only syntax; the 'as' form below
    # is valid on Python 2.6+ and Python 3.
    except Program.DoesNotExist as _err:
        sys.exit(("""Aligner "%s" version "%s" found at path "%s" """
                  % (alignerinfo.program, alignerinfo.version, alignerinfo.path))
                 + "not recorded as current in repository! Quitting.")
def __init__(self, fq1, genome=None, enzyme=None, fq2=None):
    '''
    Record the input fastq file(s) and derive the HiCUP configuration,
    output-directory and report paths under the cluster working
    directory.
    '''
    self.fq1 = fq1
    self.fq2 = fq2
    self.genome = genome
    self.genome_index = None
    self.enzyme = enzyme
    self.restriction_file = None
    self._check_file(fq1)
    # NOTE(review): fq2 may be None — confirm _check_file tolerates that.
    self._check_file(fq2)
    self.alignment_program = 'bowtie2'
    self.conf = Config()
    self.hicup_conf_fname = os.path.join(
        self.conf.clusterworkdir,
        os.path.basename(self.fq1) + "_hicup.conf")
    self.hicup_output_dir = os.path.join(
        self.conf.clusterworkdir,
        os.path.basename(self.fq1) + "_hicup")
    # FIX: the original used rstrip('.gz') / rstrip('.fq'), but rstrip
    # strips any trailing run of the listed *characters* ('.', 'g', 'z'
    # / '.', 'f', 'q'), not the literal suffix, so filenames ending in
    # those letters were mangled. Remove the exact suffixes instead.
    report_name = self.fq1
    if report_name.endswith('.gz'):
        report_name = report_name[:-len('.gz')]
    if report_name.endswith('.fq'):
        report_name = report_name[:-len('.fq')]
    self.hicup_report_fname = os.path.join(self.conf.clusterworkdir,
                                           report_name + ".hicup.html")
    # Get genome_file
    if self.genome is not None:
        self.genome_index = self._genome_index_path(genome)
        if enzyme is not None:
            self.restriction_file = self._restriction_file_path(genome, enzyme)
def get_files(libcode, filetype='FASTQ'):
    '''List sequencing data files available for a given LIMS sample ID.'''
    conf = Config()
    ## If we know how to get the file type, use the standard artifact
    ## name; otherwise just treat filetype as a regular expression.
    label = ARTIFACT_LABEL.setdefault(filetype, re.compile(filetype))
    path_re = re.compile(r'(.*)/([^\/]+)$')
    root = runs_containing_samples(conf.lims_rest_uri, libcode)
    count = 0
    for run_elem in root.findall('./run'):
        run_id = run_elem.find('./runFolder').text
        # FIX: print *statements* are a SyntaxError under Python 3;
        # single-argument print(...) calls behave identically on 2 and 3.
        print("Run ID: %s\n" % run_id)
        run_root = get_lims_run_details(conf.lims_rest_uri, run_id)
        for lib_elem in run_root.findall(
                "./run/flowcell/library/sample[name='%s']/.." % libcode):
            for file_elem in lib_elem.findall('./file'):
                name = file_elem.find('./artifactName').text
                if label.search(name):
                    url = urlparse(file_elem.find('./url').text)
                    pathbits = path_re.match(url.path)
                    if pathbits:
                        print("host: %s:%d\npath: %s\nfile: %s\n"
                              % (url.hostname, url.port,
                                 pathbits.group(1), pathbits.group(2)))
                        count += 1
                    else:
                        raise ValueError("Unexpected URL path structure: %s"
                                         % url.path)
    if count == 0:
        print("Nothing found.")
def __init__(self, debug=False):
    '''Load the pipeline config and set log verbosity from the debug flag.'''
    self.debug = debug
    LOGGER.setLevel(DEBUG if self.debug else INFO)
    self.conf = Config()
def build_genome_index_path(cls, genome, *args, **kwargs):
    '''
    Construct the STAR genome index directory name, first confirming
    that the detected STAR version is registered as current in the
    repository.
    '''
    # Import here rather than main file as otherwise cluster operations fail.
    from ..models import Program
    conf = Config()
    # Get information about default aligner, check that the program is
    # in path and try to predict its version.
    alignerinfo = ProgramSummary('STAR',
                                 ssh_host=conf.cluster,
                                 ssh_port=conf.clusterport,
                                 ssh_user=conf.clusteruser,
                                 ssh_path=conf.clusterpath)
    indexdir = None
    # Check that the version of aligner has been registered in
    # repository.
    try:
        Program.objects.get(program=alignerinfo.program,
                            version=alignerinfo.version,
                            current=True)
        indexdir = "%s_%s" % ('STAR', alignerinfo.version)
        # NOTE(review): indexdir is built but not visibly returned here —
        # confirm the remainder of this method if it continues elsewhere.

    # FIX: 'except X, err' is Python-2-only syntax; the 'as' form below
    # is valid on Python 2.6+ and Python 3.
    except Program.DoesNotExist as _err:
        sys.exit(("""Aligner "%s" version "%s" found at path "%s" """
                  % (alignerinfo.program, alignerinfo.version, alignerinfo.path))
                 + "not recorded as current in repository! Quitting.")
def __init__(self, namespace=None, throttle=0, memsize=20, time_limit=48,
             ssh_key=None, local_workdir='.'):
    '''
    Set up cluster job runner/submitter helpers and a local working
    directory, creating the latter if it does not yet exist.
    '''
    self.config = Config()
    # A process-id-derived namespace keeps concurrent runs apart.
    self.namespace = str(os.getpid()) if namespace is None else namespace
    # These will default to the config cluster working directory.
    self.runner = ClusterJobRunner()
    self.submitter = ClusterJobSubmitter()
    self.memsize = memsize  # expressed in GB
    self.time_limit = time_limit
    self.throttle = throttle
    self.ssh_key = ssh_key
    workdir = os.path.abspath(local_workdir)
    if not os.path.exists(workdir):
        os.mkdir(workdir)
    self.local_workdir = workdir
def __init__(self, verbose=False, test_mode=False):
    '''
    Create a non-interactive, fuzzy-matching library handler and set
    log verbosity from the verbose flag.
    '''
    self.verbose = verbose
    self.test_mode = test_mode
    self.conf = Config()
    self.libhandler = LibraryHandler(interactive=False,
                                     fuzzy=True,
                                     test_mode=test_mode)
    LOGGER.setLevel(DEBUG if verbose else INFO)
def __init__(self, lims=None, debug=False):
    '''
    Initialise bookkeeping sets and the LIMS connection, constructing
    a default Lims object when none is supplied.
    '''
    LOGGER.setLevel(DEBUG if debug else INFO)
    self.conf = Config()
    self.missing_libraries = set()
    self.user_emails = set()
    self.lims = Lims() if lims is None else lims
def __init__(self, genome, memsize=4, coverage=False,
             inprefixes=('IR_BQSR_ear_exome_', 'IR_BQSR_HCC_nodule_exome_'),
             **kwargs):
    '''
    Configure a MutectManager: delegates memsize and any remaining
    keyword arguments to the parent manager class, then stores the
    target genome, coverage flag and input filename prefixes.
    '''
    super(MutectManager, self).__init__(memsize=memsize, **kwargs)
    self.config = Config()
    self.genome = genome          # genome against which variants are called
    self.coverage = coverage      # whether to compute coverage as well
    self.inprefixes = inprefixes  # filename prefixes selecting input files
def __init__(self, test_mode=False, finaldir=None, samplename=None):
    '''
    Store the output directory and sample name, defaulting the former
    to the current working directory; test mode raises log verbosity.
    '''
    self.conf = Config()
    self.test_mode = test_mode
    LOGGER.setLevel(DEBUG if test_mode else INFO)
    # Default to saving the output in the current working directory.
    self.finaldir = (os.path.realpath(os.getcwd())
                     if finaldir is None else finaldir)
    self.samplename = samplename
def __init__(self, genome, prog, params='', progvers=None, headtrim=0,
             tailtrim=0):
    '''
    Store an alignment configuration. prog, params and progvers may
    each be a scalar or parallel lists of equal length; scalars are
    normalised to one-element lists. Raises ValueError on length
    mismatches and TypeError when params is a list but prog is not.
    '''
    # Program and parameters can be a list or scalar. Params elements
    # should always be string; program can be either string or
    # osqpipe.models.Program.
    if not isinstance(prog, list) and not isinstance(params, list):
        # Scalar arguments: normalise to one-element lists.
        self.prog = [prog]
        self.params = [params]
        # FIXME consider throwing an error here if progvers is already a list.
        self.progvers = [progvers]
    elif isinstance(prog, list):
        # List arguments (params may be the default empty string;
        # progvers may simply be a scalar None).
        self.prog = prog
        if isinstance(params, list):
            if len(params) != len(prog):
                raise ValueError("Lengths of prog and params list arguments"
                                 + " must match.")
            self.params = params
        elif params == '':  # handle the empty default.
            self.params = ['' for _x in prog]
        else:
            # FIX: previously a non-empty scalar string here was accepted
            # whenever its *character* length happened to equal len(prog),
            # silently storing a bare string instead of a list.
            raise ValueError("Lengths of prog and params list arguments"
                             + " must match.")
        if progvers is None:  # handle the empty default.
            self.progvers = [None for _x in prog]
        elif isinstance(progvers, list) and len(progvers) == len(prog):
            self.progvers = progvers
        else:
            raise ValueError("Lengths of prog and progvers list arguments"
                             + " must match.")
    else:
        raise TypeError("The params argument cannot be a list if prog is a scalar")
    self.genome = genome
    self.headtrim = headtrim
    self.tailtrim = tailtrim
    self.conf = Config()
def __init__(self):
    '''
    Initialise study/libcode caches and load the last-seen status date
    from the cache file, falling back to the epoch when absent.
    '''
    self.cache_file = CACHE_FILE
    self.conf = Config()
    self._study_cache = {}
    self._missing_libcodes = set()
    # This defines the date from which all rows will be checked for
    # new information.
    if os.path.exists(self.cache_file):
        with open(self.cache_file, 'r') as cache:
            firstline = cache.readline().strip()
        self.last_status_date = datetime.strptime(firstline, DATEFORMAT)
    else:
        # Fallback if cache file not present.
        self.last_status_date = datetime.fromtimestamp(0)
    # The running_status_date just keeps track of the most recent
    # status_date seen. It will be put into the cache file upon exit.
    self.running_status_date = datetime.fromtimestamp(0)
def compute_fast_qcforRepository(code, facility, replicate):
    """
    Computes and stores a FastQC report for a lane in the repository.
    This function will raise an exception if the lane already has a
    fastqc report. Note that this code links into the standard
    pipeline report generator and so will correctly produce PDFs as
    well as the regular report files.
    """
    conf = Config()
    lane = Lane.objects.get(library__code=code,
                            facility__code=facility,
                            lanenum=replicate)
    # Refuse to overwrite an existing FastQC report for this lane.
    existing = lane.laneqc_set.filter(provenance__program__program='fastqc')
    if existing.count() != 0:
        raise StandardError("Lane already has a FastQC report.")
    with LaneFastQCReport(target=lane, path=conf.hostpath) as qcrep:
        qcrep.insert_into_repository()
def __init__(self, test_mode=False, db_library_check=True,
             demux_prog='demuxIllumina', force_primary=False, force_all=None,
             lims=None, trust_lims_adapters=None, force_download=False):
    '''
    Configure the demultiplexing handler: which run statuses count as
    ready, the LIMS connection (aborting if it is unreachable), and
    various processing flags.
    '''
    self.conf = Config()
    self.test_mode = test_mode
    self.db_library_check = db_library_check
    self.demux_prog = demux_prog
    self.force_download = force_download
    # Statuses considered ready for processing; the force flags widen
    # the set of acceptable statuses.
    self.ready = 'COMPLETE'
    if force_all:
        self.ready = (self.ready, 'PRIMARY COMPLETE', 'INCOMPLETE')
    elif force_primary:
        # This may now be obsolete with the transition to Genologics LIMS.
        self.ready = (self.ready, 'PRIMARY COMPLETE')
    self._demux_files = {}
    self.output_files = []
    if lims is None:
        lims = Lims()
    if not lims.running():
        LOGGER.error("Remote LIMS access broken... cannot continue.")
        sys.exit("LIMS not running.")
    self.lims = lims
    # If adapters not already entered in repository, this option will
    # load these metadata from the upstream LIMS:
    self.trust_lims_adapters = trust_lims_adapters
    LOGGER.setLevel(DEBUG if self.test_mode else INFO)
def __init__(self, fq1, genome=None, enzyme=None, fq2=None):
    '''
    Record the input fastq file(s), create the remote HiCUP output
    directory on the cluster, and derive the configuration, report and
    bam output paths.
    '''
    self.fq1 = fq1
    self.fq2 = fq2
    self.genome = genome
    self.genome_index = None
    self.enzyme = enzyme
    self.restriction_file = None
    self._check_file(fq1)
    # NOTE(review): fq2 may be None — confirm _check_file tolerates that.
    self._check_file(fq2)
    self.alignment_program = 'bowtie2'
    self.conf = Config()
    self.hicup_output_dir = os.path.join(
        self.conf.clusterworkdir,
        os.path.basename(self.fq1) + "_hicup")
    create_remote_dir(self.conf.clusteruser, self.conf.cluster,
                      self.hicup_output_dir)
    self.hicup_conf_fname = os.path.join(
        self.hicup_output_dir,
        os.path.basename(self.fq1) + "_hicup.conf")
    # FIX: the original used str.replace, which removes the pattern
    # anywhere in the path (e.g. in a directory name), not just as the
    # trailing filename suffix. Strip the exact suffix instead.
    if self.fq1.endswith('p1.fq.gz'):
        report_name = self.fq1[:-len('p1.fq.gz')]
    elif self.fq1.endswith('.fq.gz'):
        report_name = self.fq1[:-len('.fq.gz')]
    else:
        report_name = self.fq1
    self.hicup_report_fname = os.path.join(self.conf.clusterworkdir,
                                           report_name + ".hicup.html")
    self.hicup_report_bam = os.path.join(self.conf.clusterworkdir,
                                         report_name + ".bam")
    # Get genome_file
    if self.genome is not None:
        self.genome_index = self._genome_index_path(genome)
        if enzyme is not None:
            self.restriction_file = self._restriction_file_path(genome, enzyme)
def run_job(cmd, files, append=False, mem=2000, testmode=False):
    '''
    Submit a command (optionally with its data files appended to the
    command line) to a remote job runner, transferring the files first.
    Prefers the configured alternative desktop host; falls back to
    cluster submission on any failure (deliberate best-effort).
    '''
    if files is None:
        files = []
    config = Config()
    try:
        host = config.althost
        # FIX: this check previously used 'assert', which is stripped
        # when Python runs with -O, silently defeating the host test.
        # Raising here feeds the deliberate fall-back below.
        if host == '':
            raise ValueError("No alternative host configured.")
        runner = DesktopJobSubmitter(test_mode=testmode)
    except Exception:
        # Broad catch is intentional: any problem with the desktop
        # route (missing config attribute, submitter failure) means we
        # fall back to the cluster.
        runner = ClusterJobSubmitter(test_mode=testmode)
    if append:
        cmd = " ".join([cmd] + files)
    LOGGER.info("Transferring data files...")
    runner.transfer_data(files)
    LOGGER.info("Running command...")
    runner.submit_command(cmd, mem=mem)
def __init__(self, destination, lims=None, test_mode=False,
             unprocessed_only=False, force_download=False):
    '''
    Store download options and the LIMS connection, aborting outright
    if the remote LIMS is not reachable.
    '''
    self.conf = Config()
    self.test_mode = test_mode
    self.unprocessed_only = unprocessed_only
    self.destination = destination
    self.force_download = force_download
    self.targets = set()
    if lims is None:
        lims = Lims()
    if not lims.running():
        LOGGER.error("Remote LIMS access broken... cannot continue.")
        sys.exit("LIMS not running.")
    self.lims = lims
    LOGGER.setLevel(DEBUG if self.test_mode else INFO)
def __init__(self, genome, finaldir='.', samplename=None, aligner=None,
             *args, **kwargs):
    '''
    Record the target genome and output directory, first checking on
    the cluster that the genome index (a directory for STAR, a file
    otherwise) actually exists. Raises ValueError when the remote
    check does not answer 'yes'.
    '''
    # A little programming-by-contract, as it were.
    # if not all( hasattr(self, x) for x in ('job')):
    #   raise StandardError("JobRunner instance not set.")
    self.conf = Config()
    # Support relative paths as input.
    self.finaldir = os.path.realpath(finaldir)
    # Check if genome exists.
    LOGGER.info("Checking if specified genome file exists.")
    cmd = None
    if aligner is not None and aligner == 'star':
        # STAR indices are directories rather than single files.
        cmd = ("if [ -d %s ]; then echo yes; else echo no; fi" % genome)
    else:
        cmd = ("if [ -f %s ]; then echo yes; else echo no; fi" % genome)
    LOGGER.debug(cmd)
    # The remote existence check is skipped entirely in test mode.
    # NOTE(review): assumes self.job has already been set by a
    # cooperating subclass/mixin before this runs — confirm.
    if not self.job.test_mode:
        runjob = ClusterJobRunner(test_mode=self.job.test_mode)
        cmdstdoutfile = runjob.run_command(cmd)
        first_line = cmdstdoutfile.readline()
        first_line = first_line.rstrip('\n')
        if first_line != 'yes':
            raise ValueError("Genome %s inaccessible or missing." % genome)
    self.genome = genome
    self.samplename = sanitize_samplename(samplename)
def __init__(self):
    '''
    Initialise the Django environment (required before ORM access),
    then load the pipeline configuration.
    '''
    django.setup()
    self.conf = Config()
def __init__(self, testMode=False):
    '''
    Load the pipeline configuration and cache the repository Filetype
    objects used when registering bed/bedgraph output files.
    '''
    self.testMode = testMode
    self.conf = Config()
    self.bedtype = Filetype.objects.get(code='bed')  # bed filetype record
    self.bgrtype = Filetype.objects.get(code='bgr')  # bedgraph filetype record
import datetime import re from subprocess import Popen, PIPE from shutil import copy2 from django.db import transaction from ..models import ArchiveLocation, Lanefile, Alnfile, \ QCfile, AlnQCfile, Peakfile, MergedAlnfile, Datafile from osqutil.utilities import checksum_file, bash_quote from osqutil.config import Config from osqutil.setup_logs import configure_logging LOGGER = configure_logging('archive') CONFIG = Config() ################################################################################ def _archive_file_via_scp(fobj, attempts=1, sleeptime=2): ''' A wrapper for scp allowing multiple attempts for the transfer in case of recoverable error. ''' unrecoverable = [ 'No such file or directory', 'Failed to add the host to the list of known hosts', 'Operation not permitted' ] arch = fobj.archive
import os # set up logger from osqutil.setup_logs import configure_logging from logging import WARNING LOGGER = configure_logging(level=WARNING) # import config from osqutil.config import Config # For insertion of lane info: import django from osqpipe.models import Lane, Library, ExternalRecord # set up config DBCONF = Config() django.setup() def check_ena_submission_integrity(code): library = None try: library = Library.objects.get(code=code) except Library.DoesNotExist: LOGGER.error("Library with code=%s not found!", code) sys.exit(1) # check for external ENA record for library try: