def __init__(self,
                 libcode,
                 lanenum,
                 flowcell,
                 rundate,
                 runnumber=None,
                 url=None,
                 keepfastq=False,
                 genome=None,
                 facility='EXT',
                 machine='Unknown'):

        self.config = Config()
        self.library = Library.objects.get(code=libcode)
        self.lanenum = lanenum
        self.keepfastq = keepfastq
        self.facility = Facility.objects.get(code=facility)
        self.machine = machine
        self.flowcell = flowcell
        self.rundate = rundate
        self.url = url
        self.runnumber = runnumber

        if genome is None:
            self.genome = self.library.genome
        else:
            self.genome = Genome.objects.get(code=genome)
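
The constructor above resolves Library, Facility and (optionally) Genome records from their repository codes. A minimal usage sketch; the class name LaneHandler and all argument values below are placeholders for illustration, not taken from the source:

# Hypothetical instantiation; the codes must already exist in the repository.
handler = LaneHandler(libcode='do1234',
                      lanenum=1,
                      flowcell='C1234ACXX',
                      rundate='2021-01-31',
                      runnumber='0123',
                      keepfastq=True,
                      genome='mm10',      # omit to fall back to library.genome
                      facility='EXT',
                      machine='HiSeq2000')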
Example #2
    def build_genome_index_path(cls, genome, *args, **kwargs):

        # Import here rather than main file as otherwise cluster operations fail.
        from ..models import Program

        conf = Config()

        # Get information about default aligner, check that the program is
        # in path and try to predict its version.
        alignerinfo = ProgramSummary(conf.aligner,
                                     ssh_host=conf.althost,
                                     ssh_user=conf.althostuser,
                                     ssh_path=conf.althostpath,
                                     ssh_port=conf.althostport)

        # Check that the version of aligner has been registered in
        # repository.
        try:
            Program.objects.get(program=alignerinfo.program,
                                version=alignerinfo.version,
                                current=True)
            indexdir = "%s-%s" % (alignerinfo.program, alignerinfo.version)

        # If aligner version is missing, try to insert it into the database
        # (FIXME not yet implemented while we see how this works).
        except Program.DoesNotExist as _err:
            sys.exit((
                """Aligner "%s" version "%s" found at path "%s" """ %
                (alignerinfo.program, alignerinfo.version, alignerinfo.path)) +
                     "not recorded as current in repository! Quitting.")
Example #3
    def __init__(self, fq1, genome=None, enzyme=None, fq2=None):

        self.fq1 = fq1
        self.fq2 = fq2
        self.genome = genome
        self.genome_index = None
        self.enzyme = enzyme
        self.restriction_file = None

        self._check_file(fq1)
        self._check_file(fq2)

        self.alignment_program = 'bowtie2'

        self.conf = Config()

        self.hicup_conf_fname = os.path.join(
            self.conf.clusterworkdir,
            os.path.basename(self.fq1) + "_hicup.conf")
        self.hicup_output_dir = os.path.join(
            self.conf.clusterworkdir,
            os.path.basename(self.fq1) + "_hicup")
        # Note: str.rstrip() strips a set of characters rather than a
        # suffix, so trim the extensions explicitly.
        report_name = self.fq1
        for ext in ('.gz', '.fq'):
            if report_name.endswith(ext):
                report_name = report_name[:-len(ext)]
        self.hicup_report_fname = os.path.join(self.conf.clusterworkdir,
                                               report_name + ".hicup.html")

        # Get genome_file
        if self.genome is not None:
            self.genome_index = self._genome_index_path(genome)
            if enzyme is not None:
                self.restriction_file = self._restriction_file_path(
                    genome, enzyme)
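
A usage sketch for the HiCUP constructor above; the class name HicupJob, the FASTQ names, genome and enzyme codes are placeholders, not taken from the source:

# Hypothetical paired-end invocation with placeholder values.
job = HicupJob('sample_R1.fq.gz',
               genome='mm10',
               enzyme='HindIII',
               fq2='sample_R2.fq.gz')
# The constructor derives the HiCUP conf-file, output-dir and report paths
# under conf.clusterworkdir and resolves the genome index and restriction
# file from the supplied codes.

Example #4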
def get_files(libcode, filetype='FASTQ'):
  '''List sequencing data files available for a given LIMS sample ID.'''
  conf = Config()

  ## If we know how to get the file type, use the standard artifact
  ## name; otherwise just treat filetype as a regular expression.
  label = ARTIFACT_LABEL.setdefault(filetype, re.compile(filetype))

  path_re = re.compile(r'(.*)/([^\/]+)$')

  root = runs_containing_samples(conf.lims_rest_uri, libcode)

  count = 0
  for run_elem in root.findall('./run'):
    run_id = run_elem.find('./runFolder').text

    print "Run ID: %s\n" % run_id
    
    run_root = get_lims_run_details(conf.lims_rest_uri, run_id)

    for lib_elem in run_root.findall("./run/flowcell/library/sample[name='%s']/.." % libcode):
      for file_elem in lib_elem.findall('./file'):
        name = file_elem.find('./artifactName').text
        if label.search(name):
          url = urlparse(file_elem.find('./url').text)
          pathbits = path_re.match(url.path)
          if pathbits:
            print ("host: %s:%d\npath: %s\nfile: %s\n"
                   % (url.hostname, url.port, pathbits.group(1), pathbits.group(2)))
            count += 1
          else:
            raise ValueError("Unexpected URL path structure: %s" % url.path)  

  if count == 0:
    print "Nothing found."
 def __init__(self, debug=False):
     self.debug = debug
     if self.debug:
         LOGGER.setLevel(DEBUG)
     else:
         LOGGER.setLevel(INFO)
     self.conf = Config()
Example #6
    def build_genome_index_path(cls, genome, *args, **kwargs):

        # Import here rather than main file as otherwise cluster operations fail.
        from ..models import Program

        conf = Config()

        # Get information about default aligner, check that the program is
        # in path and try to predict its version.
        alignerinfo = ProgramSummary('STAR',
                                     ssh_host=conf.cluster,
                                     ssh_port=conf.clusterport,
                                     ssh_user=conf.clusteruser,
                                     ssh_path=conf.clusterpath)
        indexdir = None

        # Check that the version of aligner has been registered in
        # repository.
        try:
            Program.objects.get(program=alignerinfo.program,
                                version=alignerinfo.version,
                                current=True)
            indexdir = "%s_%s" % ('STAR', alignerinfo.version)

        except Program.DoesNotExist as _err:
            sys.exit((
                """Aligner "%s" version "%s" found at path "%s" """ %
                (alignerinfo.program, alignerinfo.version, alignerinfo.path)) +
                     "not recorded as current in repository! Quitting.")
Example #7
    def __init__(self,
                 namespace=None,
                 throttle=0,
                 memsize=20,
                 time_limit=48,
                 ssh_key=None,
                 local_workdir='.'):

        self.config = Config()

        if namespace is None:
            self.namespace = str(os.getpid())
        else:
            self.namespace = namespace

        # These will default to the config cluster working directory.
        self.runner = ClusterJobRunner()
        self.submitter = ClusterJobSubmitter()

        self.memsize = memsize  # expressed in GB
        self.time_limit = time_limit
        self.throttle = throttle
        self.ssh_key = ssh_key

        local_workdir = os.path.abspath(local_workdir)
        if not os.path.exists(local_workdir):
            os.mkdir(local_workdir)
        self.local_workdir = local_workdir
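
A usage sketch for the job-manager constructor above; the class name PipelineJobManager and the values are placeholders (memsize is in GB, per the comment in the constructor):

# Hypothetical instantiation with placeholder values.
manager = PipelineJobManager(namespace='run42',
                             throttle=10,
                             memsize=32,
                             time_limit=24,
                             local_workdir='scratch/run42')

Example #8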
 def __init__(self, verbose=False, test_mode=False):
   self.verbose = verbose
   self.test_mode = test_mode
   self.conf    = Config()
   self.libhandler = LibraryHandler(interactive=False, fuzzy=True,
                                    test_mode=test_mode)
   if verbose:
     LOGGER.setLevel(DEBUG)
   else:
     LOGGER.setLevel(INFO)
Example #9
 def __init__(self, lims=None, debug=False):
     if debug:
         LOGGER.setLevel(DEBUG)
     else:
         LOGGER.setLevel(INFO)
     self.conf = Config()
     self.missing_libraries = set()
     self.user_emails = set()
     if lims is None:
         lims = Lims()
     self.lims = lims
Example #10
    def __init__(self,
                 genome,
                 memsize=4,
                 coverage=False,
                 inprefixes=('IR_BQSR_ear_exome_',
                             'IR_BQSR_HCC_nodule_exome_'),
                 **kwargs):

        super(MutectManager, self).__init__(memsize=memsize, **kwargs)
        self.config = Config()
        self.genome = genome
        self.coverage = coverage
        self.inprefixes = inprefixes
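
MutectManager layers MuTect-specific settings on top of its parent manager class; a minimal sketch, with the genome code as a placeholder (any extra keyword arguments are forwarded to the parent constructor):

# Hypothetical call with placeholder values.
mgr = MutectManager(genome='GRCh38',
                    memsize=8,
                    coverage=True)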
Example #11
    def __init__(self, test_mode=False, finaldir=None, samplename=None):
        self.conf = Config()
        self.test_mode = test_mode
        if test_mode:
            LOGGER.setLevel(DEBUG)
        else:
            LOGGER.setLevel(INFO)

        # Default to saving the output in the current working directory.
        if finaldir is None:
            finaldir = os.path.realpath(os.getcwd())
        self.finaldir = finaldir
        self.samplename = samplename
Example #12
  def __init__(self, genome, prog, params='', progvers=None, headtrim=0, tailtrim=0):

    # Program and parameters can be a list or scalar. Params elements
    # should always be string; program can be either string or
    # osqpipe.models.Program.
    if all([ type(x) is not list for x in (prog, params) ]):

      # Scalar arguments
      self.prog    = [ prog ]
      self.params  = [ params ]

      # FIXME consider throwing an error here if progvers is already a list.
      self.progvers = [ progvers ]

    elif type(prog) is list:

      # List arguments (params may be the default empty string;
      # progvers may simply be a scalar None)
      self.prog    = prog

      if len(prog) == len(params):
        self.params  = params
      else:
        if params == '': # handle the empty default.
          self.params = [ '' for _x in prog ]
        else:
          raise ValueError("Lengths of prog and params list arguments"
                           + " must match.")
  
      if progvers is None: # handle the empty default.
        self.progvers = [ None for _x in prog ]
      else:
        if len(prog) == len(progvers):
          self.progvers = progvers
        else:
          raise ValueError("Lengths of prog and progvers list arguments"
                           + " must match.")

    else:
      raise TypeError("The params argument cannot be a list if prog is a scalar")

    self.genome  = genome
    self.headtrim = headtrim
    self.tailtrim = tailtrim
    self.conf = Config()
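
The branching above normalises scalar and list inputs into parallel prog/params/progvers lists; the sketch below illustrates the two accepted call shapes, assuming a hypothetical class name AlignmentSpec and placeholder values:

# Scalar form: wrapped into single-element lists.
scalar = AlignmentSpec('mm10', 'bwa', params='-t 4')
# -> prog=['bwa'], params=['-t 4'], progvers=[None]

# List form: lengths must match, otherwise a ValueError is raised.
listed = AlignmentSpec('mm10',
                       ['bwa', 'samtools'],
                       params=['-t 4', 'sort'],
                       progvers=['0.7.17', '1.9'])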
Example #13
    def __init__(self):

        self.cache_file = CACHE_FILE
        self.conf = Config()
        self._study_cache = {}
        self._missing_libcodes = set()

        # This defines the date from which all rows will be checked for
        # new information.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'r') as cache:
                date = cache.readline().strip()
                self.last_status_date = datetime.strptime(date, DATEFORMAT)
        else:
            # Fallback if cache file not present.
            self.last_status_date = datetime.fromtimestamp(0)

        # The running_status_date just keeps track of the most recent
        # status_date seen. It will be put into the cache file upon exit.
        self.running_status_date = datetime.fromtimestamp(0)
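
    # The constructor above implies that the first line of the cache file
    # holds a timestamp in DATEFORMAT. The complementary write-on-exit step
    # is not shown in this excerpt; a minimal sketch of what it might look
    # like (the method name is an assumption):
    def _save_cache(self):
        with open(self.cache_file, 'w') as cache:
            cache.write(self.running_status_date.strftime(DATEFORMAT) + "\n")

Example #14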
def compute_fast_qcforRepository(code, facility, replicate):
    """
  Computes and stores a FastQC report for a lane in the
  repository. This function will raise an exception if the lane
  already has a fastqc report. Note that this code links into the
  standard pipeline report generator and so will correctly produce
  PDFs as well as the regular report files.
  """
    conf = Config()

    lane = Lane.objects.get(library__code=code,
                            facility__code=facility,
                            lanenum=replicate)

    if lane.laneqc_set.filter(
            provenance__program__program='fastqc').count() == 0:
        with LaneFastQCReport(target=lane, path=conf.hostpath) as qcrep:
            qcrep.insert_into_repository()
    else:
        raise StandardError("Lane already has a FastQC report.")
Example #15
    def __init__(self,
                 test_mode=False,
                 db_library_check=True,
                 demux_prog='demuxIllumina',
                 force_primary=False,
                 force_all=None,
                 lims=None,
                 trust_lims_adapters=None,
                 force_download=False):

        self.conf = Config()
        self.test_mode = test_mode
        self.db_library_check = db_library_check
        self.demux_prog = demux_prog
        self.ready = 'COMPLETE'
        self.force_download = force_download

        if force_all:
            self.ready = (self.ready, 'PRIMARY COMPLETE', 'INCOMPLETE')

        # This may now be obsolete with the transition to Genologics LIMS.
        elif force_primary:
            self.ready = (self.ready, 'PRIMARY COMPLETE')

        self._demux_files = {}
        self.output_files = []
        if lims is None:
            lims = Lims()
        if not lims.running():
            LOGGER.error("Remote LIMS access broken... cannot continue.")
            sys.exit("LIMS not running.")
        self.lims = lims

        # If adapters not already entered in repository, this option will
        # load these metadata from the upstream LIMS:
        self.trust_lims_adapters = trust_lims_adapters

        if self.test_mode:
            LOGGER.setLevel(DEBUG)
        else:
            LOGGER.setLevel(INFO)
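
A usage sketch for the demultiplexing constructor above; the class name DemuxHandler and the option values are placeholders (note that the constructor exits if the remote LIMS is unreachable):

# Hypothetical instantiation with placeholder values.
handler = DemuxHandler(force_all=True,            # also accept PRIMARY COMPLETE / INCOMPLETE runs
                       trust_lims_adapters=True,  # pull adapter metadata from the LIMS if missing
                       test_mode=True)            # switches LOGGER to DEBUG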
Example #16
    def __init__(self, fq1, genome=None, enzyme=None, fq2=None):

        self.fq1 = fq1
        self.fq2 = fq2
        self.genome = genome
        self.genome_index = None
        self.enzyme = enzyme
        self.restriction_file = None

        self._check_file(fq1)
        self._check_file(fq2)

        self.alignment_program = 'bowtie2'

        self.conf = Config()

        self.hicup_output_dir = os.path.join(
            self.conf.clusterworkdir,
            os.path.basename(self.fq1) + "_hicup")
        create_remote_dir(self.conf.clusteruser, self.conf.cluster,
                          self.hicup_output_dir)

        # self.hicup_conf_fname = os.path.join(self.conf.clusterworkdir, os.path.basename(self.fq1) + "_hicup.conf")
        self.hicup_conf_fname = os.path.join(
            self.hicup_output_dir,
            os.path.basename(self.fq1) + "_hicup.conf")
        if self.fq1.endswith('p1.fq.gz'):
            report_name = self.fq1.replace('p1.fq.gz', '')
        else:
            report_name = self.fq1.replace('.fq.gz', '')
        self.hicup_report_fname = os.path.join(self.conf.clusterworkdir,
                                               report_name + ".hicup.html")
        self.hicup_report_bam = os.path.join(self.conf.clusterworkdir,
                                             report_name + ".bam")
        # Get genome_file
        if self.genome is not None:
            self.genome_index = self._genome_index_path(genome)
            if enzyme is not None:
                self.restriction_file = self._restriction_file_path(
                    genome, enzyme)
Example #17
def run_job(cmd, files, append=False, mem=2000, testmode=False):

    if files is None:
        files = []

    config = Config()

    try:
        host = config.althost
        assert (host != '')
        runner = DesktopJobSubmitter(test_mode=testmode)
    except Exception:
        runner = ClusterJobSubmitter(test_mode=testmode)

    if append:
        cmd = " ".join([cmd] + files)

    LOGGER.info("Transferring data files...")
    runner.transfer_data(files)

    LOGGER.info("Running command...")
    runner.submit_command(cmd, mem=mem)
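
run_job prefers a desktop submitter when config.althost is set and falls back to the cluster submitter; an illustrative call, with a placeholder command and file list:

# Transfers sample.bam and submits 'samtools index sample.bam' with mem=4000.
run_job('samtools index', ['sample.bam'], append=True, mem=4000)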
Example #18
    def __init__(self,
                 destination,
                 lims=None,
                 test_mode=False,
                 unprocessed_only=False,
                 force_download=False):

        self.conf = Config()
        self.test_mode = test_mode
        self.unprocessed_only = unprocessed_only
        self.destination = destination
        self.force_download = force_download
        self.targets = set()
        if lims is None:
            lims = Lims()
        if not lims.running():
            LOGGER.error("Remote LIMS access broken... cannot continue.")
            sys.exit("LIMS not running.")
        self.lims = lims

        if self.test_mode:
            LOGGER.setLevel(DEBUG)
        else:
            LOGGER.setLevel(INFO)
Example #19
    def __init__(self,
                 genome,
                 finaldir='.',
                 samplename=None,
                 aligner=None,
                 *args,
                 **kwargs):

        # A little programming-by-contract, as it were.
        #    if not all( hasattr(self, x) for x in ('job')):
        #      raise StandardError("JobRunner instance not set.")

        self.conf = Config()

        # Support relative paths as input.
        self.finaldir = os.path.realpath(finaldir)

        # Check if genome exists.
        LOGGER.info("Checking if specified genome file exists.")
        cmd = None
        if aligner is not None and aligner == 'star':
            cmd = ("if [ -d %s ]; then echo yes; else echo no; fi" % genome)
        else:
            cmd = ("if [ -f %s ]; then echo yes; else echo no; fi" % genome)
        LOGGER.debug(cmd)

        if not self.job.test_mode:
            runjob = ClusterJobRunner(test_mode=self.job.test_mode)
            cmdstdoutfile = runjob.run_command(cmd)
            first_line = cmdstdoutfile.readline()
            first_line = first_line.rstrip('\n')
            if first_line != 'yes':
                raise ValueError("Genome %s inaccessible or missing." % genome)

        self.genome = genome
        self.samplename = sanitize_samplename(samplename)
Example #20
    def __init__(self):

        django.setup()

        self.conf = Config()
Example #21
 def __init__(self, testMode=False):
     self.testMode = testMode
     self.conf = Config()
     self.bedtype = Filetype.objects.get(code='bed')
     self.bgrtype = Filetype.objects.get(code='bgr')
Example #22
import datetime
import re

from subprocess import Popen, PIPE
from shutil import copy2

from django.db import transaction
from ..models import ArchiveLocation, Lanefile, Alnfile, \
    QCfile, AlnQCfile, Peakfile, MergedAlnfile, Datafile
from osqutil.utilities import checksum_file, bash_quote

from osqutil.config import Config
from osqutil.setup_logs import configure_logging

LOGGER = configure_logging('archive')
CONFIG = Config()


################################################################################
def _archive_file_via_scp(fobj, attempts=1, sleeptime=2):
    '''
    A wrapper for scp allowing multiple attempts at the transfer in case
    of a recoverable error.
    '''
    unrecoverable = [
        'No such file or directory',
        'Failed to add the host to the list of known hosts',
        'Operation not permitted'
    ]

    arch = fobj.archive
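    # Hypothetical continuation, not the original body of this excerpt.
    # 'src' and 'dest' stand in for the source path and scp destination,
    # whose derivation from fobj and arch is not shown here; assumes that
    # 'time' is imported at module level.
    src = '/path/to/source/file'
    dest = 'user@archivehost:/archive/path/'
    stderr = ''
    for _attempt in range(attempts):
        proc = Popen(['scp', '-p', src, dest], stdout=PIPE, stderr=PIPE)
        (_stdout, stderr) = proc.communicate()
        if proc.returncode == 0:
            return
        if any(msg in str(stderr) for msg in unrecoverable):
            break
        time.sleep(sleeptime)
    raise ValueError("scp transfer failed: %s" % stderr)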
Example #23
import os
import sys

# set up logger
from osqutil.setup_logs import configure_logging
from logging import WARNING
LOGGER = configure_logging(level=WARNING)

# import config
from osqutil.config import Config

# For insertion of lane info:
import django
from osqpipe.models import Lane, Library, ExternalRecord

# set up config
DBCONF = Config()

django.setup()


def check_ena_submission_integrity(code):

    library = None
    try:
        library = Library.objects.get(code=code)
    except Library.DoesNotExist:
        LOGGER.error("Library with code=%s not found!", code)
        sys.exit(1)

    # check for external ENA record for library
    try: