Пример #1
0
  def aln_from_bedfile(self, bed):
    '''Given a bed file name, retrieve the associated database
    Alignment and Lane objects.

    The file name must contain self.genome (matched as a
    case-insensitive regular expression) and is split by
    parse_repository_filename() into the library code, facility code
    and lane number used to look up the Lane.

    Returns an (alignment, lane) tuple, where the alignment is
    whatever self._create_alignment(bed, lane) returns.

    Raises ValueError if the genome code is not found in the file
    name, or if the file name matches no lane or more than one
    lane.'''

    LOGGER.info("Processing bed file: %s", bed)

    # A quick sanity check to try and make sure we don't load a file
    # against the wrong genome. This remains fallible since some
    # genome codes are quite short and might occur in a filename by
    # chance.
    # Note: do not merge the re.I flag into the repackaging branch (it
    # is unnecessary there).
    if not re.search(self.genome, bed, re.I):
      raise ValueError("Genome code not found in bed file name."
                       " Loading against the wrong genome (%s)?"
                       % self.genome)

    (code, facility, lanenum, _pipeline) = parse_repository_filename(bed)

    # Evaluate the queryset once rather than issuing separate count()
    # and index queries against the database.
    lanes = list(Lane.objects.filter(library__code=code,
                                     facility__code=facility,
                                     lanenum=lanenum))
    if not lanes:
      raise ValueError("Could not find lane for '%s'" % (bed,))
    if len(lanes) > 1:
      # str() guards against non-string ids (Django primary keys are
      # normally integers), which str.join() rejects with a TypeError
      # that would hide the real problem.
      raise ValueError("Found multiple lanes for '%s': %s"
                       % (bed, ", ".join(str(x.id) for x in lanes)))

    lane = lanes[0]
    aln = self._create_alignment(bed, lane)

    return (aln, lane)
Пример #2
0
 def setLaneStatusByFilename(self, fname, status):
     '''Change the status of the lane matching the library/facility/lanenum
     info extracted from the file name.

     fname  -- file name; parse_repository_filename() splits it into
               library, facility, lane number and pipeline.
     status -- new status for the lane (not referenced in the portion
               of the method shown here).

     Raises SystemExit if the library lookup raises
     Library.DoesNotExist.'''
     # 1. Extract the library/facility/lanenum identifiers from the file name.
     (library, facility, lanenum,
      pipeline) = parse_repository_filename(fname)
     # 2. Look up the library by name; a missing library ends the
     # program with an explanatory message.
     try:
         lib = Library.objects.search_by_name(library)
     except Library.DoesNotExist, _err:
         raise SystemExit("No library %s in repository." % library)
Пример #3
0
class RepoFileHandler(object):
    '''Registers files in the repository as Lanefile records.

    Class which is almost certainly overkill given the limited
    functionality left in this script, post-refactor.'''

    @staticmethod
    def run(fns, md5files=False, archive=None, md5sums=None):
        '''Main entry point for the class.

        For every file name in fns, look up its lane, determine its
        checksum and file type, and save a Lanefile record for it.

        fns      -- iterable of file names. The names may belong to
                    different lanes, so the lane is looked up per file.
        md5files -- if true (and md5sums is not given), obtain the
                    checksum via checksum_from_file(fname).
        archive  -- optional ArchiveLocation name. When given, each
                    Lanefile is tagged with that location and with
                    today's date as its archive date.
        md5sums  -- optional sequence of checksums, indexed in step
                    with fns.

        Whenever no checksum has been obtained by the above means
        (None), it is computed with checksum_file(fname).

        Raises SystemExit if archive does not name a known
        ArchiveLocation.'''

        arc = None
        arc_date = None

        if archive is not None:
            try:
                arc = ArchiveLocation.objects.get(name=archive)
            except ArchiveLocation.DoesNotExist:
                raise SystemExit("No ArchiveLocation with name '%s'" % archive)
            arc_date = datetime.date.today()

        for (idx, fname) in enumerate(fns):
            # We assume that the file names in the list may correspond
            # to different lanes. Hence, we search lane for each file
            # again.
            lane = get_lane_for_file(fname)
            # Even though fname was already parsed in
            # get_lane_for_file, parse it again as we need the
            # pipeline value.
            (_code, _facility, _lanenum,
             pipeline) = parse_repository_filename(fname)

            # Reset for every file: otherwise the name is unbound on
            # the first iteration when neither md5sums nor md5files is
            # used, and later files could inherit the previous file's
            # checksum.
            chksum = None
            if md5sums is not None:
                # Checksums were supplied by the caller.
                chksum = md5sums[idx]
            elif md5files:
                # Checksum is available in an .md5 file at the file
                # location.
                chksum = checksum_from_file(fname)
            # As a last resort, try to compute md5sum.
            if chksum is None:
                chksum = checksum_file(fname)

            filetype = Filetype.objects.guess_type(fname)

            # Record the file under its base name, minus any '.gz'
            # suffix.
            basefn = os.path.split(fname)[1]
            (stem, ext) = os.path.splitext(basefn)
            if ext == '.gz':
                basefn = stem
            LOGGER.debug("basefn: '%s'", basefn)

            lanefile = Lanefile(filename=basefn,
                                checksum=chksum,
                                filetype=filetype,
                                lane=lane,
                                pipeline=pipeline,
                                archive=arc,
                                archive_date=arc_date)
            lanefile.save()
            LOGGER.info("Added %s to repository.", basefn)
 def check_bam_vs_lane_fastq(self, bam, relaxed=False):
     '''
     Quick check that the number of reads in the bam file being saved
     is identical to the number of (passed PF) reads in the input fastq
     file. Returns the number of reads in the bam file.

     bam     -- bam file name; parse_repository_filename() extracts the
                library code, facility code and lane number from it.
     relaxed -- NOTE(review): not referenced in the portion of the
                method shown here; presumably loosens the comparison
                -- confirm.

     Exits the program via sys.exit() if no Lane matches the
     identifiers parsed from the file name.
     '''
     # Identify the lane from the file name alone; the pipeline
     # component is not needed here.
     (code, facility, lanenum, _pipeline) = parse_repository_filename(bam)
     try:
         lane = Lane.objects.get(library__code=code,
                                 facility__code=facility,
                                 lanenum=lanenum)
     except Lane.DoesNotExist, err:
         # Log the problem, then stop the program with a message.
         LOGGER.error(
             "Unexpected lane in filename, not found in repository.")
         sys.exit("Unable to find lane in repository")
def append(fname, library=None, facility=None, lanenum=None, genome=None):
  '''
  Given a filename, figure out where it belongs and load it into
  the repository. Additional hints may be provided.

  fname    -- name of the file to process.
  library, facility, lanenum
           -- optional hints. Supply either all three or none of
              them; when none are given they are taken from
              parse_repository_filename(fname).
  genome   -- optional hint (not referenced in the portion of the
              function shown here).

  Raises ValueError if only some of library/facility/lanenum are
  given, and StandardError if no Library has the given code.
  '''
  LOGGER.info("Processing %s", fname)

  # One flag per hint, True where the caller supplied a value.
  argcheck = [ x is not None for x in (library, facility, lanenum) ]
  if any(argcheck):
    # A partial set of hints is ambiguous, so reject it outright.
    if not all(argcheck):
      raise ValueError("Either use filename on its own, or all of"
                       + " the following: library, facility, lanenum.")
  else:
    # No hints at all: derive the identifiers from the file name.
    (library, facility, lanenum, _pipeline) = parse_repository_filename(fname)

  # From here on 'library' is rebound from the library code to the
  # Library object with that code.
  try:
    library = Library.objects.get(code=library)
  except Library.DoesNotExist, err:
    # The lookup raised before the rebinding took place, so 'library'
    # still holds the code string for the message.
    raise StandardError("No library found with code %s" % (library,))
Пример #6
0
def get_lane_for_file(fname):
    '''Return the single Lane matching the given repository file name.

    The library code, facility code and lane number are extracted
    from fname by parse_repository_filename() and used to query Lane.

    If no lane, or more than one lane, matches, the problem is logged
    and the program exits with status 1.'''

    (code, facility, lanenum, _pipeline) = parse_repository_filename(fname)
    lanes = list(Lane.objects.filter(library__code=code,
                                     lanenum=lanenum,
                                     facility__code=facility))

    lane = None
    if not lanes:
        LOGGER.error("Could not find lane for '%s'", fname)
    elif len(lanes) > 1:
        # str() guards against non-string ids (Django primary keys are
        # normally integers), which str.join() rejects with a
        # TypeError instead of letting the error be reported.
        LOGGER.error("Found multiple lanes for '%s': %s", fname,
                     ", ".join(str(x.id) for x in lanes))
    else:
        lane = lanes[0]

    if lane is None:
        LOGGER.error("Lane not determined! Exiting.")
        sys.exit(1)

    return lane
    def find_lanes(self):
        '''Store in self.lanes the lane(s) associated with the library
        code and, where applicable, lane number and facility.

        When any of self.merged_file, self.lane or self.library is
        set, lanes are selected by library code only; otherwise the
        lane number and facility code parsed from self.fn_base narrow
        the selection as well.

        Logs an error and exits the program with status 1 if no code
        can be determined or if no lane matches.'''

        if self.library:
            # NOTE(review): 'fn' is not bound anywhere in this method;
            # unless it is a module-level name this raises NameError.
            # Confirm where the code should come from.
            code = fn
        else:
            (code, facility, lanenum, _pipeline) = parse_repository_filename(self.fn_base)
            # Merged files carry only the code in their prefix, which
            # parse_repository_filename() above fails on, so take the
            # text before the first underscore instead.
            if self.merged_file or self.lane:
                code = self.fn_base.split('_')[0]

        if code in (None, ''):
            LOGGER.error("Unable to extract code from filename %s." % self.fn_base)
            sys.exit(1)

        # Always select on the library code; add lane number and
        # facility only when none of the code-only modes is active.
        criteria = {'library__code': code}
        if not (self.merged_file or self.lane or self.library):
            criteria['lanenum'] = lanenum
            criteria['facility__code'] = facility
        self.lanes = Lane.objects.filter(**criteria)

        if not len(self.lanes):
            LOGGER.error("No lane associated with code \'%s\'." % code)
            sys.exit(1)
Пример #8
0
django.setup()

from osqpipe.models import Lane, Lanefile

from osqpipe.pipeline.laneqc import LaneFastQCReport
from osqutil.utilities import parse_repository_filename
from osqutil.config import Config

CONFIG = Config()

# Walk the sub-sub-directories of the current directory; each one is
# expected to be named after a lane (library, facility, lane number).
for d in os.listdir('.'):
    if os.path.isdir(d):
        for r in os.listdir(d):
            if os.path.isdir(os.path.join(d, r)):

                (lib, fac, lane, _pipeline) = parse_repository_filename(r)

                if lib is None:
                    # Include the offending name; the message used to
                    # print a literal '%r' because the format operand
                    # was missing.
                    print("Cannot parse dir name: %r. Skipping." % (r,))
                    continue

                # 'lane' is the parsed lane number at this point; it
                # is rebound to the Lane object by the lookup below.
                laneid = "%s_%s%02d" % (lib, fac, lane)
                try:
                    lane = Lane.objects.get(library__code=lib,
                                            facility__code=fac,
                                            lanenum=lane)
                except Lane.DoesNotExist:
                    print("Lane not found in DB: %s. Skipping." % laneid)
                    continue
                if lane.laneqc_set.count() > 0:
                    print("Lane already has QC: %s. Skipping." % laneid)