Пример #1
0
import sys
from subprocess import Popen, PIPE
import shutil

import django
django.setup()

from osqpipe.models import Restrictome, Genome, Program

from osqutil.cluster import ClusterJobSubmitter
from osqutil.utilities import write_to_remote_file, transfer_file, checksum_file
from osqutil.config import Config

from osqutil.setup_logs import configure_logging
from logging import INFO, DEBUG
LOGGER = configure_logging('hicup')


class HiCUP(object):
    def __init__(self, fq1, genome=None, enzyme=None, fq2=None):

        self.fq1 = fq1
        self.fq2 = fq2
        self.genome = genome
        self.genome_index = None
        self.enzyme = enzyme
        self.restriction_file = None

        self._check_file(fq1)
        self._check_file(fq2)
Пример #2
0
import time
import datetime
import re

from subprocess import Popen, PIPE
from shutil import copy2

from django.db import transaction
from ..models import ArchiveLocation, Lanefile, Alnfile, \
    QCfile, AlnQCfile, Peakfile, MergedAlnfile, Datafile
from osqutil.utilities import checksum_file, bash_quote

from osqutil.config import Config
from osqutil.setup_logs import configure_logging

LOGGER = configure_logging('archive')
CONFIG = Config()


################################################################################
def _archive_file_via_scp(fobj, attempts=1, sleeptime=2):
    '''
  A wrapper for scp allowing multiple attempts for the transfer in case
  of recoverable error.
  '''
    unrecoverable = [
        'No such file or directory',
        'Failed to add the host to the list of known hosts',
        'Operation not permitted'
    ]
Пример #3
0
    md5sums = None
    if ARGS.md5arguments:
        fnames = []
        md5sums = []
        fname = True
        for s in ARGS.files:
            if fname:
                fnames.append(s)
                fname = False
            else:
                md5sums.append(s)
                fname = True
        if len(fnames) == len(md5sums):
            ARGS.files = fnames

    LOGGER = configure_logging(level=DEBUG)
    django.setup()

    HND = RepoFileHandler()

    if ARGS.summary_file is not None:
        HND.add_lane_summary(ARGS.summary_file)
    if len(ARGS.files):
        if ARGS.qcfile:
            HND.add_qc_files(ARGS.files, ARGS.program_name, md5sums=md5sums)
        else:
            HND.run(ARGS.files,
                    md5files=ARGS.md5files,
                    archive=ARGS.archive,
                    md5sums=md5sums)
Пример #4
0
# The osqpipe python package is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with the osqpipe python package.  If not, see
# <http://www.gnu.org/licenses/>.

import sys
import os

# set up logger
from osqutil.setup_logs import configure_logging
from logging import WARNING
LOGGER = configure_logging(level=WARNING)

# import config
from osqutil.config import Config

# For insertion of lane info:
import django
from osqpipe.models import Lane, Library, ExternalRecord

# set up config
DBCONF = Config()

django.setup()


def check_ena_submission_integrity(code):
Пример #5
0
'''Script to fetch fastq files from the LIMS.'''

import sys
import os
import os.path
import re

from osqutil.utilities import build_incoming_fastq_name, unzip_file, \
    set_file_permissions, checksum_file
from .upstream_lims import Lims
from ..models import Library, Lane
from osqutil.config import Config

from osqutil.setup_logs import configure_logging
from logging import INFO, DEBUG
LOGGER = configure_logging('fetch_fastq')

###############################################################################


class FQFileFetcher(object):
    '''Class used to query the LIMS for fastq files associated with a
  given flowcell and download them to a destination directory.'''

    __slots__ = ('destination', 'lims', 'targets', 'test_mode', 'conf',
                 'unprocessed_only', 'force_download')

    def __init__(self,
                 destination,
                 lims=None,
                 test_mode=False,
Пример #6
0
'''Module providing functions used to retrieve MGA file information
from the Genologics LIMS.'''

import sys
import os
import os.path

from .upstream_lims import Lims

from osqutil.utilities import call_subprocess, CalledProcessError
from osqutil.config import Config
from osqutil.setup_logs import configure_logging
from logging import INFO, DEBUG

CONFIG = Config()
LOGGER = configure_logging('fetch_mga')

TEST_MODE = False


def fetch_mga(flowcell, flowlane, destination, nameprefix, lims_fc=None):
    """Fetches MGA report from Genologics LIMS. Returns PDF report."""
    mgafiles = []
    flowlane = int(flowlane)

    # start logging
    if TEST_MODE:
        LOGGER.setLevel(DEBUG)
    else:
        LOGGER.setLevel(INFO)
Пример #7
0
'''Given a set of metadata on the command-line, create a new row in
the library repository table.'''

import sys
import re
import copy
from osqutil.config import Config

from ..models import Factor, Genome, Antibody, Strain, Sex, Tissue, \
    Library, Libtype, Project, Adapter, Linkerset, Sample, Source, \
    Condition

from django.db import transaction

from osqutil.setup_logs import configure_logging
LOGGER = configure_logging()

CONFIG = Config()


######################################################################
class AnnotationMismatchError(ValueError):
    '''Custom exception signalling a mismatch between input and database.

    Derives from ValueError, so handlers written for ValueError will
    also catch this exception.
    '''


######################################################################
class LibraryHandler(object):
    '''Class encapsulating the creation of new library records.'''
Пример #8
0
import re
import tarfile

from tempfile import mkdtemp
from shutil import rmtree, move, copy
from pkg_resources import Requirement, resource_filename

from django.db import transaction, models
from ..models import Program, LaneQC, QCfile, Filetype, Lanefile, DataProvenance, Datafile, DataProcess
from osqutil.progsum import ProgramSummary
from osqutil.utilities import checksum_file, call_subprocess, rezip_file, set_file_permissions, transfer_file
from osqutil.config import Config
from osqutil.setup_logs import configure_logging

CONFIG = Config()
LOGGER = configure_logging('laneqc')


class QCReport(object):
    '''
  Abstract superclass handling all QC reports. Note that this is
  implemented as a context manager, so you would typically use
  subclasses in the following way::

    with LaneFastQCReport(target=l, program_name='fastqc') as rep:
      rep.insert_into_repository()
  '''

    __slots__ = ('target', 'workdir', 'output_files', 'program_name', 'path',
                 'program_params', '_dbprog', '_delete_workdir', 'output_md5s',
                 'move_files')
Пример #9
0
from django.db import transaction

from ..models import Alnfile, Library, Alignment, MergedAlnfile, Genome, Lane
from osqutil.samtools import count_bam_reads
from osqutil.utilities import call_subprocess, checksum_file, \
    sanitize_samplename
from osqutil.config import Config
from .bwa_runner import ClusterJobManager

import xml.etree.ElementTree as ET
from tempfile import NamedTemporaryFile
from pysam import AlignmentFile
from shutil import move

from osqutil.setup_logs import configure_logging
LOGGER = configure_logging('gatk')
CONFIG = Config()


################################################################################
# A handful of utility functions.
def retrieve_readgroup_alignment(rgroup, genome=None, bamfilter=False):
    '''
  Simply returns the osqpipe Alignment object for a given read group,
  a list of which is returned by this call:

  pysam.AlignmentFile.header.get('RG', []).
  '''
    alns = Alignment.objects.filter(lane__library__code=rgroup.get('LB'),
                                    lane__facility__code=rgroup.get('CN'),
                                    lane__lanenum=rgroup.get('PU'))
Пример #10
0
from pipes import quote
from distutils import spawn
from socket import getfqdn, socket, AF_UNIX, SOCK_STREAM, gethostname
from getpass import getuser
from tempfile import NamedTemporaryFile

from osqutil.utilities import bash_quote, sanitize_samplename, \
  BamPostProcessor
from osqutil.progsum import ProgramSummary
from osqutil.cluster import make_bam_name_without_extension, \
  ClusterJobRunner, ClusterJobSubmitter, DesktopJobSubmitter
from osqutil.config import Config

from osqutil.setup_logs import configure_logging

LOGGER = configure_logging('bwa_runner')

##############################################################################


def genome_fasta_path(genome, genomedir, indexdir=None):
    '''
  Returns the expected path to the fasta file for a given genome
  index. If a specific fasta index directory is specified
  (e.g. bwa-0.6.1), the path points to a fasta file in that
  subdirectory.
  '''
    sciname = genome.species.scientific_name
    sciname = sciname.replace(" ", "_")
    sciname = sciname.lower()
    if indexdir is not None:
Пример #11
0
import re
import os.path
import weakref
from datetime import date, timedelta
from time import sleep
import xml.etree.ElementTree as ET

import requests

from osqutil.config import Config
from osqutil.utilities import munge_cruk_emails
from ..models import LibraryNameMap, User

from osqutil.setup_logs import configure_logging
from logging import INFO, DEBUG
LOGGER = configure_logging('lims')

CONFIG = Config()

###############################################################################
def http_download_file(url, local_filename, params=None):
  '''
  Download a remote file URL to a local filename. Returns the local
  filename on success.
  '''
  # See http://stackoverflow.com/a/16696317 for the original source.

  if params is None:
    params = {}

  # Read large files in chunks using stream = True parameter
Пример #12
0
import re

from osqutil.utilities import checksum_file, \
    build_incoming_fastq_name, parse_incoming_fastq_name, call_subprocess, set_file_permissions, \
    munge_cruk_emails, unzip_file, rezip_file, is_zipped
from .upstream_lims import Lims
from osqutil.config import Config
from ..models import Library, Lane, Status, LibraryNameMap, User, Adapter, Facility, Machine

from osqpipe.pipeline.smtp import send_email

from .fetch_fastq import FQFileFetcher

from osqutil.setup_logs import configure_logging
from logging import INFO, DEBUG
LOGGER = configure_logging('flowcell')

###############################################################################


def demux_code(code):
    '''Break a comma-separated string of library codes into a list.

    Each element has its leading and trailing whitespace removed. Empty
    elements (e.g. from consecutive or trailing commas) are kept as
    empty strings, so the result always has one entry per comma plus
    one.
    '''
    libcodes = []
    for fragment in code.split(","):
        libcodes.append(fragment.strip())
    return libcodes


###############################################################################


class FlowCellProcess(object):
    '''Main class used for processing flowcells.'''
Пример #13
0
####################################################################################
# ControlledVocab management. Primarily concerned with handling
# phonetic ("fuzzy") searches. We only support fuzzy searching on
# filter, not get (since a fuzzy search may return multiple hits
# anyway).

from django.db import models
from django.db.models import Max
import re
import os

from osqutil.utilities import get_filename_libcode

from osqutil.setup_logs import configure_logging
LOGGER = configure_logging('models')

from osqutil.config import Config
CONFIG = Config()


class ControlledVocabManager(models.Manager):

    # This will become a dict of dicts. N.B. deactivated as the caching mechanism currently creates more problems than it solves.
    #  _fuzzyCache = {}
    _fuzzy_re = re.compile('(.*)__fuzzy$')

    def filter(self, **kwargs):

        kwargs = self._map_controlled_field(kwargs)
        (fuzzy_args,
Пример #14
0
'''Code to test for new, previously unseen FlowCell IDs in the
SolexaLIMS back-end MySQL database. Original version shamelessly
stolen from Gord Brown's runNotifier script.'''

from osqutil.utilities import munge_cruk_emails
from .upstream_lims import Lims, get_lims_run_history
from osqutil.config import Config

from ..models import Lane, Library, Status, Facility, User

################################################################################

from osqutil.setup_logs import configure_logging
from logging import INFO, DEBUG

LOGGER = configure_logging('lims_watcher')

################################################################################


class LimsWatcher(object):
    '''Class used to poll the Genologics LIMS REST API and retrieve a
  list of Run IDs ready for downstream processing. The associated
  lanes are created in the repository (assuming that the libraries are
  available), and returned to the caller. Note that typically the
  caller will want to re-query to retrieve all ready lanes, not just
  those found in the last sweep.'''

    __slots__ = ('conf', 'missing_libraries', 'user_emails', 'lims')

    def __init__(self, lims=None, debug=False):
Пример #15
0
import os
import re

from tempfile import NamedTemporaryFile
from shutil import move

from django.db import transaction
from ..models import Program, AlignmentQC, AlnQCfile, Filetype, Alnfile, DataProvenance
from .laneqc import QCReport
from osqutil.progsum import ProgramSummary
from osqutil.utilities import checksum_file, call_subprocess, rezip_file, set_file_permissions
from osqutil.config import Config
from osqutil.setup_logs import configure_logging

CONFIG = Config()
LOGGER = configure_logging('alignmentqc')

class AlignmentQCReport(QCReport):
  '''
  Abstract base for every QC report that operates on an alignment.

  This class adds no behaviour of its own; it only overrides the class
  attributes below. Presumably these are read by the QCReport
  superclass to decide which models and target names to use -- confirm
  against QCReport.
  '''
  # Model classes associated with alignment-level QC.
  data_process = AlignmentQC
  data_file = AlnQCfile

  # String names for the QC target and for the file's link to it.
  target_name = 'alignment'
  file_target_name = 'alignmentqc'

class AlignmentCrossCorrReport(AlignmentQCReport):
  '''
  Concrete AlignmentQCReport subclass implementing cross-correlation
  analysis plots. See the superclass for usage notes.
  '''
Пример #16
0
__version__ = "0.1"
__credits__ = "Exploits cs_* libraries implemented by Gordon Brown."

## Dependencies:
#
# - Password free login to target scp server
# - scp
# - sed
# - /home/fnc-odompipe/software/external/bin/wkhtmltopdf-amd64

import sys
import getopt

from osqutil.setup_logs import configure_logging
from logging import INFO
LOGGER = configure_logging(level=INFO)

# New in Django 1.7 and above.
import django
django.setup()

from osqpipe.pipeline.fetch_mga import fetch_mga

TEST_MODE = False

def usage(fname):
  """Prints usage """
  print ""
  print "Program: %s (fetches MGA reports from Solexa LIMS)" % (fname,)
  print "Version: %s" % __version__
  print "Usage:   %s [-t/--test] <flowcell> <lane> <dest> [filename prefix] " % (fname,)
Пример #17
0
# along with the osqpipe python package.  If not, see
# <http://www.gnu.org/licenses/>.

'''
Code tools used to manipulate bam files from within python. This is
intended to be a pure-python implementation for improved
maintainability.
'''

import os
import re
import pysam
from contextlib import contextmanager
from logging import INFO
from osqutil.setup_logs import configure_logging
LOGGER = configure_logging('bampy', level=INFO)

class Bamfile(pysam.AlignmentFile):

  '''
  Subclass of pysam.AlignmentFile with a few convenience methods of our own.
  '''

  # Note that while it might be nice to have a Bamfile class which
  # correctly auto-generates its own index, this is not going to be
  # easily maintainable due to the implementation of
  # pysam.AlignmentFile. Rather than coding ourselves into knots, we have
  # opted for a context manager approach (see open_bamfile).

  def read_refname(self, read):
    '''