import sys from subprocess import Popen, PIPE import shutil import django django.setup() from osqpipe.models import Restrictome, Genome, Program from osqutil.cluster import ClusterJobSubmitter from osqutil.utilities import write_to_remote_file, transfer_file, checksum_file from osqutil.config import Config from osqutil.setup_logs import configure_logging from logging import INFO, DEBUG LOGGER = configure_logging('hicup') class HiCUP(object): def __init__(self, fq1, genome=None, enzyme=None, fq2=None): self.fq1 = fq1 self.fq2 = fq2 self.genome = genome self.genome_index = None self.enzyme = enzyme self.restriction_file = None self._check_file(fq1) self._check_file(fq2)
import time import datetime import re from subprocess import Popen, PIPE from shutil import copy2 from django.db import transaction from ..models import ArchiveLocation, Lanefile, Alnfile, \ QCfile, AlnQCfile, Peakfile, MergedAlnfile, Datafile from osqutil.utilities import checksum_file, bash_quote from osqutil.config import Config from osqutil.setup_logs import configure_logging LOGGER = configure_logging('archive') CONFIG = Config() ################################################################################ def _archive_file_via_scp(fobj, attempts=1, sleeptime=2): ''' A wrapper for scp allowing multiple attempts for the transfer in case of recoverable error. ''' unrecoverable = [ 'No such file or directory', 'Failed to add the host to the list of known hosts', 'Operation not permitted' ]
# NOTE(review): this fragment was recovered from a whitespace-collapsed
# source; the block nesting below is reconstructed from syntax and
# should be confirmed against the original file.

# When the md5arguments option is set, the positional arguments are
# read as alternating entries: filename, md5sum, filename, md5sum, ...
md5sums = None
if ARGS.md5arguments:
    fnames = []
    md5sums = []
    fname = True  # True when the next entry is to be read as a filename.
    for s in ARGS.files:
        if fname:
            fnames.append(s)
            fname = False
        else:
            md5sums.append(s)
            fname = True
    # ARGS.files is only replaced when every filename has a checksum.
    # NOTE(review): with an odd number of arguments ARGS.files keeps the
    # interleaved list while md5sums is still passed on below -- confirm
    # whether this case should be reported as an error instead.
    if len(fnames) == len(md5sums):
        ARGS.files = fnames

LOGGER = configure_logging(level=DEBUG)

django.setup()

HND = RepoFileHandler()

# An optional lane summary file is handled before any data files.
if ARGS.summary_file is not None:
    HND.add_lane_summary(ARGS.summary_file)

# The files are either registered as QC files or passed to the
# handler's run() method, depending on the qcfile option.
if len(ARGS.files):
    if ARGS.qcfile:
        HND.add_qc_files(ARGS.files, ARGS.program_name, md5sums=md5sums)
    else:
        HND.run(ARGS.files, md5files=ARGS.md5files, archive=ARGS.archive, md5sums=md5sums)
# The osqpipe python package is distributed in the hope that it will # be useful, but WITHOUT ANY WARRANTY; without even the implied # warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with the osqpipe python package. If not, see # <http://www.gnu.org/licenses/>. import sys import os # set up logger from osqutil.setup_logs import configure_logging from logging import WARNING LOGGER = configure_logging(level=WARNING) # import config from osqutil.config import Config # For insertion of lane info: import django from osqpipe.models import Lane, Library, ExternalRecord # set up config DBCONF = Config() django.setup() def check_ena_submission_integrity(code):
'''Script to fetch fastq files from the LIMS.''' import sys import os import os.path import re from osqutil.utilities import build_incoming_fastq_name, unzip_file, \ set_file_permissions, checksum_file from .upstream_lims import Lims from ..models import Library, Lane from osqutil.config import Config from osqutil.setup_logs import configure_logging from logging import INFO, DEBUG LOGGER = configure_logging('fetch_fastq') ############################################################################### class FQFileFetcher(object): '''Class used to query the LIMS for fastq files associated with a given flowcell and download them to a destination directory.''' __slots__ = ('destination', 'lims', 'targets', 'test_mode', 'conf', 'unprocessed_only', 'force_download') def __init__(self, destination, lims=None, test_mode=False,
'''Module providing functions used to retrieve MGA file information from the Genologics LIMS.''' import sys import os import os.path from .upstream_lims import Lims from osqutil.utilities import call_subprocess, CalledProcessError from osqutil.config import Config from osqutil.setup_logs import configure_logging from logging import INFO, DEBUG CONFIG = Config() LOGGER = configure_logging('fetch_mga') TEST_MODE = False def fetch_mga(flowcell, flowlane, destination, nameprefix, lims_fc=None): """Fetches MGA report from Genologics LIMS. Returns PDF report.""" mgafiles = [] flowlane = int(flowlane) # start logging if TEST_MODE: LOGGER.setLevel(DEBUG) else: LOGGER.setLevel(INFO)
'''Given a set of metadata on the command-line, create a new row in the library repository table.'''

import sys
import re
import copy

from osqutil.config import Config
from ..models import Factor, Genome, Antibody, Strain, Sex, Tissue, \
    Library, Libtype, Project, Adapter, Linkerset, Sample, Source, \
    Condition
from django.db import transaction
from osqutil.setup_logs import configure_logging

LOGGER = configure_logging()
CONFIG = Config()

######################################################################


class AnnotationMismatchError(ValueError):
    '''
    ValueError subclass raised to flag that the supplied input does not
    agree with what is held in the database.
    '''

######################################################################


class LibraryHandler(object):
    '''Class encapsulating the creation of new library records.'''
import re import tarfile from tempfile import mkdtemp from shutil import rmtree, move, copy from pkg_resources import Requirement, resource_filename from django.db import transaction, models from ..models import Program, LaneQC, QCfile, Filetype, Lanefile, DataProvenance, Datafile, DataProcess from osqutil.progsum import ProgramSummary from osqutil.utilities import checksum_file, call_subprocess, rezip_file, set_file_permissions, transfer_file from osqutil.config import Config from osqutil.setup_logs import configure_logging CONFIG = Config() LOGGER = configure_logging('laneqc') class QCReport(object): ''' Abstract superclass handling all QC reports. Note that this is implemented as a context manager, so you would typically use subclasses in the following way:: with LaneFastQCReport(target=l, program_name='fastqc') as rep: rep.insert_into_repository() ''' __slots__ = ('target', 'workdir', 'output_files', 'program_name', 'path', 'program_params', '_dbprog', '_delete_workdir', 'output_md5s', 'move_files')
from django.db import transaction from ..models import Alnfile, Library, Alignment, MergedAlnfile, Genome, Lane from osqutil.samtools import count_bam_reads from osqutil.utilities import call_subprocess, checksum_file, \ sanitize_samplename from osqutil.config import Config from .bwa_runner import ClusterJobManager import xml.etree.ElementTree as ET from tempfile import NamedTemporaryFile from pysam import AlignmentFile from shutil import move from osqutil.setup_logs import configure_logging LOGGER = configure_logging('gatk') CONFIG = Config() ################################################################################ # A handful of utility functions. def retrieve_readgroup_alignment(rgroup, genome=None, bamfilter=False): ''' Simply returns the osqpipe Alignment object for a given read group, a list of which is returned by this call: pysam.AlignmentFile.header.get('RG', []). ''' alns = Alignment.objects.filter(lane__library__code=rgroup.get('LB'), lane__facility__code=rgroup.get('CN'), lane__lanenum=rgroup.get('PU'))
from pipes import quote from distutils import spawn from socket import getfqdn, socket, AF_UNIX, SOCK_STREAM, gethostname from getpass import getuser from tempfile import NamedTemporaryFile from osqutil.utilities import bash_quote, sanitize_samplename, \ BamPostProcessor from osqutil.progsum import ProgramSummary from osqutil.cluster import make_bam_name_without_extension, \ ClusterJobRunner, ClusterJobSubmitter, DesktopJobSubmitter from osqutil.config import Config from osqutil.setup_logs import configure_logging LOGGER = configure_logging('bwa_runner') ############################################################################## def genome_fasta_path(genome, genomedir, indexdir=None): ''' Returns the expected path to the fasta file for a given genome index. If a specific fasta index directory is specified (e.g. bwa-0.6.1), the path points to a fasta file in that subdirectory. ''' sciname = genome.species.scientific_name sciname = sciname.replace(" ", "_") sciname = sciname.lower() if indexdir is not None:
import re import os.path import weakref from datetime import date, timedelta from time import sleep import xml.etree.ElementTree as ET import requests from osqutil.config import Config from osqutil.utilities import munge_cruk_emails from ..models import LibraryNameMap, User from osqutil.setup_logs import configure_logging from logging import INFO, DEBUG LOGGER = configure_logging('lims') CONFIG = Config() ############################################################################### def http_download_file(url, local_filename, params=None): ''' Download a remote file URL to a local filename. Returns the local filename on success. ''' # See http://stackoverflow.com/a/16696317 for the original source. if params is None: params = {} # Read large files in chunks using stream = True parameter
import re

from osqutil.utilities import checksum_file, \
    build_incoming_fastq_name, parse_incoming_fastq_name, call_subprocess, set_file_permissions, \
    munge_cruk_emails, unzip_file, rezip_file, is_zipped
from .upstream_lims import Lims
from osqutil.config import Config
from ..models import Library, Lane, Status, LibraryNameMap, User, Adapter, Facility, Machine
from osqpipe.pipeline.smtp import send_email
from .fetch_fastq import FQFileFetcher
from osqutil.setup_logs import configure_logging
from logging import INFO, DEBUG

LOGGER = configure_logging('flowcell')

###############################################################################


def demux_code(code):
    '''
    Break a comma-delimited string of library codes into its component
    codes. Returns a list holding one entry per comma-separated field,
    each with leading and trailing whitespace removed.
    '''
    codes = []
    for fragment in code.split(","):
        codes.append(fragment.strip())
    return codes

###############################################################################


class FlowCellProcess(object):
    '''Main class used for processing flowcells.'''
#################################################################################### # ControlledVocab management. Primarily concerned with handling # phonetic ("fuzzy") searches. We only support fuzzy searching on # filter, not get (since a fuzzy search may return multiple hits # anyway). from django.db import models from django.db.models import Max import re import os from osqutil.utilities import get_filename_libcode from osqutil.setup_logs import configure_logging LOGGER = configure_logging('models') from osqutil.config import Config CONFIG = Config() class ControlledVocabManager(models.Manager): # This will become a dict of dicts. N.B. deactivated as the caching mechanism currently creates more problems than it solves. # _fuzzyCache = {} _fuzzy_re = re.compile('(.*)__fuzzy$') def filter(self, **kwargs): kwargs = self._map_controlled_field(kwargs) (fuzzy_args,
'''Code to test for new, previously unseen FlowCell IDs in the SolexaLIMS back-end MySQL database. Original version shamelessly stolen from Gord Brown's runNotifier script.''' from osqutil.utilities import munge_cruk_emails from .upstream_lims import Lims, get_lims_run_history from osqutil.config import Config from ..models import Lane, Library, Status, Facility, User ################################################################################ from osqutil.setup_logs import configure_logging from logging import INFO, DEBUG LOGGER = configure_logging('lims_watcher') ################################################################################ class LimsWatcher(object): '''Class used to poll the Genologics LIMS REST API and retrieve a list of Run IDs ready for downstream processing. The associated lanes are created in the repository (assuming that the libraries are available), and returned to the caller. Note that typically the caller will want to re-query to retrieve all ready lanes, not just those found in the last sweep.''' __slots__ = ('conf', 'missing_libraries', 'user_emails', 'lims') def __init__(self, lims=None, debug=False):
import os
import re
from tempfile import NamedTemporaryFile
from shutil import move

from django.db import transaction
from ..models import Program, AlignmentQC, AlnQCfile, Filetype, Alnfile, DataProvenance
from .laneqc import QCReport
from osqutil.progsum import ProgramSummary
from osqutil.utilities import checksum_file, call_subprocess, rezip_file, set_file_permissions
from osqutil.config import Config
from osqutil.setup_logs import configure_logging

CONFIG = Config()
LOGGER = configure_logging('alignmentqc')


class AlignmentQCReport(QCReport):
    '''
    Abstract base for every QC report that is generated from an
    alignment. It only pins the class-level settings below; the
    behaviour itself is inherited from QCReport.
    '''
    # Model classes tied to this kind of report.
    # NOTE(review): presumably read by the QCReport machinery -- confirm
    # against the superclass.
    data_process = AlignmentQC
    data_file = AlnQCfile

    # Names used to refer to the report target and the file target.
    target_name = 'alignment'
    file_target_name = 'alignmentqc'


class AlignmentCrossCorrReport(AlignmentQCReport):
    '''
    Concrete AlignmentQCReport subclass implementing cross-correlation
    analysis plots. See the superclass for usage notes.
    '''
__version__ = "0.1" __credits__ = "Exploits cs_* libraries implemented by Gordon Brown." ## Dependencies: # # - Password free login to target scp server # - scp # - sed # - /home/fnc-odompipe/software/external/bin/wkhtmltopdf-amd64 import sys import getopt from osqutil.setup_logs import configure_logging from logging import INFO LOGGER = configure_logging(level=INFO) # New in Django 1.7 and above. import django django.setup() from osqpipe.pipeline.fetch_mga import fetch_mga TEST_MODE = False def usage(fname): """Prints usage """ print "" print "Program: %s (fetches MGA reports from Solexa LIMS)" % (fname,) print "Version: %s" % __version__ print "Usage: %s [-t/--test] <flowcell> <lane> <dest> [filename prefix] " % (fname,)
# along with the osqpipe python package. If not, see # <http://www.gnu.org/licenses/>. ''' Code tools used to manipulate bam files from within python. This is intended to be a pure-python implementation for improved maintainability. ''' import os import re import pysam from contextlib import contextmanager from logging import INFO from osqutil.setup_logs import configure_logging LOGGER = configure_logging('bampy', level=INFO) class Bamfile(pysam.AlignmentFile): ''' Subclass of pysam.AlignmentFile with a few convenience methods of our own. ''' # Note that while it might be nice to have a Bamfile class which # correctly auto-generates its own index, this is not going to be # easily maintainable due to the implementation of # pysam.AlignmentFile. Rather than coding ourselves into knots, we have # opted for a context manager approach (see open_bamfile). def read_refname(self, read): '''