Пример #1
0
 def convert(self):
     """
     Copy up to ``self.args.numLines`` records from the input alignment
     file into a new output alignment file, then optionally index it.

     The pysam open modes are picked from ``self.inputFileFormat`` and
     ``self.outputFileFormat``, and the output file is created with the
     input file's header. When the output is BAM and
     ``self.args.skipIndexing`` is false, ``pysam.index`` is run on the
     output path.

     :raises ValueError: if either file format is neither SAM nor BAM
     """
     # set flags
     if self.inputFileFormat == AlignmentFileConstants.SAM:
         inputFlags = "r"
     elif self.inputFileFormat == AlignmentFileConstants.BAM:
         inputFlags = "rb"
     else:
         # Fail with a clear message instead of an UnboundLocalError later
         raise ValueError("Unsupported input file format '{}'".format(
             self.inputFileFormat))
     if self.outputFileFormat == AlignmentFileConstants.SAM:
         outputFlags = "wh"
     elif self.outputFileFormat == AlignmentFileConstants.BAM:
         outputFlags = "wb"
     else:
         raise ValueError("Unsupported output file format '{}'".format(
             self.outputFileFormat))
     # open files; the nested try/finally blocks guarantee both handles
     # are closed even if opening the output or copying a record fails
     inputFile = pysam.AlignmentFile(self.args.inputFile, inputFlags)
     try:
         outputFile = pysam.AlignmentFile(self.args.outputFile,
                                          outputFlags,
                                          header=inputFile.header)
         try:
             outputFilePath = outputFile.filename
             utils.log("Creating alignment file '{}'".format(outputFilePath))
             # write new file
             for _ in xrange(self.args.numLines):
                 try:
                     alignedSegment = next(inputFile)
                 except StopIteration:
                     # The input holds fewer than numLines records; keep
                     # what was copied instead of aborting the conversion
                     break
                 outputFile.write(alignedSegment)
         finally:
             outputFile.close()
     finally:
         inputFile.close()
     # create index file
     if (not self.args.skipIndexing
             and self.outputFileFormat == AlignmentFileConstants.BAM):
         indexFilePath = "{}.{}".format(outputFilePath,
                                        AlignmentFileConstants.BAI.lower())
         utils.log("Creating index file '{}'".format(indexFilePath))
         pysam.index(outputFilePath)
Пример #2
0
 def convert(self):
     """
     Write the first ``self.args.numLines`` records of the input
     alignment file to the output alignment file and, for BAM output,
     build an index unless ``self.args.skipIndexing`` is set.

     The open modes passed to pysam depend on ``self.inputFileFormat``
     and ``self.outputFileFormat``; the output file is given the input
     file's header.

     :raises ValueError: if either file format is neither SAM nor BAM
     """
     # Imported locally so the method does not rely on the module's
     # import block providing it
     from contextlib import closing

     # set flags
     if self.inputFileFormat == AlignmentFileConstants.SAM:
         inputFlags = "r"
     elif self.inputFileFormat == AlignmentFileConstants.BAM:
         inputFlags = "rb"
     else:
         # Explicit error rather than an UnboundLocalError further down
         raise ValueError(
             "Unsupported input file format '{}'".format(
                 self.inputFileFormat))
     if self.outputFileFormat == AlignmentFileConstants.SAM:
         outputFlags = "wh"
     elif self.outputFileFormat == AlignmentFileConstants.BAM:
         outputFlags = "wb"
     else:
         raise ValueError(
             "Unsupported output file format '{}'".format(
                 self.outputFileFormat))
     # open files; closing() guarantees each handle is closed even when
     # opening the output or copying a record raises
     with closing(pysam.AlignmentFile(
             self.args.inputFile, inputFlags)) as inputFile:
         with closing(pysam.AlignmentFile(
                 self.args.outputFile, outputFlags,
                 header=inputFile.header)) as outputFile:
             outputFilePath = outputFile.filename
             utils.log(
                 "Creating alignment file '{}'".format(outputFilePath))
             # write new file
             for _ in xrange(self.args.numLines):
                 try:
                     alignedSegment = next(inputFile)
                 except StopIteration:
                     # Fewer than numLines records available; stop
                     # cleanly with whatever has been copied so far
                     break
                 outputFile.write(alignedSegment)
     # create index file
     if (not self.args.skipIndexing and
             self.outputFileFormat == AlignmentFileConstants.BAM):
         indexFilePath = "{}.{}".format(
             outputFilePath, AlignmentFileConstants.BAI.lower())
         utils.log("Creating index file '{}'".format(indexFilePath))
         pysam.index(outputFilePath)
Пример #3
0
 def testLog(self):
     """
     Check that one call to utils.log results in exactly one call on
     self.printMock (presumably a mock installed by the test fixture).
     """
     utils.log("message")
     # assertEquals is a deprecated alias that was removed in Python 3.12
     self.assertEqual(self.printMock.call_count, 1)
Пример #4
0
    def __init__(self, inputDirectory, outputDirectory, force):
        """
        Converts human readable dataset from compliance repository,
        and translates it into a reference-server readable filesystem
        with binary files.
        :param inputDirectory: location of
            the human readable compliance dataset; if None, the dataset
            is downloaded from GitHub into a temporary directory
        :param outputDirectory: location of
            the file hierarchy suitable for deploying on the reference server
        :param force: if true, an already existing outputDirectory is
            removed; otherwise its existence aborts the program
        :raises SystemExit: with status 1 if outputDirectory exists and
            force is false
        """
        self.inputDirectory = inputDirectory
        self.outputDirectory = outputDirectory
        self.repoPath = os.path.abspath(
            os.path.join(outputDirectory, "registry.db"))
        self.tempdir = None

        if os.path.exists(self.outputDirectory):
            if force:
                utils.log("Removing existing output directory at '{}'".format(
                    self.outputDirectory))
                shutil.rmtree(self.outputDirectory)
            else:
                utils.log("Output directory '{}' already exists".format(
                    self.outputDirectory))
                utils.log("Please specify an output path that does not exist")
                utils.log("Exiting...")
                # Raise SystemExit directly: the exit() builtin is injected
                # by the site module and is missing under "python -S" or in
                # frozen/embedded interpreters
                raise SystemExit(1)

        # If no input directory is specified download from GitHub
        if inputDirectory is None:
            utils.log("Downloading test data...")
            self.tempdir = tempfile.mkdtemp()
            try:
                url = "https://github.com/ga4gh/compliance/archive/master.zip"
                filePath = os.path.join(self.tempdir, 'compliance-master.zip')
                downloader = file_downloader.HttpFileDownloader(url, filePath)
                downloader.download()
                utils.log("Extracting test data...")
                with zipfile.ZipFile(filePath, "r") as z:
                    z.extractall(self.tempdir)
            except BaseException:
                # The constructor is failing, so no caller will ever hold
                # this object to call cleanup(); remove the directory here
                shutil.rmtree(self.tempdir, ignore_errors=True)
                self.tempdir = None
                raise
            self.inputDirectory = os.path.join(self.tempdir,
                                               'compliance-master',
                                               'test-data')
        repo = datarepo.SqlDataRepository(self.repoPath)
        self.repo = repo
Пример #5
0
 def cleanup(self):
     """
     Delete the directory at self.tempdir when one is set, then log
     that conversion finished and where the output directory is.
     """
     tempdir = self.tempdir
     if tempdir is not None:
         shutil.rmtree(tempdir)
     utils.log("Done converting compliance data.")
     resultMessage = "Result in '{}'".format(self.outputDirectory)
     utils.log(resultMessage)
Пример #6
0
 def runCommandCheckWarnings(self, cmd):
     """
     Run a command line and pass its captured standard output, split
     into lines, to self.ensureNoWarnings.

     :param cmd: the command line as a single string; it is tokenised
         with shlex.split and run without a shell
     :raises subprocess.CalledProcessError: if the command exits with a
         non-zero status
     """
     utils.log("Running '{}'".format(cmd))
     splits = shlex.split(cmd)
     rawOutput = subprocess.check_output(splits)
     if not isinstance(rawOutput, str):
         # Python 3 returns bytes, which cannot be split on a str
         # separator; on Python 2 the output is already str
         rawOutput = rawOutput.decode("utf-8", "replace")
     output = rawOutput.split('\n')
     self.ensureNoWarnings(output, cmd)
Пример #7
0
    def __init__(self, inputDirectory, outputDirectory, force):
        """
        Converts human readable dataset from compliance repository,
        and translates it into a reference-server readable filesystem
        with binary files.
        :param inputDirectory: location of
            the human readable compliance dataset; when None the dataset
            is downloaded from GitHub into a temporary directory
        :param outputDirectory: location of
            the file hierarchy suitable for deploying on the reference server
        :param force: when true, an existing outputDirectory is deleted
            first; when false, an existing outputDirectory ends the program
        :raises SystemExit: with status 1 when outputDirectory exists and
            force is false
        """
        self.inputDirectory = inputDirectory
        self.outputDirectory = outputDirectory
        self.repoPath = os.path.abspath(
            os.path.join(outputDirectory, "registry.db"))
        self.tempdir = None

        if os.path.exists(self.outputDirectory):
            if force:
                utils.log(
                    "Removing existing output directory at '{}'".format(
                        self.outputDirectory))
                shutil.rmtree(self.outputDirectory)
            else:
                utils.log(
                    "Output directory '{}' already exists".format(
                        self.outputDirectory))
                utils.log(
                    "Please specify an output path that does not exist")
                utils.log("Exiting...")
                # exit() only exists when the site module has been loaded;
                # raising SystemExit has the same effect without that
                # dependency
                raise SystemExit(1)

        # If no input directory is specified download from GitHub
        if inputDirectory is None:
            utils.log("Downloading test data...")
            self.tempdir = tempfile.mkdtemp()
            try:
                url = "https://github.com/ga4gh/compliance/archive/master.zip"
                filePath = os.path.join(self.tempdir, 'compliance-master.zip')
                downloader = file_downloader.HttpFileDownloader(url, filePath)
                downloader.download()
                utils.log("Extracting test data...")
                with zipfile.ZipFile(filePath, "r") as z:
                    z.extractall(self.tempdir)
            except BaseException:
                # A failed constructor leaves nobody to call cleanup(), so
                # the temporary directory has to be removed here
                shutil.rmtree(self.tempdir, ignore_errors=True)
                self.tempdir = None
                raise
            self.inputDirectory = os.path.join(
                self.tempdir, 'compliance-master', 'test-data')
        repo = datarepo.SqlDataRepository(self.repoPath)
        self.repo = repo
Пример #8
0
 def cleanup(self):
     """
     Remove self.tempdir from disk if it is not None, and log the
     completion message followed by the output directory path.
     """
     hasTempdir = self.tempdir is not None
     if hasTempdir:
         shutil.rmtree(self.tempdir)
     doneMessage = "Done converting compliance data."
     utils.log(doneMessage)
     utils.log("Result in '{}'".format(self.outputDirectory))
Пример #9
0
 def log(self, logStr):
     """
     Log logStr through utils.log, prefixed with self.logStrPrefix and
     a single space.
     """
     message = "{0} {1}".format(self.logStrPrefix, logStr)
     utils.log(message)