Example #1
File: blocks.py  Project: Peichao/thunder
    def saveAsBinarySeries(self, outputDirPath, overwrite=False):
        """Writes out Series-formatted data.

        Subclasses are *not* expected to override this method.

        Parameters
        ----------
        outputDirPath : string path or URI to directory to be created
            Output files will be written underneath outputDirPath. This directory must not yet exist
            (unless overwrite is True), and must be no more than one level beneath an existing directory.
            It will be created as a result of this call.

        overwrite : bool
            If True, outputDirPath and all its contents will be deleted and recreated as part
            of this call.
        """
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.seriesloader import writeSeriesConfig
        from thunder.utils.aws import AWSCredentials

        if not overwrite:
            self._checkOverwrite(outputDirPath)
            overwrite = True  # prevent additional downstream checks for this path

        awsCredentialsOverride = AWSCredentials.fromContext(self.rdd.ctx)
        writer = getParallelWriterForPath(outputDirPath)(outputDirPath, overwrite=overwrite,
                                                         awsCredentialsOverride=awsCredentialsOverride)

        binseriesRdd = self.toBinarySeries()

        binseriesRdd.foreach(writer.writerFcn)
        writeSeriesConfig(outputDirPath, len(self.dims), self.nimages, keyType='int16', valueType=self.dtype,
                          overwrite=overwrite, awsCredentialsOverride=awsCredentialsOverride)
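A minimal usage sketch for this method; the ThunderContext variable `tsc`, the input path, and the `toBlocks` argument are illustrative assumptions, not taken from this example:

    # Hypothetical end-to-end call: load images, group them into blocks,
    # then persist the blocks as binary Series files (API names assumed).
    images = tsc.loadImages("/data/input")
    blocks = images.toBlocks("150M")
    blocks.saveAsBinarySeries("/data/series-out", overwrite=True)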
Example #2
    def saveAsBinarySeries(self, outputDirPath, overwrite=False):
        """Writes out Series-formatted data.

        Subclasses are *not* expected to override this method.

        Parameters
        ----------
        outputDirPath : string path or URI to directory to be created
            Output files will be written underneath outputDirPath. This directory must not yet exist
            (unless overwrite is True), and must be no more than one level beneath an existing directory.
            It will be created as a result of this call.

        overwrite : bool
            If True, outputDirPath and all its contents will be deleted and recreated as part
            of this call.
        """
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.seriesloader import writeSeriesConfig

        if not overwrite:
            from thunder.utils.common import raiseErrorIfPathExists
            raiseErrorIfPathExists(outputDirPath)
            overwrite = True  # prevent additional downstream checks for this path

        writer = getParallelWriterForPath(outputDirPath)(outputDirPath, overwrite=overwrite)

        binseriesRdd = self.toBinarySeries()

        binseriesRdd.foreach(writer.writerFcn)
        writeSeriesConfig(outputDirPath, len(self.dims), self.nimages, keyType='int16', valueType=self.dtype,
                          overwrite=overwrite)
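Because writeSeriesConfig records the key and value dtypes in a conf file next to the data, the output directory can be reloaded as a Series afterwards. A sketch of the read-back, assuming a ThunderContext `tsc` and the hypothetical path from the sketch above:

    # Hypothetical round trip: the conf file written by writeSeriesConfig
    # lets the loader infer key/value dtypes without extra arguments.
    series = tsc.loadSeries("/data/series-out")
    print series.dims, series.dtype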
Example #3
    def __saveSeriesRdd(self, seriesBlocks, outputDirPath, dims, npointsInSeries, dtype, overwrite=False):
        if not overwrite:
            self._checkOverwrite(outputDirPath)
            overwrite = True  # prevent additional downstream checks for this path
        writer = getParallelWriterForPath(outputDirPath)(
            outputDirPath, overwrite=overwrite, awsCredentialsOverride=self.awsCredentialsOverride
        )

        def blockToBinarySeries(kvIter):
            """ Packs all (key, series) records in a partition into a single binary file blob. """
            label = None
            keyPacker = None
            buf = StringIO()
            for seriesKey, series in kvIter:
                if keyPacker is None:
                    keyPacker = struct.Struct("h" * len(seriesKey))
                    label = SimpleBlocks.getBinarySeriesNameForKey(seriesKey) + ".bin"
                buf.write(keyPacker.pack(*seriesKey))
                buf.write(series.tostring())
            val = buf.getvalue()
            buf.close()
            return [(label, val)]

        seriesBlocks.mapPartitions(blockToBinarySeries).foreach(writer.writerFcn)
        writeSeriesConfig(
            outputDirPath,
            len(dims),
            npointsInSeries,
            valueType=dtype,
            overwrite=overwrite,
            awsCredentialsOverride=self.awsCredentialsOverride,
        )
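The record layout produced by blockToBinarySeries is each key packed as consecutive int16 fields ('h' format codes) followed by the raw bytes of the series values. A self-contained sketch of that packing for one record; the key and values here are made up for illustration:

    import struct
    from numpy import arange, float32

    key = (2, 3, 0)                                # example spatial key
    values = arange(4, dtype=float32)              # example time series
    keyPacker = struct.Struct('h' * len(key))      # three int16 fields
    record = keyPacker.pack(*key) + values.tostring()
    assert len(record) == 2 * len(key) + 4 * len(values)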
Example #4
    def saveAsBinarySeries(self,
                           outputdirname,
                           blockSize="150M",
                           splitsPerDim=None,
                           groupingDim=None,
                           overwrite=False):
        """Writes Image into files on a local filesystem, suitable for loading by SeriesLoader.fromBinary()

        The mount point specified by outputdirname must be visible to all workers; thus this method is
        primarily useful either when Spark is being run locally or in the presence of an NFS mount or
        similar shared filesystem.

        Parameters
        ----------
        outputdirname : string path or URI to directory to be created
            Output files will be written underneath outputdirname. This directory must not yet exist
            (unless overwrite is True), and must be no more than one level beneath an existing directory.
            It will be created as a result of this call.

        blockSize : positive int or string
            Requests a particular size for individual output files; see toSeries()

        splitsPerDim : n-tuple of positive int
            Specifies that output files are to be generated by splitting the i-th dimension
            of the image into splitsPerDim[i] roughly equally-sized partitions; see toSeries()

        groupingDim : nonnegative int, 0 <= groupingDim <= len(self.dims)
            Specifies that intermediate blocks are to be generated by splitting the image
            into "planes" of dimensionality len(self.dims) - 1, along the dimension given by
            self.dims[groupingDim]; see toSeries()

        overwrite : bool
            If true, outputdirname and all its contents will be deleted and recreated as part
            of this call.

        """
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.seriesloader import writeSeriesConfig

        writer = getParallelWriterForPath(outputdirname)(outputdirname,
                                                         overwrite=overwrite)

        blocksdata = self._scatterToBlocks(blockSize=blockSize,
                                           blocksPerDim=splitsPerDim,
                                           groupingDim=groupingDim)

        binseriesrdd = blocksdata.toBinarySeries(seriesDim=0)

        def appendBin(kv):
            binlabel, binvals = kv
            return binlabel + '.bin', binvals

        binseriesrdd.map(appendBin).foreach(writer.writerFcn)
        writeSeriesConfig(outputdirname,
                          len(self.dims),
                          self.nimages,
                          dims=self.dims.count,
                          keytype='int16',
                          valuetype=self.dtype,
                          overwrite=overwrite)
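A hedged usage sketch for the splitting parameters; the Images variable `imgs`, its dimensions, and the output path are assumptions for illustration:

    # Hypothetical call on a (512, 512, 4) volume: split each plane 2x2
    # and leave the third dimension whole, yielding 2*2*1 = 4 output files.
    imgs.saveAsBinarySeries("/data/series-out",
                            splitsPerDim=(2, 2, 1),
                            overwrite=True)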
Example #5
    def __saveSeriesRdd(seriesblocks,
                        outputdirname,
                        dims,
                        npointsinseries,
                        datatype,
                        overwrite=False):
        writer = getParallelWriterForPath(outputdirname)(outputdirname,
                                                         overwrite=overwrite)

        def blockToBinarySeries(kviter):
            label = None
            keypacker = None
            buf = StringIO()
            for seriesKey, series in kviter:
                if keypacker is None:
                    keypacker = struct.Struct('h' * len(seriesKey))
                    label = ImageBlocks.getBinarySeriesNameForKey(
                        seriesKey) + ".bin"
                buf.write(keypacker.pack(*seriesKey))
                buf.write(series.tostring())
            val = buf.getvalue()
            buf.close()
            return [(label, val)]

        seriesblocks.mapPartitions(blockToBinarySeries).foreach(
            writer.writerFcn)
        writeSeriesConfig(outputdirname,
                          len(dims),
                          npointsinseries,
                          dims=dims,
                          valuetype=datatype,
                          overwrite=overwrite)
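Conversely, a record can be decoded by reading its fixed-size key prefix and interpreting the remainder as values. A sketch under the same layout; the key count, value count, dtype, and file name are assumptions that would normally come from the conf file:

    import struct
    from numpy import frombuffer, float32

    nkeys, nvals = 3, 4                    # assumed, normally read from conf
    recSize = 2 * nkeys + 4 * nvals        # int16 keys + float32 values
    with open("/data/series-out/series-02-03-00.bin", "rb") as f:
        rec = f.read(recSize)              # first fixed-size record in file
    key = struct.unpack('h' * nkeys, rec[:2 * nkeys])
    values = frombuffer(rec[2 * nkeys:], dtype=float32)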
Example #6
    def exportAsPngs(self,
                     outputdirname,
                     fileprefix="export",
                     overwrite=False,
                     collectToDriver=True):
        """Write out basic png files for two-dimensional image data.

        Files will be written into a newly-created directory on the local file system given by outputdirname.

        All workers must be able to see the output directory via an NFS share or similar.

        Parameters
        ----------
        outputdirname : string
            Path to output directory to be created. Exception will be thrown if this directory already
            exists, unless overwrite is True. Directory must be one level below an existing directory.

        fileprefix : string
            String to prepend to all filenames. Files will be named <fileprefix>00000.png, <fileprefix>00001.png, etc

        overwrite : bool
            If true, the directory given by outputdirname will first be deleted if it already exists.

        collectToDriver : bool, default True
            If true, images will be collect()'ed at the driver first before being written out, allowing
            for use of a local filesystem at the expense of network overhead. If false, images will be written
            in parallel by each executor, presumably to a distributed or networked filesystem.
        """
        dims = self.dims
        if len(dims) != 2:
            raise ValueError(
                "Only two-dimensional images can be exported as .png files; image is %d-dimensional."
                % len(dims))

        from matplotlib.pyplot import imsave
        from io import BytesIO
        from thunder.rdds.fileio.writers import getParallelWriterForPath, getCollectedFileWriterForPath

        def toFilenameAndPngBuf(kv):
            key, img = kv
            fname = fileprefix + "%05d.png" % int(key)
            bytebuf = BytesIO()
            imsave(bytebuf, img, format="png")
            return fname, bytebuf.getvalue()

        bufrdd = self.rdd.map(toFilenameAndPngBuf)

        if collectToDriver:
            writer = getCollectedFileWriterForPath(outputdirname)(
                outputdirname, overwrite=overwrite)
            writer.writeCollectedFiles(bufrdd.collect())
        else:
            writer = getParallelWriterForPath(outputdirname)(
                outputdirname, overwrite=overwrite)
            bufrdd.foreach(writer.writerFcn)
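A hedged usage sketch; the Images variable `imgs` and the output path are assumptions, and per the dimensionality check above the data must be two-dimensional:

    # Hypothetical export: one png per record, written as frame00000.png,
    # frame00001.png, ... after collecting the images at the driver.
    imgs.exportAsPngs("/data/pngs", fileprefix="frame", overwrite=True)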
Example #7
    def exportAsPngs(self, outputDirPath, filePrefix="export", overwrite=False,
                     collectToDriver=True):
        """
        Write out basic png files for two-dimensional image data.

        Files will be written into a newly-created directory on the local file system given by outputDirPath.

        All workers must be able to see the output directory via an NFS share or similar.

        Parameters
        ----------
        outputDirPath : string
            Path to output directory to be created. Exception will be thrown if this directory already
            exists, unless overwrite is True. Directory must be one level below an existing directory.

        filePrefix : string
            String to prepend to all filenames. Files will be named <filePrefix>00000.png, <filePrefix>00001.png, etc.

        overwrite : bool
            If True, the directory given by outputDirPath will first be deleted if it already exists.

        collectToDriver : bool, default True
            If true, images will be collect()'ed at the driver first before being written out, allowing
            for use of a local filesystem at the expense of network overhead. If false, images will be written
            in parallel by each executor, presumably to a distributed or networked filesystem.
        """
        dims = self.dims
        if len(dims) != 2:
            raise ValueError("Only two-dimensional images can be exported as .png files; image is %d-dimensional." %
                             len(dims))

        from matplotlib.pyplot import imsave
        from io import BytesIO
        from thunder.rdds.fileio.writers import getParallelWriterForPath, getCollectedFileWriterForPath
        from thunder.utils.common import AWSCredentials

        def toFilenameAndPngBuf(kv):
            key, img = kv
            fname = filePrefix + "%05d.png" % int(key)
            bytebuf = BytesIO()
            imsave(bytebuf, img, format="png")
            return fname, bytebuf.getvalue()

        bufRdd = self.rdd.map(toFilenameAndPngBuf)

        awsCredentials = AWSCredentials.fromContext(self.rdd.ctx)
        if collectToDriver:
            writer = getCollectedFileWriterForPath(outputDirPath)(outputDirPath, overwrite=overwrite,
                                                                  awsCredentialsOverride=awsCredentials)
            writer.writeCollectedFiles(bufRdd.collect())
        else:
            writer = getParallelWriterForPath(outputDirPath)(outputDirPath, overwrite=overwrite,
                                                             awsCredentialsOverride=awsCredentials)
            bufRdd.foreach(writer.writerFcn)
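Since this variant forwards AWS credentials to the writer, the output directory may also be a remote URI. A sketch, where the bucket name and the s3n scheme are assumptions:

    # Hypothetical parallel export to S3: skip the driver-side collect so
    # each executor writes its own files with the forwarded credentials.
    imgs.exportAsPngs("s3n://my-bucket/pngs", overwrite=True,
                      collectToDriver=False)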
Example #8
    def saveAsBinaryImages(self,
                           outputDirPath,
                           prefix="image",
                           overwrite=False):
        """
        Write out images or volumes as flat binary files.

        Files will be written into a newly-created directory given by outputDirPath.

        Parameters
        ----------
        outputDirPath : string
            Path to output directory to be created. Exception will be thrown if this directory already
            exists, unless overwrite is True. Directory must be one level below an existing directory.

        prefix : string
            String to prepend to all filenames. Files will be named <prefix>-00000.bin, <prefix>-00001.bin, etc.

        overwrite : bool
            If True, the directory given by outputDirPath will first be deleted if it already exists.
        """
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.imagesloader import writeBinaryImagesConfig
        from thunder.utils.aws import AWSCredentials
        import StringIO

        dimsTotal = list(asarray(self.dims.max) - asarray(self.dims.min) + 1)

        def toFilenameAndBinaryBuf(kv):
            key, img = kv
            fname = prefix + "-" + "%05d.bin" % int(key)
            buf = StringIO.StringIO()
            buf.write(img.transpose().copy().tostring())
            val = buf.getvalue()
            buf.close()
            return fname, val

        bufRdd = self.rdd.map(toFilenameAndBinaryBuf)

        awsCredentials = AWSCredentials.fromContext(self.rdd.ctx)
        writer = getParallelWriterForPath(outputDirPath)(
            outputDirPath,
            overwrite=overwrite,
            awsCredentialsOverride=awsCredentials)
        bufRdd.foreach(writer.writerFcn)
        writeBinaryImagesConfig(outputDirPath,
                                dims=dimsTotal,
                                dtype=self.dtype,
                                overwrite=overwrite,
                                awsCredentialsOverride=awsCredentials)
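Note that each image is serialized via transpose().copy().tostring(), i.e. in column-major (Fortran) order. A small numpy sketch confirming the byte-level equivalence:

    from numpy import arange

    img = arange(6).reshape(2, 3)
    # Transposing then serializing in C order produces the same bytes as
    # asking numpy for Fortran-order serialization directly.
    assert img.transpose().copy().tostring() == img.tostring(order='F')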
Example #9
    def saveAsBinarySeries(self, outputdirname, blockSize="150M", splitsPerDim=None, groupingDim=None,
                           overwrite=False):
        """Writes Image into files on a local filesystem, suitable for loading by SeriesLoader.fromBinary()

        The mount point specified by outputdirname must be visible to all workers; thus this method is
        primarily useful either when Spark is being run locally or in the presence of an NFS mount or
        similar shared filesystem.

        Parameters
        ----------
        outputdirname : string path or URI to directory to be created
            Output files will be written underneath outputdirname. This directory must not yet exist
            (unless overwrite is True), and must be no more than one level beneath an existing directory.
            It will be created as a result of this call.

        blockSize : positive int or string
            Requests a particular size for individual output files; see toSeries()

        splitsPerDim : n-tuple of positive int
            Specifies that output files are to be generated by splitting the i-th dimension
            of the image into splitsPerDim[i] roughly equally-sized partitions; see toSeries()

        groupingDim : nonnegative int, 0 <= groupingDim <= len(self.dims)
            Specifies that intermediate blocks are to be generated by splitting the image
            into "planes" of dimensionality len(self.dims) - 1, along the dimension given by
            self.dims[groupingDim]; see toSeries()

        overwrite : bool
            If true, outputdirname and all its contents will be deleted and recreated as part
            of this call.

        """
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.seriesloader import writeSeriesConfig

        writer = getParallelWriterForPath(outputdirname)(outputdirname, overwrite=overwrite)

        blocksdata = self._scatterToBlocks(blockSize=blockSize, blocksPerDim=splitsPerDim, groupingDim=groupingDim)

        binseriesrdd = blocksdata.toBinarySeries(seriesDim=0)

        def appendBin(kv):
            binlabel, binvals = kv
            return binlabel + '.bin', binvals

        binseriesrdd.map(appendBin).foreach(writer.writerFcn)
        writeSeriesConfig(outputdirname, len(self.dims), self.nimages, dims=self.dims.count,
                          keytype='int16', valuetype=self.dtype, overwrite=overwrite)
Example #10
    def saveAsBinaryImages(self, outputDirPath, prefix="image", overwrite=False):
        """
        Write out images or volumes as flat binary files.

        Files will be written into a newly-created directory given by outputDirPath.

        Parameters
        ----------
        outputDirPath : string
            Path to output directory to be created. Exception will be thrown if this directory already
            exists, unless overwrite is True. Directory must be one level below an existing directory.

        prefix : string
            String to prepend to all filenames. Files will be named <prefix>-00000.bin, <prefix>-00001.bin, etc.

        overwrite : bool
            If True, the directory given by outputDirPath will first be deleted if it already exists.
        """
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.imagesloader import writeBinaryImagesConfig
        from thunder.utils.aws import AWSCredentials
        import StringIO

        dimsTotal = list(asarray(self.dims.max) - asarray(self.dims.min) + 1)

        def toFilenameAndBinaryBuf(kv):
            key, img = kv
            fname = prefix + "-" + "%05d.bin" % int(key)
            buf = StringIO.StringIO()
            buf.write(img.transpose().copy().tostring())
            val = buf.getvalue()
            buf.close()
            return fname, val

        bufRdd = self.rdd.map(toFilenameAndBinaryBuf)

        awsCredentials = AWSCredentials.fromContext(self.rdd.ctx)
        writer = getParallelWriterForPath(outputDirPath)(outputDirPath, overwrite=overwrite,
                                                         awsCredentialsOverride=awsCredentials)
        bufRdd.foreach(writer.writerFcn)
        writeBinaryImagesConfig(outputDirPath, dims=dimsTotal, dtype=self.dtype,
                                overwrite=overwrite, awsCredentialsOverride=awsCredentials)
Example #11
    def __saveSeriesRdd(seriesblocks, outputdirname, dims, npointsinseries, datatype, overwrite=False):
        writer = getParallelWriterForPath(outputdirname)(outputdirname, overwrite=overwrite)

        def blockToBinarySeries(kviter):
            label = None
            keypacker = None
            buf = StringIO()
            for seriesKey, series in kviter:
                if keypacker is None:
                    keypacker = struct.Struct('h' * len(seriesKey))
                    label = ImageBlocks.getBinarySeriesNameForKey(seriesKey) + ".bin"
                buf.write(keypacker.pack(*seriesKey))
                buf.write(series.tostring())
            val = buf.getvalue()
            buf.close()
            return [(label, val)]

        seriesblocks.mapPartitions(blockToBinarySeries).foreach(writer.writerFcn)
        writeSeriesConfig(outputdirname, len(dims), npointsinseries, dims=dims, valuetype=datatype,
                          overwrite=overwrite)
Example #12
    def __saveSeriesRdd(self,
                        seriesBlocks,
                        outputDirPath,
                        dims,
                        npointsInSeries,
                        dtype,
                        overwrite=False):
        if not overwrite:
            self._checkOverwrite(outputDirPath)
            overwrite = True  # prevent additional downstream checks for this path
        writer = getParallelWriterForPath(outputDirPath)(
            outputDirPath,
            overwrite=overwrite,
            awsCredentialsOverride=self.awsCredentialsOverride)

        def blockToBinarySeries(kvIter):
            label = None
            keyPacker = None
            buf = StringIO()
            for seriesKey, series in kvIter:
                if keyPacker is None:
                    keyPacker = struct.Struct('h' * len(seriesKey))
                    label = SimpleBlocks.getBinarySeriesNameForKey(
                        seriesKey) + ".bin"
                buf.write(keyPacker.pack(*seriesKey))
                buf.write(series.tostring())
            val = buf.getvalue()
            buf.close()
            return [(label, val)]

        seriesBlocks.mapPartitions(blockToBinarySeries).foreach(
            writer.writerFcn)
        writeSeriesConfig(outputDirPath,
                          len(dims),
                          npointsInSeries,
                          valueType=dtype,
                          overwrite=overwrite,
                          awsCredentialsOverride=self.awsCredentialsOverride)
Example #13
    def saveAsBinarySeries(self, outputDirPath, overwrite=False):
        """
        Writes out Series-formatted data.

        This method (Series.saveAsBinarySeries) writes out binary series files using the current partitioning
        of this Series object. (That is, if mySeries.rdd.getNumPartitions() == 5, then 5 files will be written
        out, one per partition.) The records will not be resorted; the file names for each partition will be
        taken from the key of the first Series record in that partition. If the Series object is already
        sorted and no records have been removed by filtering, then the resulting output should be equivalent
        to what one would get from calling myImages.saveAsBinarySeries().

        If all one wishes to do is to save out Images data in a binary series format, then
        tsc.convertImagesToSeries() will likely be more efficient than
        tsc.loadImages().toSeries().saveAsBinarySeries().

        Parameters
        ----------
        outputDirPath : string path or URI to directory to be created
            Output files will be written underneath outputDirPath. This directory must not yet exist
            (unless overwrite is True), and must be no more than one level beneath an existing directory.
            It will be created as a result of this call.

        overwrite : bool
            If True, outputDirPath and all its contents will be deleted and recreated as part
            of this call.
        """
        import cStringIO as StringIO
        import struct
        from thunder.rdds.imgblocks.blocks import SimpleBlocks
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.seriesloader import writeSeriesConfig
        from thunder.utils.common import AWSCredentials

        if not overwrite:
            self._checkOverwrite(outputDirPath)
            overwrite = True  # prevent additional downstream checks for this path

        def partitionToBinarySeries(kvIter):
            """ Collects all Series records in a partition into a single binary series record. """
            keypacker = None
            firstKey = None
            buf = StringIO.StringIO()
            for seriesKey, series in kvIter:
                if keypacker is None:
                    keypacker = struct.Struct('h' * len(seriesKey))
                    firstKey = seriesKey
                # print >> sys.stderr, seriesKey, series, series.tostring().encode('hex')
                buf.write(keypacker.pack(*seriesKey))
                buf.write(series.tostring())
            val = buf.getvalue()
            buf.close()
            # we might have an empty partition, in which case firstKey will still be None
            if firstKey is None:
                return iter([])
            else:
                label = SimpleBlocks.getBinarySeriesNameForKey(
                    firstKey) + ".bin"
                return iter([(label, val)])

        awsCredentials = AWSCredentials.fromContext(self.rdd.ctx)
        writer = getParallelWriterForPath(outputDirPath)(
            outputDirPath,
            overwrite=overwrite,
            awsCredentialsOverride=awsCredentials)

        binseriesrdd = self.rdd.mapPartitions(partitionToBinarySeries)

        binseriesrdd.foreach(writer.writerFcn)

        # TODO: all we really need here are the number of keys and number of values, which could in principle
        # be cached in _nkeys and _nvals attributes, removing the need for this .first() call in most cases.
        firstKey, firstVal = self.first()
        writeSeriesConfig(outputDirPath,
                          len(firstKey),
                          len(firstVal),
                          keyType='int16',
                          valueType=self.dtype,
                          overwrite=overwrite,
                          awsCredentialsOverride=awsCredentials)
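Because one file is written per partition, the number and naming of the output files follow directly from the RDD's partitioning, as the docstring notes. A sketch, with the Series variable `series` and the path assumed:

    # One output .bin file per partition of the underlying RDD; e.g. a
    # 5-partition Series yields 5 files, each named for its first key.
    print series.rdd.getNumPartitions()
    series.saveAsBinarySeries("/data/series-out", overwrite=True)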
Example #14
    def saveAsBinarySeries(self, outputDirPath, overwrite=False):
        """
        Writes out Series-formatted data.

        This method (Series.saveAsBinarySeries) writes out binary series files using the current partitioning
        of this Series object. (That is, if mySeries.rdd.getNumPartitions() == 5, then 5 files will be written
        out, one per partition.) The records will not be resorted; the file names for each partition will be
        taken from the key of the first Series record in that partition. If the Series object is already
        sorted and no records have been removed by filtering, then the resulting output should be equivalent
        to what one would get from calling myImages.saveAsBinarySeries().

        If all one wishes to do is to save out Images data in a binary series format, then
        tsc.convertImagesToSeries() will likely be more efficient than
        tsc.loadImages().toSeries().saveAsBinarySeries().

        Parameters
        ----------
        outputDirPath : string path or URI to directory to be created
            Output files will be written underneath outputDirPath. This directory must not yet exist
            (unless overwrite is True), and must be no more than one level beneath an existing directory.
            It will be created as a result of this call.

        overwrite : bool
            If True, outputDirPath and all its contents will be deleted and recreated as part
            of this call.
        """
        import cStringIO as StringIO
        import struct
        from thunder.rdds.imgblocks.blocks import SimpleBlocks
        from thunder.rdds.fileio.writers import getParallelWriterForPath
        from thunder.rdds.fileio.seriesloader import writeSeriesConfig
        from thunder.utils.common import AWSCredentials

        if not overwrite:
            self._checkOverwrite(outputDirPath)
            overwrite = True  # prevent additional downstream checks for this path

        def partitionToBinarySeries(kvIter):
            """ Collects all Series records in a partition into a single binary series record. """
            keypacker = None
            firstKey = None
            buf = StringIO.StringIO()
            for seriesKey, series in kvIter:
                if keypacker is None:
                    keypacker = struct.Struct('h' * len(seriesKey))
                    firstKey = seriesKey
                # print >> sys.stderr, seriesKey, series, series.tostring().encode('hex')
                buf.write(keypacker.pack(*seriesKey))
                buf.write(series.tostring())
            val = buf.getvalue()
            buf.close()
            # we might have an empty partition, in which case firstKey will still be None
            if firstKey is None:
                return iter([])
            else:
                label = SimpleBlocks.getBinarySeriesNameForKey(firstKey) + ".bin"
                return iter([(label, val)])

        awsCredentials = AWSCredentials.fromContext(self.rdd.ctx)
        writer = getParallelWriterForPath(outputDirPath)(outputDirPath, overwrite=overwrite,
                                                         awsCredentialsOverride=awsCredentials)

        binseriesrdd = self.rdd.mapPartitions(partitionToBinarySeries)

        binseriesrdd.foreach(writer.writerFcn)

        # TODO: all we really need here are the number of keys and number of values, which could in principle
        # be cached in _nkeys and _nvals attributes, removing the need for this .first() call in most cases.
        firstKey, firstVal = self.first()
        writeSeriesConfig(outputDirPath, len(firstKey), len(firstVal), keyType='int16', valueType=self.dtype,
                          overwrite=overwrite, awsCredentialsOverride=awsCredentials)