Example No. 1
    def _run_tst_fromBinary(self, useConfJson=False):
        # run this as a single big test so as to avoid repeated setUp and tearDown of the spark context
        # data will be a sequence of test data
        # all keys and all values in a test data item must be of the same length
        # keys get converted to ints regardless of raw input format
        DATA = [
            SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11, 12, 13]], 'int16', 'int16'),
            SeriesBinaryTestData.fromArrays([[1, 2, 3], [5, 6, 7]], [[11], [12]], 'int16', 'int16'),
            SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11, 12, 13]], 'int16', 'int32'),
            SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11, 12, 13]], 'int32', 'int16'),
            SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11.0, 12.0, 13.0]], 'int16', 'float32'),
            SeriesBinaryTestData.fromArrays([[1, 2, 3]], [[11.0, 12.0, 13.0]], 'float32', 'float32'),
            SeriesBinaryTestData.fromArrays([[2, 3, 4]], [[11.0, 12.0, 13.0]], 'float32', 'float32'),
        ]

        for itemidx, item in enumerate(DATA):
            outSubdir = os.path.join(self.outputdir, 'input%d' % itemidx)
            os.mkdir(outSubdir)

            fname = os.path.join(outSubdir, 'inputfile%d.bin' % itemidx)
            with open(fname, 'wb') as f:
                item.writeToFile(f)

            loader = SeriesLoader(self.sc)
            if not useConfJson:
                series = loader.fromBinary(outSubdir, nkeys=item.nkeys, nvalues=item.nvals, keyType=str(item.keyDtype),
                                           valueType=str(item.valDtype))
            else:
                # write configuration file
                conf = {'input': outSubdir,
                        'nkeys': item.nkeys, 'nvalues': item.nvals,
                        'valuetype': str(item.valDtype), 'keytype': str(item.keyDtype)}
                with open(os.path.join(outSubdir, "conf.json"), 'w') as f:
                    json.dump(conf, f, indent=2)
                series = loader.fromBinary(outSubdir)

            seriesData = series.rdd.collect()

            expectedData = item.data
            assert_equals(len(expectedData), len(seriesData),
                          "Differing numbers of k/v pairs in item %d; expected %d, got %d" %
                          (itemidx, len(expectedData), len(seriesData)))

            for expected, actual in zip(expectedData, seriesData):
                expectedKeys = tuple(expected[0])
                expectedType = smallestFloatType(item.valDtype)
                expectedVals = array(expected[1], dtype=expectedType)
                assert_equals(expectedKeys, actual[0],
                              "Key mismatch in item %d; expected %s, got %s" %
                              (itemidx, str(expectedKeys), str(actual[0])))
                assert_true(allclose(expectedVals, actual[1]),
                            "Value mismatch in item %d; expected %s, got %s" %
                            (itemidx, str(expectedVals), str(actual[1])))
                assert_equals(expectedType, str(actual[1].dtype),
                              "Value type mismatch in item %d; expected %s, got %s" %
                              (itemidx, expectedType, str(actual[1].dtype)))
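
The conf.json branch of this test relies on fromBinary discovering the record layout from a configuration file stored alongside the binary data. As a reference, here is a minimal hedged sketch of writing such a file before calling fromBinary; the field names ('input', 'nkeys', 'nvalues', 'keytype', 'valuetype') mirror the conf dict built in the test above, while the directory path and the concrete counts and dtypes are illustrative assumptions only.

import json
import os

# Illustrative directory that would hold the flat binary series files.
dataDir = '/tmp/series-data'
if not os.path.isdir(dataDir):
    os.makedirs(dataDir)

# Same fields as the conf dict assembled in the test above:
# record layout (key/value counts) plus numpy dtypes for keys and values.
conf = {
    'input': dataDir,
    'nkeys': 3,              # e.g. (x, y, z) coordinate keys
    'nvalues': 12,           # values per record
    'keytype': 'int16',
    'valuetype': 'float32',
}

# Written next to the data so that loader.fromBinary(dataDir) can pick it up
# without nkeys/nvalues/keyType/valueType being passed explicitly.
with open(os.path.join(dataDir, 'conf.json'), 'w') as f:
    json.dump(conf, f, indent=2)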
Example No. 2
    def loadSeries(self, dataPath, nkeys=None, nvalues=None, inputFormat='binary', minPartitions=None,
                   maxPartitionSize='32mb', confFilename='conf.json', keyType=None, valueType=None, keyPath=None,
                   varName=None):
        """
        Loads a Series object from data stored as binary, text, npy, or mat.

        For binary and text, supports single files or multiple files stored on a local file system,
        a networked file system (mounted and available on all cluster nodes), Amazon S3, or HDFS.
        For local formats (npy and mat), only local file systems are currently supported.

        Parameters
        ----------
        dataPath: string
            Path to data files or directory, as either a local filesystem path or a URI.
            May include a single '*' wildcard in the filename. Examples of valid dataPaths include
            'local/directory/*.stack', 's3n:///my-s3-bucket/data/', or 'file:///mnt/another/directory/'.

        nkeys: int, optional (required if `inputFormat` is 'text'), default = None
            Number of keys per record (e.g. 3 for (x, y, z) coordinate keys). Must be specified for
            text data; can be specified here or in a configuration file for binary data.

        nvalues: int, optional (required if `inputFormat` is 'text'), default = None
            Number of values per record. Must be specified here or in a configuration file for binary data.

        inputFormat: {'text', 'binary', 'npy', 'mat'}, optional, default = 'binary'
            Format of the data to be read.

        minPartitions: int, optional, default = SparkContext.minParallelism
            Minimum number of Spark partitions to use. Applies only to text data.

        maxPartitionSize: string, optional, default = '32mb'
            Maximum size of partitions as a Java-style memory string, e.g. '32mb' or '64mb';
            indirectly controls the number of Spark partitions. Applies only to binary data.

        confFilename: string, optional, default = 'conf.json'
            Path to a JSON file with configuration options including 'nkeys', 'nvalues',
            'keyType', and 'valueType'. If a file is not found at the given path, the base
            directory of 'dataPath' will be checked. Parameters passed explicitly to this method
            override values found in the conf file.

        keyType: string or numpy dtype, optional, default = None
            Numerical type of keys, will override conf file.

        valueType: string or numpy dtype, optional, default = None
            Numerical type of values, will override conf file.

        keyPath: string, optional, default = None
            Path to file with keys when loading from npy or mat.

        varName: string, optional, default = None
            Variable name to load (MAT files only).

        Returns
        -------
        data: lambdaimage.rdds.Series
            A Series object, wrapping an RDD, with (n-tuples of ints) : (numpy array) pairs
        """
        checkParams(inputFormat, ['text', 'binary', 'npy', 'mat'])

        from lambdaimage.rdds.fileio.seriesloader import SeriesLoader
        loader = SeriesLoader(self._sc, minPartitions=minPartitions)

        if inputFormat.lower() == 'binary':
            data = loader.fromBinary(dataPath, confFilename=confFilename, nkeys=nkeys, nvalues=nvalues,
                                     keyType=keyType, valueType=valueType, maxPartitionSize=maxPartitionSize)
        elif inputFormat.lower() == 'text':
            if nkeys is None:
                raise Exception('Must provide number of keys per record for loading from text')
            data = loader.fromText(dataPath, nkeys=nkeys)
        elif inputFormat.lower() == 'npy':
            data = loader.fromNpyLocal(dataPath, keyPath)
        else:
            if varName is None:
                raise Exception('Must provide variable name for loading MAT files')
            data = loader.fromMatLocal(dataPath, varName, keyPath)

        return data
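
As a quick reference for the parameters documented above, here is a hedged usage sketch. It assumes a context object (called tsc here) exposing this loadSeries method; all paths, key/value counts, and dtypes below are illustrative assumptions rather than values from the original source.

# `tsc` is assumed to be an instance of the context class that defines loadSeries.

# Binary input: layout may come from a conf.json in the data directory, or be
# given explicitly as keyword arguments (explicit arguments override the conf file).
series = tsc.loadSeries('hdfs:///data/series/', inputFormat='binary',
                        nkeys=3, nvalues=240, keyType='int16', valueType='float32')

# Text input: nkeys must be provided.
seriesTxt = tsc.loadSeries('file:///mnt/data/series/*.txt', inputFormat='text', nkeys=3)

# MAT input (local file system only): varName is required; keyPath optionally supplies keys.
seriesMat = tsc.loadSeries('/local/path/data.mat', inputFormat='mat',
                           varName='ts', keyPath='/local/path/keys.npy')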