def subQuery(ct):
    serialized = example.Serialized()

    # Rebuild the crypto context from the file shipped to the executors via addFile().
    example.ReadSerializationFromFile(SparkFiles.get('cryptocontext.cc'), serialized)
    cryptoContext = example.CryptoContextFactory.DeserializeAndCreateContext(serialized, False)

    # Deserialize the encrypted query and the database ciphertext passed in as a string.
    example.ReadSerializationFromFile(SparkFiles.get('query.enc'), serialized)
    queryCT = cryptoContext.deserializeCiphertext(serialized)

    example.StringToSerialization(ct, serialized)
    dfCT = cryptoContext.deserializeCiphertext(serialized)

    # Homomorphic subtraction: the result decrypts to all zeros when the entries match.
    result = cryptoContext.EvalSub(dfCT, queryCT)

    """
    # Verify that one of them subs to 0
    example.ReadSerializationFromFile(SparkFiles.get('key.sec'), serialized)
    secKey = cryptoContext.deserializeSecretKey(serialized)

    plaintextDec = example.IntPlaintextEncoding([])
    decryptResult = cryptoContext.Decrypt(secKey, [result], plaintextDec, True)
    plaintextDec = example.decodeInts(plaintextDec)
    plaintextDec = plaintextDec[:decryptResult.messageLength]
    print(all(v == 0 for v in plaintextDec))
    """

    result.Serialize(serialized)
    return example.SerializationToString(serialized, '')[1]
def test_add_file_locally(self):
    path = os.path.join(SPARK_HOME, "python/test_support/hello.txt")
    self.sc.addFile(path)
    download_path = SparkFiles.get("hello.txt")
    self.assertNotEqual(path, download_path)
    with open(download_path) as test_file:
        self.assertEqual("Hello World!\n", test_file.readline())
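The test above exercises the basic addFile()/SparkFiles.get() round trip on the driver. A minimal end-to-end sketch of the same pattern on executors, assuming a local file /tmp/lookup.txt exists (the path, app name, and helper are illustrative, not taken from these examples):

from pyspark import SparkContext, SparkFiles

sc = SparkContext(appName="sparkfiles-sketch")
sc.addFile("/tmp/lookup.txt")  # ships the file to every executor

def uses_lookup(x):
    # Inside executor code, pass only the basename to SparkFiles.get().
    with open(SparkFiles.get("lookup.txt")) as f:
        allowed = set(line.strip() for line in f)
    return x in allowed

print(sc.parallelize(["a", "b"]).map(uses_lookup).collect())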
def get_or_extract(archive_path):
    """Given a path returned by add_local_model(), this method will return the local
    directory into which the archive has been extracted.
    If this Python process ever extracted the archive before, we will reuse that copy.
    """
    from pyspark.files import SparkFiles
    import zipfile

    if archive_path in _SparkDirectoryDistributor._extracted_dir_paths:
        return _SparkDirectoryDistributor._extracted_dir_paths[archive_path]

    # BUG: Despite the documentation of SparkContext.addFile() and SparkFiles.get() in Scala
    # and Python, it turns out that we actually need to use the basename as the input to
    # SparkFiles.get(), as opposed to the (absolute) path.
    archive_path_basename = os.path.basename(archive_path)
    local_path = SparkFiles.get(archive_path_basename)

    temp_dir = tempfile.mkdtemp()
    zip_ref = zipfile.ZipFile(local_path, "r")
    zip_ref.extractall(temp_dir)
    zip_ref.close()

    _SparkDirectoryDistributor._extracted_dir_paths[archive_path] = temp_dir
    return _SparkDirectoryDistributor._extracted_dir_paths[archive_path]
def mult(ct1, ct2):
    serialized = example.Serialized()

    # Rebuild the crypto context from the file shipped to the executors via addFile().
    example.ReadSerializationFromFile(SparkFiles.get('cryptocontext.cc'), serialized)
    cryptoContext = example.CryptoContextFactory.DeserializeAndCreateContext(serialized, False)

    ct1 = ct1.value
    example.StringToSerialization(ct1, serialized)
    ct1 = cryptoContext.deserializeCiphertext(serialized)

    ct2 = ct2.value
    example.StringToSerialization(ct2, serialized)
    ct2 = cryptoContext.deserializeCiphertext(serialized)

    # Homomorphic multiplication of the two ciphertexts.
    result = cryptoContext.EvalMult(ct1, ct2)

    """
    # Verify that one of them mults to 0
    example.ReadSerializationFromFile(SparkFiles.get('key.sec'), serialized)
    secKey = cryptoContext.deserializeSecretKey(serialized)

    plaintextDec = example.IntPlaintextEncoding([])
    decryptResult = cryptoContext.Decrypt(secKey, [result], plaintextDec, True)
    plaintextDec = example.decodeInts(plaintextDec)
    plaintextDec = plaintextDec[:decryptResult.messageLength]
    print(all(v == 0 for v in plaintextDec))
    """

    result.Serialize(serialized)
    return Row(value=example.SerializationToString(serialized, '')[1])
def predict_map(rdd):
    from pyspark.files import SparkFiles

    config = bc_config.value
    fmap = bc_fmap.value

    static_dir = SparkFiles.get(bc_static_model_dir.value)
    ckpt_dir = SparkFiles.get("ckpt")

    from dlflow.mgr import Collector, model

    collect = Collector()
    collect(static_dir, "Static models")

    input_cls = model[config.MODEL.input_name]
    dataset = input_cls(fmap).rdd_inputs(rdd, config.MODEL.batch_size)

    model_cls = model[config.MODEL.model_name]
    model_ins = model_cls(fmap)
    model_ins.load_model(ckpt_dir)

    return model_ins.predict_act(dataset)
def execute(self):
    """
    Overrides the execute method of the Reader class to read a file from an
    HTTP/HTTPS location.

    :return: pyspark.sql.DataFrame
        Returns a Spark DataFrame with the data from the specified file.
    """
    try:
        spark_context = self.spark.sparkContext
        # addFile() downloads the remote file to every node; SparkFiles.get()
        # then resolves the local copy by its basename.
        spark_context.addFile(self.location)
        return self.spark.read.format(self.file_format) \
            .load(SparkFiles.get(self.location.split('/')[-1]))
    except AnalysisException:
        raise
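The same addFile()-then-load pattern works outside the Reader class. A minimal sketch, assuming a reachable CSV URL (the URL, app name, and options are illustrative):

from pyspark.sql import SparkSession
from pyspark import SparkFiles

spark = SparkSession.builder.appName("http-read-sketch").getOrCreate()

url = "https://example.com/data/people.csv"  # hypothetical URL
spark.sparkContext.addFile(url)              # downloads the file to every node

# SparkFiles.get() takes the basename of the downloaded file.
df = spark.read.format("csv") \
    .option("header", "true") \
    .load(SparkFiles.get(url.split("/")[-1]))
df.show()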
def get_or_load(archive_path):
    """Given a path returned by add_local_model(), this method will return the loaded model.
    If this Python process ever loaded the model before, we will reuse that copy.
    """
    if archive_path in SparkModelCache._models:
        SparkModelCache._cache_hits += 1
        return SparkModelCache._models[archive_path]

    local_path = SparkFiles.get(archive_path)
    temp_dir = tempfile.mkdtemp()
    zip_ref = zipfile.ZipFile(local_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()

    # We must rely on a supposed cyclic import here because we want this behavior
    # on the Spark Executors (i.e., don't try to pickle the load_model function).
    from mlflow.pyfunc import load_pyfunc  # pylint: disable=cyclic-import

    SparkModelCache._models[archive_path] = load_pyfunc(temp_dir)
    return SparkModelCache._models[archive_path]
def get_or_load(archive_path):
    """Given a path returned by add_local_model(), this method will return the loaded model.
    If this Python process ever loaded the model before, we will reuse that copy.
    """
    if archive_path in SparkModelCache._models:
        SparkModelCache._cache_hits += 1
        return SparkModelCache._models[archive_path]

    # BUG: Despite the documentation of SparkContext.addFile() and SparkFiles.get() in Scala
    # and Python, it turns out that we actually need to use the basename as the input to
    # SparkFiles.get(), as opposed to the (absolute) path.
    archive_path_basename = os.path.basename(archive_path)
    local_path = SparkFiles.get(archive_path_basename)

    temp_dir = tempfile.mkdtemp()
    zip_ref = zipfile.ZipFile(local_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()

    # We must rely on a supposed cyclic import here because we want this behavior
    # on the Spark Executors (i.e., don't try to pickle the load_model function).
    from mlflow.pyfunc import load_pyfunc  # pylint: disable=cyclic-import

    SparkModelCache._models[archive_path] = load_pyfunc(temp_dir)
    return SparkModelCache._models[archive_path]
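Both mlflow snippets above work around the same quirk: a file added under an absolute path is retrieved by its basename. A minimal sketch of that workaround in isolation (the archive path and app name are illustrative):

import os
from pyspark import SparkContext, SparkFiles

sc = SparkContext(appName="basename-sketch")

archive_path = "/tmp/models/model.zip"  # hypothetical local archive
sc.addFile(archive_path)

# Passing the absolute path to SparkFiles.get() would point at a non-existent
# location under the Spark files root; the basename resolves the local copy.
local_path = SparkFiles.get(os.path.basename(archive_path))
print(local_path)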
def get_file_path(name):
    return Path(SparkFiles.get(name))
from pyspark.sql import SparkSession
from pyspark.files import SparkFiles
from nltk.tag import StanfordNERTagger
from nltk.tokenize import word_tokenize
from pyspark.sql.types import StringType, StructType, ArrayType, StructField, Row
from collections import OrderedDict
import nltk

nltk.download()

spark = SparkSession \
    .builder \
    .appName("Python NLTK example") \
    .getOrCreate()

classifier_path = SparkFiles.get("english.all.3class.distsim.crf.ser.gz")
ner_jar = SparkFiles.get("stanford-ner.jar")
st = StanfordNERTagger(classifier_path, ner_jar, encoding='utf-8')


def classify_text(row):
    text = row["body_t"]
    # Collapse array to single value field
    all_text = str.join(' ', text)
    tokenized_text = word_tokenize(all_text)
    classified_text = st.tag(tokenized_text)

    # Group the tagged words by entity class.
    classifiers_dict = dict()
    for word, clz in classified_text:
        if clz in classifiers_dict:
            classifiers_dict[clz].add(word)
        else:
            classifiers_dict[clz] = {word}
from pyspark.files import SparkFiles

if __name__ == '__main__':
    '''
    Usage: xnor_spark
    '''
    spark = SparkSession \
        .builder \
        .appName('xnor_spark') \
        .getOrCreate()
    spark.sparkContext.setLogLevel('ERROR')

    serialized = example.Serialized()
    print(SparkFiles.get('key.pub'))
    example.ReadSerializationFromFile(SparkFiles.get('key.pub'), serialized)

    m = hashlib.sha1()
    pubKey = example.SerializationToString(serialized, '')[1]
    m.update(pubKey.encode('utf-8'))
    dirName = m.hexdigest()[:3]
    print(dirName)

    # The "database"
    start = time.time()
    print('Reading files...')
    df = spark.read.text(glob.glob('./' + dirName + '/server/*.enc'))
    end = time.time()
    print('...took', end - start)

    df.show()
    print(df.rdd.getNumPartitions(), "partitions")