Example #1
def test_reduce_4():
    reductions = ['first', 'last', 'append', 'prepend', 'count', 'count_unique']
    for red in reductions:
        schema = Schema()
        schema.add_string_column('col1')
        schema.add_string_column('col2')

        tp = TransformProcess(schema)
        tp.reduce('col1', red)

        tp.to_java()
Example #2
def test_reduce_1():
    reductions = ['sum', 'mean', 'std', 'var', 'prod']
    for red in reductions:
        schema = Schema()
        schema.add_string_column('name')
        schema.add_double_column('amount')
        schema.add_integer_column('hours')

        tp = TransformProcess(schema)
        tp.reduce('name', red)

        tp.to_java()
Example #3
def test_derive_col_from_time():
    schema = Schema()
    schema.add_string_column('str1')
    schema.add_string_column('str2')

    tp = TransformProcess(schema)

    tp.string_to_time('str1')
    tp.derive_column_from_time('str1', 'hour', 'hour_of_day')

    assert 'hour' in tp.final_schema.columns

    tp.to_java()
Example #4
def test_replace_empty():
    schema = Schema()
    schema.add_string_column('str1')

    tp = TransformProcess(schema)
    tp.replace_empty_string('str1', 'xx')

    tp.to_java()
Example #5
def test_remove_white_spaces():
    schema = Schema()
    schema.add_string_column('str1')

    tp = TransformProcess(schema)
    tp.remove_white_spaces('str1')

    tp.to_java()
Example #6
def test_lower():
    schema = Schema()
    schema.add_string_column('str1')

    tp = TransformProcess(schema)
    tp.lower('str1')

    tp.to_java()
Example #7
def test_append_string():
    schema = Schema()
    schema.add_string_column('str1')

    tp = TransformProcess(schema)
    tp.append_string('str1', 'xxx')

    tp.to_java()
Example #8
def test_cat_to_int():
    schema = Schema()
    schema.add_categorical_column('cat', ['A', 'B', 'C'])

    tp = TransformProcess(schema)
    tp.categorical_to_integer('cat')

    assert tp.final_schema.get_column_type('cat') == 'integer'

    tp.to_java()
Example #9
def test_rename():
    schema = Schema()
    schema.add_string_column('str1')

    tp = TransformProcess(schema)
    tp.rename_column('str1', 'str2')

    assert 'str1' not in tp.final_schema.columns
    assert 'str2' in tp.final_schema.columns

    tp.to_java()
Example #10
def test_remove():
    schema = Schema()
    schema.add_string_column('str1')
    schema.add_string_column('str2')

    tp = TransformProcess(schema)
    tp.remove_column('str1')

    assert list(tp.final_schema.columns.keys()) == ['str2']

    tp.to_java()
Example #11
def test_concat():
    schema = Schema()
    schema.add_string_column('str1')
    schema.add_string_column('str2')

    tp = TransformProcess(schema)
    tp.concat(['str1', 'str2'], 'str3')

    assert 'str3' in tp.final_schema.columns

    tp.to_java()
Example #12
def test_str_to_time():
    schema = Schema()
    schema.add_string_column('str1')
    schema.add_string_column('str2')

    tp = TransformProcess(schema)

    tp.string_to_time('str1')

    assert tp.final_schema.get_column_type('str1') == 'DateTime'

    tp.to_java()
Example #15
# Imports implied by the snippet below (the original excerpt starts mid-file)
import numpy as np
import pyspark
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from pydatavec import Schema, TransformProcess

# We use pyspark to filter empty lines
sc = pyspark.SparkContext(master='local[*]', appName='iris')
data = sc.textFile('iris.data')
filtered_data = data.filter(lambda d: len(d) > 0)

# Define Input Schema
input_schema = Schema()
input_schema.add_double_column('Sepal length')
input_schema.add_double_column('Sepal width')
input_schema.add_double_column('Petal length')
input_schema.add_double_column('Petal width')
input_schema.add_categorical_column(
    "Species", ["Iris-setosa", "Iris-versicolor", "Iris-virginica"])

# Define Transform Process
tp = TransformProcess(input_schema)
tp.one_hot("Species")

# Do the transformation on spark and convert to numpy
output = tp(filtered_data)
np_array = np.array([[float(i) for i in x.split(',')] for x in output])
x = np_array[:, :-3]
y = np_array[:, -3:]

# Build the Keras model
model = Sequential()
model.add(Dense(10, input_shape=(4,), activation='relu', name='fc1'))
model.add(Dense(10, activation='relu', name='fc2'))
model.add(Dense(3, activation='softmax', name='output'))

optimizer = Adam(lr=0.001)
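
# The excerpt above stops after creating the optimizer. A minimal, hedged
# completion of the Keras workflow follows; the loss, epoch count and batch
# size are illustrative assumptions, not part of the original example.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
model.fit(x, y, epochs=100, batch_size=16)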
Example #16
each person has entered each country.
'''

from pydatavec import Schema, TransformProcess

# Define the input schema

schema = Schema()
schema.add_string_column('person')
schema.add_categorical_column('country_visited',
                              ['USA', 'Japan', 'China', 'India'])
schema.add_string_column('entry_time')

# Define the operations we want to do

tp = TransformProcess(schema)

# Parse date-time
# Format for parsing times is as per http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html

tp.string_to_time('entry_time', 'YYYY/MM/dd')

# Take the "country_visited" column and expand it to a one-hot representation
# So, "USA" becomes [1,0,0,0], "Japan" becomes [0,1,0,0], "China" becomes [0,0,1,0] etc

tp.one_hot('country_visited')

# For each person, reduce all columns using the `sum` op, except for entry_time, which is reduced with `max`:
tp.reduce('person', 'sum', {'entry_time': 'max'})

# Rename column
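# The original snippet is cut off after this comment. A hedged sketch of the
# rename step follows; the post-reduce source column name ('max(entry_time)')
# and the new name are illustrative assumptions, not taken from the original.
tp.rename_column('max(entry_time)', 'most_recent_entry')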
Example #17
input_schema.add_categorical_column("MerchantCountryCode",
                                    ["USA", "CAN", "FR", "MX"])

# Some columns have restrictions on the allowable values that we consider valid:

input_schema.add_double_column(
    "TransactionAmountUSD", 0.0, None, False,
    False)  # $0.0 or more, no maximum limit, no NaN and no Infinite values

input_schema.add_categorical_column("FraudLabel", ["Fraud", "Legit"])

# Let's define some operations to execute on the data...
# We do this by defining a TransformProcess
# At each step, we identify columns by the names we gave them in the input data schema, above

tp = TransformProcess(input_schema)

# Let's remove some columns we don't need

tp.remove_column("CustomerID")
tp.remove_column("MerchantID")

# Now, suppose we only want to analyze transactions involving merchants in the USA or Canada. Let's filter out
# everything except for those countries.
# Here, we are applying a conditional filter: we remove all of the examples that match the condition.
# The condition is that "MerchantCountryCode" is not one of {"USA", "CAN"}

tp.filter(NotInSet("MerchantCountryCode", ["USA", "CAN"]))

# Let's suppose our data source isn't perfect, and we have some invalid data: negative dollar amounts that we want to replace with 0.0
# For positive dollar amounts, we don't want to modify those values
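
# The conditional-replace step itself is missing from this excerpt. A hedged
# sketch is given below: the replace() call and the LessThan condition are
# assumptions modelled on the filter()/NotInSet pattern above, not confirmed
# pydatavec API.
tp.replace(['TransactionAmountUSD'], 0.0, LessThan('TransactionAmountUSD', 0.0))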
Example #18
# Imports implied by the snippet below; 'download_file' is assumed to be a
# small helper (defined earlier in the full example) that fetches a URL to a
# local path.
import os

import pyspark
from pydatavec import Schema, TransformProcess

filename = 'iris.data'  # inferred from the sc.textFile('iris.data') call below
temp_filename = filename + '_temp'
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

if not os.path.isfile(filename):
    if os.path.isfile(temp_filename):
        os.remove(temp_filename)
    download_file(url, temp_filename)
    os.rename(temp_filename, filename)

# We use pyspark to filter empty lines
sc = pyspark.SparkContext(master='local[*]', appName='iris')
data = sc.textFile('iris.data')
filtered_data = data.filter(lambda x: len(x) > 0)

# Define Input Schema
input_schema = Schema()
input_schema.add_double_column('Sepal length')
input_schema.add_double_column('Sepal width')
input_schema.add_double_column('Petal length')
input_schema.add_double_column('Petal width')
input_schema.add_categorical_column("Species", ["Iris-setosa", "Iris-versicolor", "Iris-virginica"])

# Define Transform Process
tp = TransformProcess(input_schema)
tp.categorical_to_integer("Species")

# Do the transformation on spark
output = tp(filtered_data)

print(list(output))