# Bootstrap Apache Spark and SystemML for this notebook session.
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, SparkSession
from pyspark.sql.types import StructType, StructField, DoubleType, IntegerType, StringType

# One local SparkContext / SparkSession shared by the whole notebook.
# (The original created the session before importing pyspark and then created
# it a second time; a single getOrCreate() after the imports is sufficient.)
sc = SparkContext.getOrCreate(SparkConf().setMaster("local[*]"))
spark = SparkSession.builder.getOrCreate()

# systemML
# Install the SystemML 1.3.0-SNAPSHOT build and create its local scratch dir.
# NOTE: bare '!' shell magic is IPython-cell-only syntax and is a SyntaxError
# in a plain Python script; get_ipython().system() is the script-safe form the
# rest of this file already uses.
get_ipython().system('pip install https://github.com/IBM/coursera/blob/master/systemml-1.3.0-SNAPSHOT-python.tar.gz?raw=true')
get_ipython().system('mkdir -p /home/dsxuser/work/systemml')

from systemml import MLContext, dml

# MLContext is the bridge between SystemML and the Spark cluster; passing the
# SparkSession tells SystemML how to talk to Spark.
ml = MLContext(spark)
ml.setConfigProperty("sysml.localtmpdir", "mkdir /home/dsxuser/work/systemml")
print(ml.version())
if not ml.version() == '1.3.0-SNAPSHOT':
    raise ValueError('please upgrade to SystemML 1.3.0, or restart your Kernel (Kernel->Restart & Clear Output)')
# Please fill in the sections labelled with "###YOUR_CODE_GOES_HERE###"
#
# The purpose of this assignment is to learn how feature engineering boosts
# model performance. You will apply a Discrete Fourier Transformation on the
# accelerometer sensor time series, thereby transforming the dataset from the
# time domain to the frequency domain.
#
# After that, you'll use a classification algorithm of your choice to create a
# model and submit the new predictions to the grader. Done.
#
# Please make sure you run this notebook from an Apache Spark 2.3 notebook.
#
# So the first thing we need to ensure is that we are on the latest version of
# SystemML, which is 1.3.0 (as of 20th March '19). Please use the code block
# below to check whether you are already on 1.3.0 or higher. 1.3 contains a
# necessary fix, which is why we run against the SNAPSHOT build.

# In[1]:

# Probe the SystemML version currently installed in this kernel.
from systemml import MLContext

ml = MLContext(spark)
ml.version()

# If you are below version 1.3.0, or you got the error message
# "No module named 'systemml'", please execute the next two code blocks and then
#
# PLEASE RESTART THE KERNEL !!!
#
# Otherwise your changes won't take effect; just double-check every time you
# run this notebook that you are on SystemML 1.3.

# In[1]:

get_ipython().system('pip install https://github.com/IBM/coursera/blob/master/systemml-1.3.0-SNAPSHOT-python.tar.gz?raw=true')
# Re-acquire the Spark session (notebook cell boundary) and verify the
# SystemML version before the benchmark cells below.
spark = SparkSession.builder.getOrCreate()

# In[5]:

# Local scratch directory SystemML uses for temporary files.
get_ipython().system('mkdir -p /home/dsxuser/work/systemml')

# In[6]:

from systemml import MLContext, dml
import numpy as np
import time

ml = MLContext(spark)
ml.setConfigProperty("sysml.localtmpdir", "mkdir /home/dsxuser/work/systemml")
print(ml.version())

# BUG FIX: the notebook installs and requires 1.3.0-SNAPSHOT (see the install
# cells above and the 1.3.0 check earlier in this file); the previous guard
# demanded '1.2.0' and would therefore always raise after a successful upgrade.
if not ml.version() == '1.3.0-SNAPSHOT':
    raise ValueError(
        'please upgrade to SystemML 1.3.0, or restart your Kernel (Kernel->Restart & Clear Output)'
    )

# Congratulations, if you see version 1.3.0-SNAPSHOT, please continue with the
# notebook...
# We use an MLContext to interface with Apache SystemML. Note that we passed a
# SparkSession object as parameter so SystemML now knows how to talk to the
# Apache Spark cluster.
# Now we create some large random matrices to have numpy and SystemML crunch
# on them.

# In[7]:

# 1000 x 10000 uniform-random matrix (float64) used as the benchmark input.
u = np.random.rand(1000, 10000)