def transfer_monitor():
    # Push a fresh copy of the monitor script to every node and reset its log dir.
    for connection in all_connections:
        connection.run('rm -f monitor.py')
        connection.run('rm -rf logs')
        transfer = Transfer(connection)
        transfer.put('monitor.py')
        connection.run('mkdir logs')
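# A sketch of the Fabric plumbing the helpers in this file rely on. The
# hostnames and node lists below are assumptions for illustration; only the
# names (master, producer, kafka, c2, slaveConnections, all_connections) and
# the Connection/Transfer API come from the surrounding code.
from fabric import Connection
from fabric.transfer import Transfer

kafka = Connection('ronald@192.168.122.121')     # broker address used in trainModel
producer = Connection('ronald@192.168.122.153')  # socket host used in runExperiment
master = Connection('ronald@192.168.122.101')    # hypothetical address
c2 = Connection('ronald@192.168.122.102')        # hypothetical address
slaveConnections = [Connection('ronald@192.168.122.%d' % i) for i in (111, 112)]
all_connections = slaveConnections + [master, producer, kafka]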
def trainModel():
    # Transfer package
    transfer = Transfer(master)
    transferKafka = Transfer(kafka)
    # Transfer data generator
    transferKafka.put('./kafkaProducer.py')
    # Start Kafka
    startKafka()
    # Start Spark cluster
    startSparkCluster()
    # Create package
    os.system('sbt package')
    # Transfer files to master
    transferKafka.get('/home/ronald/random_centers.csv')
    transfer.put('./random_centers.csv')
    transferKafka.get('/home/ronald/centers.csv')
    transfer.put('./centers.csv')
    transferKafka.get('/home/ronald/data.csv')
    transfer.put('./data.csv')
    # Transfer Spark application
    transfer.put(
        './target/scala-2.12/streamingkmeansmodeltrained_2.12-0.1.jar')
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
        '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
        '--class example.stream.StreamingKMeansModelTraining '
        '--master spark://' + str(masterHost) + ':7077 --executor-memory 2g '
        '~/streamingkmeansmodeltrained_2.12-0.1.jar '
        '192.168.122.121:9092 '
        'consumer-group '
        'test')
    runChecker()
    stop()
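# Assumed shapes of the cluster-control helpers called above; the real
# implementations are not shown in this file. start-all.sh and
# kafka-server-start.sh are the stock Spark/Kafka launch scripts.
def startSparkCluster(clusters='3'):
    # 'clusters' presumably selects how many workers join the cluster; how it
    # is applied is an assumption here.
    master.run('source /etc/profile && cd $SPARK_HOME && sbin/start-all.sh')

def startKafka(n='100'):
    # Runs the broker in a detached tmux session, matching the tmux usage in
    # stopProducer(); the role of 'n' (message count?) is an assumption.
    kafka.run('tmux new-session -d -s kafka '
              '"$KAFKA_HOME/bin/kafka-server-start.sh '
              '$KAFKA_HOME/config/server.properties"')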
def example_streaming_kmeans():
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--class com.example.kmeans.KMeansExample '
           '--master spark://' + str(remote_host) + ':7077 '
           '--executor-memory 2g ~/spark_example_2.12-0.1.jar')
def stopProducer():
    try:
        producer.run('tmux kill-session -t socket')
        transfer = Transfer(producer)
        transfer.put('./retrieveProducerOutput.py')
        producer.run('python3 ~/retrieveProducerOutput.py')
        transfer.get('producerResult.txt')
        producer.run('rm ~/data/_*')
    except Exception:
        print('Socket already closed!')
def example_uber():
    # Temporary
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--class uber.KMeansUber '
           '--master spark://' + str(remote_host) + ':7077 '
           '--executor-memory 2g ~/spark_example_2.12-0.1.jar')
def example_streaming():
    # Temporary
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/StreamingModeSpark/target/scala-2.12/streamingmodespark_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--class example.stream.StructureStreaming '
           '--master spark://' + str(remote_host) + ':7077 '
           '--deploy-mode cluster '
           '--executor-memory 100g '
           '~/streamingmodespark_2.12-0.1.jar')
def example_datagenerator():
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/StreamingModeSpark/target/scala-2.12/streamingmodespark_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.0 '
           '--class example.stream.DataGenerator '
           '~/streamingmodespark_2.12-0.1.jar '
           '10000 '
           '~/100-bytes-lines.txt '
           '100')
def example_kafka_trial():
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    # transfer.put('/Users/ronnie/Documents/datagenerator/kafka_producer_example.py')
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
           '--class example.stream.DirectKafkaWordCount '
           '~/spark_example_2.12-0.1.jar '
           'localhost:9092 '
           'consumer-group '
           'test')
def example_streaming_kmeans_kafka():
    # Kafka-backed variant of the streaming k-means example above.
    # Transfer package
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    # Transfer sample files
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
           '--class example.stream.StreamingKMeansModelExample '
           '~/spark_example_2.12-0.1.jar '
           'localhost:9092 '
           'consumer-group '
           'test')
def test_networkwordcount():
    # Transfer package
    transfer = Transfer(master)
    # transfer.put('/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar')
    # master.run('rm -rf kmeansModel')
    transfer.put('./socketProducerExample.py')
    start_datagenerator()
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/run-example '
        # '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
        'org.apache.spark.examples.streaming.NetworkWordCount '
        # '--master spark://' + str(master_host) + ':7077 --executor-memory 2g '
        # '~/spark_example_2.12-0.1.jar '
        'localhost '
        '9999'
        # 'test'
    )
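# socketProducerExample.py itself is not shown here; a minimal stand-in that
# serves text lines on port 9999 for the NetworkWordCount-style jobs above
# might look like this (hypothetical file content, assumed input file name):
#
#     import socket
#     server = socket.socket()
#     server.bind(('0.0.0.0', 9999))
#     server.listen(1)
#     conn, _ = server.accept()
#     with conn, open('100-bytes-lines.txt', 'rb') as f:
#         for line in f:
#             conn.sendall(line)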
def testStructuredNetworkWordCount():
    # Transfer package
    transfer = Transfer(master)
    # transfer.put('/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar')
    # master.run('rm -rf kmeansModel')
    transfer.put('./socketProducerExample.py')
    start_spark_cluster()
    start_datagenerator()
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
        '--class org.apache.spark.examples.sql.streaming.StructuredNetworkWordCount '
        '--master spark://' + str(master_host) + ':7077 '
        # '--deploy-mode cluster '
        # '--supervise '
        '--executor-memory 2g '
        'examples/jars/spark-examples_2.12-3.0.0.jar '
        'localhost '
        '9999'
        # 'test'
    )
def runExperiment(clusters='3', numPorts='2', time='60000', executorMem='2g',
                  batchDuration='1'):
    # Transfer files
    transfer = Transfer(master)
    producerTransfer = Transfer(producer)
    # Start monitors
    transferMonitor()
    startMonitor()
    # Transfer producer
    producerTransfer.put('./producer.py')
    startProducer(numPorts)
    # SBT packaging
    os.system('sbt package')
    # Start Spark cluster
    startSparkCluster(clusters)
    # Transfer jar
    transfer.put(
        './target/scala-2.12/socketstreamingkmeansexperiment_2.12-0.1.jar')
    try:
        master.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
                   '--class Experiment '
                   '--master spark://' + str(masterHost) + ':7077 '
                   '--executor-memory ' + executorMem + ' '
                   '~/socketstreamingkmeansexperiment_2.12-0.1.jar '
                   '192.168.122.153 '
                   '10000 ' + numPorts + ' ' + time + ' ' + batchDuration)
    except Exception:
        print('Spark crashed while running')
        print('Application stopped at: {}'.format(
            datetime.now().strftime('%H:%M:%S.%f')))
    finally:
        # Transfer logs
        stopMonitor()
        transferLogs()
        # Restart all VMs
        stop()
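# Hypothetical driver (not part of the original file): sweeps runExperiment
# over several executor-memory settings, keeping its other defaults.
def runMemorySweep():
    for mem in ('2g', '4g', '8g'):
        runExperiment(executorMem=mem)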
def streaming_kmeans():
    # Create package
    os.system('sbt package')
    # Transfer package
    transfer = Transfer(master)
    transfer.put('./target/scala-2.12/spark_example_2.12-0.1.jar')
    # Transfer data generator
    transfer.put('./socketProducerExample.py')
    # Start Spark cluster
    start_spark_cluster()
    start_datagenerator()
    # Start Kafka
    # start_kafka()
    master.run('rm -rf kmeansModel')
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
        # '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
        '--class example.stream.StreamingKMeansModelExample '
        # '--master spark://' + str(master_host) + ':7077 --executor-memory 2g '
        '~/spark_example_2.12-0.1.jar '
        'localhost '
        '9999'
        # 'test'
    )
def transferToProducer(filename):
    transfer = Transfer(producer)
    transfer.put(filename)
def createFiles():
    transfer = Transfer(producer)
    transfer.put('createFiles.py')
    producer.run('python3 createFiles.py 2500 20000 6')
def transfer_to_all(filename):
    for connection in all_connections:
        transfer = Transfer(connection)
        transfer.put(filename)
def transferToMaster(filename):
    transfer = Transfer(master)
    transfer.put(filename)
def createFilesWeibull():
    transfer = Transfer(producer)
    transfer.put('createFilesWeibull.py')
    producer.run('python3 createFilesWeibull.py 10. 50000 6 300')
def transfer_file_to(filename):
    transfer = Transfer(c2)
    transfer.put(filename)
def testKafka(n='100'):
    transfer = Transfer(kafka)
    transfer.put('./kafkaProducer.py')
    transfer.put('./kafkaConsumer.py')
    startKafka(n)
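# kafkaProducer.py / kafkaConsumer.py are not shown here; a minimal producer
# along these lines would work with the 'test' topic used above (sketch using
# the kafka-python package; broker address and message format assumed):
#
#     from kafka import KafkaProducer
#     producer = KafkaProducer(bootstrap_servers='localhost:9092')
#     for i in range(100):
#         producer.send('test', ('message %d' % i).encode())
#     producer.flush()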
def transferToKafka(filename):
    transfer = Transfer(kafka)
    transfer.put(filename)
def runChecker():
    # Transfer checker
    transfer = Transfer(master)
    transfer.put('./checker.py')
    master.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
               '~/checker.py')
def transferMonitor():
    for connection in slaveConnections + [master, producer]:
        connection.run('rm -rf logs')
        transfer = Transfer(connection)
        transfer.put('monitor.py')
        connection.run('mkdir logs')
def closeMonitorPs():
    for connection in slaveConnections + [master, producer]:
        transfer = Transfer(connection)
        transfer.put('closeMonitorPs.sh')
        connection.run('chmod u+x closeMonitorPs.sh')
        connection.run('./closeMonitorPs.sh')
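# closeMonitorPs.sh is not shown in this file; a pure-Python equivalent would
# be (sketch, assuming the monitor runs as "python3 monitor.py" on each node):
def closeMonitorPsPy():
    for connection in slaveConnections + [master, producer]:
        # warn=True keeps Fabric from raising when no monitor process exists.
        connection.run('pkill -f monitor.py', warn=True)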