_visit_list.cache()

# Get all order dates between START_DATE_ORDER and END_DATE_ORDER:
# ORDER_DATES = get_order_dates_between(start_date=START_DATE_ORDER, end_date=END_DATE_ORDER)
# For now, take a single order date from the command line instead.
ORDER_DATES = [sys.argv[1]]
print(ORDER_DATES)

# Loop over all order dates and generate an invoice for each.
for order_date in ORDER_DATES:
    print("************************************************************************************")
    print("Generating Invoice for " + order_date)
    print("************************************************************************************")
    _generate_invoice(sc=sc, sqlContext=sqlContext,
                      visit_list=_visit_list, order_date=order_date)
    print("************************************************************************************")

# Clear all cached tables.
sqlContext.clearCache()

# Stop the SparkContext.
sc.stop()
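# get_order_dates_between is referenced in the commented-out line above but is
# not defined in this snippet. A minimal sketch, assuming the dates are
# "YYYY-MM-DD" strings and the range is inclusive (the real project helper may
# differ), could look like:
from datetime import datetime, timedelta

def get_order_dates_between(start_date, end_date):
    # Hypothetical helper: return every date from start_date to end_date
    # (inclusive) as "YYYY-MM-DD" strings.
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    return [(start + timedelta(days=d)).strftime("%Y-%m-%d")
            for d in range((end - start).days + 1)]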
from pyspark.sql import HiveContext


def sql_hive_context_example(spark):
    # Create the Hive context object from the underlying SparkContext.
    # (HiveContext is deprecated since Spark 2.0; SparkSession with
    # enableHiveSupport() is the modern entry point.)
    hive_ctx = HiveContext(spark.sparkContext)

    # createDataFrame
    l = [('Alice', 18), ('Bob', 20), ('Charley', 22)]
    df = hive_ctx.createDataFrame(l, ('name', 'age'))
    print("createDataFrame API finished")

    # registerDataFrameAsTable
    hive_ctx.registerDataFrameAsTable(df, "table1")
    print("registerDataFrameAsTable API finished")

    # sql
    tmp_df = hive_ctx.sql("select * from table1")
    tmp_df.show()
    print("sql API finished")

    # table
    tmp_df = hive_ctx.table("table1")
    tmp_df.show()
    print("table API finished")

    # tableNames
    table_names = hive_ctx.tableNames()
    print(table_names)
    print("tableNames API finished")

    # tables
    tables = hive_ctx.tables()
    print(tables)
    print("tables API finished")

    # range
    tmp_df = hive_ctx.range(1, 10, 2)
    tmp_df.show()
    print("range API finished")

    # dropTempTable
    hive_ctx.dropTempTable("table1")
    table_names = hive_ctx.tableNames()
    print(table_names)
    print("dropTempTable API finished")

    # cacheTable & uncacheTable & clearCache
    df = hive_ctx.range(1, 10, 2)
    hive_ctx.registerDataFrameAsTable(df, "table")
    hive_ctx.cacheTable("table")
    hive_ctx.uncacheTable("table")
    hive_ctx.clearCache()
    print("cacheTable & uncacheTable & clearCache API finished")

    # createExternalTable
    # newSession
    # registerFunction
    #     Deprecated in 2.3.0. Use :func:`spark.udf.register` instead.
    # registerJavaFunction
    #     Deprecated in 2.3.0. Use :func:`spark.udf.registerJavaFunction` instead.

    # setConf & getConf
    hive_ctx.setConf("key1", "value1")
    value = hive_ctx.getConf("key1")
    print(value)
    print("setConf & getConf API finished")

    # refreshTable
    #     Exception: An error occurred while calling o26.refreshTable:
    #     Method refreshTable([class java.lang.String]) does not exist

    print("Finish running HiveContext API")
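# A minimal sketch of how sql_hive_context_example could be driven; the app
# name and the use of enableHiveSupport() are assumptions, not taken from the
# original script.
from pyspark.sql import SparkSession

if __name__ == "__main__":
    spark = (SparkSession.builder
             .appName("hive_context_example")   # hypothetical app name
             .enableHiveSupport()
             .getOrCreate())
    sql_hive_context_example(spark)
    spark.stop()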
        # Plot every detected mine on a folium map centred on the sensor field.
        map_hooray = folium.Map([13.820000, 109.000000], zoom_start=10)
        for i in range(0, len(loc)):
            pop_up = (loc['mine_status'][i] + '\n'
                      + 'lat:' + str(round(loc['latitude'][i], 6)) + '\n'
                      + 'long:' + str(round(loc['longitude'][i], 6)))
            folium.Marker([loc['latitude'][i], loc['longitude'][i]],
                          popup=pop_up).add_to(map_hooray)
        map_hooray.save("MinesLocation.html")
        print('Map saved in the home directory')

        print("..............Summary Report....................\n")
        print("Total messages: " + str(ObservationCount))
        print("No. of High Priority Land Mines: " + str(HighAlarmCount))
        print("No. of Moderate Priority Land Mines: " + str(ModerateAlarmCount))
        print("No. of Low Priority Land Mines: " + str(LowAlarmCount))
        print("No. of False Alarms: " + str(FalseAlarmCount))

        # Drop the per-batch temp table and release the cached RDDs.
        spark.dropTempTable("MinedSensorTable")
        jsonRDDString.unpersist()
        jsonRDD.unpersist()
    except Exception as e:
        # Don't kill the streaming job on a bad batch, but surface the error
        # instead of swallowing it silently.
        print("Error while processing batch: " + str(e))


# Run the handler on every micro-batch, then start the streaming job.
lines.foreachRDD(processLandMinesRDD)
spark.clearCache()
ssc.start()
ssc.awaitTermination()
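# The fragment above assumes that a StreamingContext (ssc), an input DStream
# (lines), a SQLContext-style handle (spark), and the various alarm counters
# were all created earlier in the script. A minimal sketch of that setup, with
# the socket source, host/port, and batch interval as assumptions rather than
# details from the original, might look like:
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.streaming import StreamingContext

sc = SparkContext(appName="LandMineDetection")
spark = SQLContext(sc)          # dropTempTable/clearCache are SQLContext methods
ssc = StreamingContext(sc, 10)  # 10-second micro-batches (assumed)
lines = ssc.socketTextStream("localhost", 9999)  # hypothetical sensor feed

# Counters such as ObservationCount, HighAlarmCount, ModerateAlarmCount,
# LowAlarmCount, and FalseAlarmCount would be maintained by the mine
# classification logic inside processLandMinesRDD (not shown here).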