# Keras implementation
import numpy as np
from keras.models import Sequential
from keras.layers import GRU
from reshape import reshape

# data_source and data_target are assumed to be loaded earlier in the script.

batch_size = 1
tsteps = 50
data_dim = 25
epochs = 50

# data splitting
data_source_train = data_source[:100000, :]
data_source_valid = data_source[100000:, :]
data_target_train = data_target[:100000, :]
data_target_valid = data_target[100000:, :]

# zero padding
# source data
data_source_train = reshape(data_source_train, tsteps, data_dim)
data_source_valid = reshape(data_source_valid, tsteps, data_dim)
# target data
data_target_train = reshape(data_target_train, tsteps, data_dim)
data_target_valid = reshape(data_target_valid, tsteps, data_dim)

# checking shapes
print('source data', np.shape(data_source_train))
print('target data', np.shape(data_target_train))

# model definition
model = Sequential()
model.add(GRU(units=70,
              batch_input_shape=(batch_size, tsteps, data_dim),
              return_sequences=True,
              stateful=True))
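# The snippet above stops after the first recurrent layer. Below is a minimal
# sketch of how such a stateful GRU model might be completed and trained; the
# second GRU layer, the Dense output size, and the per-epoch reset_states()
# loop are assumptions, not taken from the original.
from keras.layers import Dense

model.add(GRU(units=70, return_sequences=True, stateful=True))
model.add(Dense(data_dim))   # map each timestep back to the feature dimension
model.compile(loss='mse', optimizer='adam')

# Stateful RNNs are trained one epoch at a time without shuffling, resetting
# the hidden state between passes over the sequence.
for i in range(epochs):
    print('Epoch', i + 1, '/', epochs)
    model.fit(data_source_train, data_target_train,
              batch_size=batch_size,
              epochs=1,
              shuffle=False,
              validation_data=(data_source_valid, data_target_valid))
    model.reset_states()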
import csv
import os
import shutil

# reshape(), Window, and compare_minute() are project-local helpers
# (their imports are omitted in this snippet).


def main(argv):
    # Define input, output, and archive directory names.
    turbine_dir_name = 'reshaped_turbine'
    weather_dir_name = 'raw_weather'
    archive_dir_name = 'archived'
    output_dir_name = 'output'

    # Process raw turbine files into the reshape format:
    # [datetime, sensor id, value]
    reshape()

    # Define current working directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))

    # Define input, output, and archive directories.
    turbine_dir = os.path.join(current_dir, turbine_dir_name)
    weather_dir = os.path.join(current_dir, weather_dir_name)
    turbine_archive_dir = os.path.join(turbine_dir, archive_dir_name)
    weather_archive_dir = os.path.join(weather_dir, archive_dir_name)
    output_dir = os.path.join(current_dir, output_dir_name)

    # Process all of the turbine data.
    # Scan for all CSV files within the turbine data folder.
    for item in os.listdir(turbine_dir):
        if item.split('.')[-1].lower() == 'csv':
            windows = [Window('t14'), Window('t15'), Window('t16'),
                       Window('t17'), Window('t18')]

            # Open input and output files.
            with open(os.path.join(turbine_dir, item)) as input_file:
                input_reader = csv.reader(input_file)
                with open(os.path.join(output_dir, item), 'wb') as output_file:
                    output_writer = csv.writer(output_file)

                    print 'Processing ' + item

                    # Write header into the output file.
                    output_writer.writerow(input_reader.next())

                    for row in input_reader:
                        for window in windows:
                            if row[1] == window.who_am_i():
                                # Add row if the buffer is empty.
                                if window.size() == 0:
                                    window.append(row)
                                    continue
                                if compare_minute(window.startTime(), row[0]) < 9:
                                    window.append(row)
                                    continue
                                if compare_minute(window.startTime(), row[0]) == 9:
                                    window.append(row)
                                    if window.size() >= 7:
                                        output_writer.writerow([window.startTime(),
                                                                window.who_am_i(),
                                                                window.average()])
                                if compare_minute(window.startTime(), row[0]) > 9:
                                    if window.size() >= 7:
                                        output_writer.writerow([window.startTime(),
                                                                window.who_am_i(),
                                                                window.average()])
                                    window.empty()
                                    window.append(row)
                                    continue
                                window.empty()

            shutil.move(os.path.join(turbine_dir, item),
                        os.path.join(turbine_archive_dir, item))

    # Process all of the weather data.
    # Scan for all CSV files within the weather data folder.
    for item in os.listdir(weather_dir):
        if item.split('.')[-1].lower() == 'csv':
            window = Window('speed_avg')

            # Open input and output files.
            with open(os.path.join(weather_dir, item)) as input_file:
                input_reader = csv.reader(input_file)
                with open(os.path.join(output_dir, item), 'wb') as output_file:
                    output_writer = csv.writer(output_file)

                    print 'Processing ' + item

                    input_reader.next()
                    # Write header into the output file.
                    output_writer.writerow(['datetime', 'value'])

                    for row in input_reader:
                        # Add row if the buffer is empty.
                        if window.size() == 0:
                            window.append([row[0], window.who_am_i(), row[2]])
                            continue
                        if compare_minute(window.startTime(), row[0]) < 9:
                            window.append([row[0], window.who_am_i(), row[2]])
                            continue
                        if compare_minute(window.startTime(), row[0]) == 9:
                            window.append([row[0], window.who_am_i(), row[2]])
                            if window.size() >= 7:
                                output_writer.writerow([window.startTime(),
                                                        window.average()])
                        if compare_minute(window.startTime(), row[0]) > 9:
                            if window.size() >= 7:
                                output_writer.writerow([window.startTime(),
                                                        window.average()])
                            window.empty()
                            window.append([row[0], window.who_am_i(), row[2]])
                            continue
                        window.empty()

            shutil.move(os.path.join(weather_dir, item),
                        os.path.join(weather_archive_dir, item))

    return 1
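# Neither the Window class nor the compare_minute()/compare_hour() helpers are
# shown in these drivers. The sketch below is an assumed, minimal implementation
# of the interface the code above (and the std-based driver further down) calls;
# it is reconstructed only from how the methods are used, and the timestamp
# format is a guess.
from datetime import datetime

TIME_FORMAT = '%Y-%m-%d %H:%M:%S'   # assumed timestamp layout


def compare_minute(start, end):
    """Whole minutes elapsed between two timestamp strings."""
    delta = datetime.strptime(end, TIME_FORMAT) - datetime.strptime(start, TIME_FORMAT)
    return int(delta.total_seconds() // 60)


def compare_hour(start, end):
    """Whole hours elapsed between two timestamp strings."""
    delta = datetime.strptime(end, TIME_FORMAT) - datetime.strptime(start, TIME_FORMAT)
    return int(delta.total_seconds() // 3600)


class Window(object):
    """Buffer of [datetime, sensor id, value] rows for a single sensor."""

    def __init__(self, sensor_id):
        self.sensor_id = sensor_id
        self.rows = []

    def who_am_i(self):
        return self.sensor_id

    def append(self, row):
        self.rows.append(row)

    def size(self):
        return len(self.rows)

    def startTime(self):
        return self.rows[0][0]

    def endTime(self):
        return self.rows[-1][0]

    def average(self):
        return sum(float(r[2]) for r in self.rows) / len(self.rows)

    def std(self):
        values = [float(r[2]) for r in self.rows]
        mean = sum(values) / len(values)
        return (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5

    def pop(self):
        return self.rows.pop(0)

    def empty(self):
        self.rows = []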
import h5py
import numpy as np
from keras.models import model_from_json
from reshape import reshape
from convert_nn import convert_waveform

batch_size = 1
tsteps = 50
data_dim = 25

print('loading model')
with open('model/BidirLSTM.json', 'r') as model_json:
    model = model_from_json(model_json.read())
model.load_weights('model/BidirLSTM.h5')

f = h5py.File('data/h5/100001.h5', 'r')
data_source_test = f.get('jnt')
data_source_test = np.array(data_source_test)
print(data_source_test.shape)
data_source_test = reshape(data_source_test, tsteps, data_dim)

print('Predicting')
prediction_test = model.predict(data_source_test, batch_size=batch_size)
print(prediction_test.shape)
prediction_test = prediction_test.reshape(-1, data_dim)
print(prediction_test.shape)

convert_waveform(prediction_test, 'bidir_lstm.wav')
# Second convolution layer: convolution, ReLU, then max pooling.
cnnConv2LayerMatrix = re.RELU_3D(
    conv.convolution_RGB(cnnConv1LayerMatrix, kernelConvLayer2))
cnnConv2LayerMatrix = pool.pooling(cnnConv2LayerMatrix,
                                   poolingSizeConvLayer2,
                                   poolingStrideConvLayer2,
                                   poolType='max')

# Fully connected layer 1 processing.
# First layer weight matrix: (7*7*36, 128) = (1764, 128).
fcVectorLayer1 = np.zeros((1764, 128))
initVector(fcVectorLayer1, 1)

cnnConv2LayerReshape = rshape.reshape(cnnConv2LayerMatrix)
# print('FullyConnectedLayer0 (reshape) = ', cnnConv2LayerReshape)

cnnFullyConnectedLayer1 = matmult.matrixMult_CHn(cnnConv2LayerReshape,
                                                 fcVectorLayer1)
# print('FullyConnectedLayer1 = ', cnnFullyConnectedLayer1)

# Second layer weight matrix: (128, 10).
fcVectorLayer2 = np.zeros((128, 10))
initVector(fcVectorLayer2, 2)

cnnFullyConnectedLayer2 = matmult.matrixMult_CHn(cnnFullyConnectedLayer1,
                                                 fcVectorLayer2)
# print('FullyConnectedLayer2 = ', cnnFullyConnectedLayer2)
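# The helpers used above (rshape.reshape, matmult.matrixMult_CHn, initVector)
# are project-local and not shown here. The plain-NumPy sketch below shows the
# same flatten-plus-two-dense-layers step under the assumption that
# matrixMult_CHn is an ordinary matrix product and that reshape flattens the
# pooled 7 x 7 x 36 feature map into a single row vector.
import numpy as np

rng = np.random.default_rng(0)
pooled = rng.standard_normal((7, 7, 36))        # stand-in for cnnConv2LayerMatrix

flat = pooled.reshape(1, -1)                     # (1, 1764)
w1 = rng.standard_normal((1764, 128)) * 0.01     # stand-in for fcVectorLayer1 after initVector
w2 = rng.standard_normal((128, 10)) * 0.01       # stand-in for fcVectorLayer2 after initVector

hidden = flat @ w1                               # first fully connected layer: (1, 128)
scores = hidden @ w2                             # second fully connected layer: (1, 10)
print(scores.shape)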
import os
import base64

import dash_table
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

from reshape import reshape

url = 'https://github.com/Mahyarazad/mahyarazad/raw/master/Base.xlsx'
df = pd.read_excel(url)
mapbox_secrect_key = os.environ.get('mapBox')

##### DataFrame Reshaping #####
sd = reshape(df).summation
city_data = reshape(df).dict_data_gen
cv = reshape(df).geo
cvt = cv.drop(columns=[cv.columns[3], cv.columns[4]])
cvt['MOM Ratio'] = ((cvt[cvt.columns[-1]] - cvt[cvt.columns[-2]])
                    / cvt[cvt.columns[-2]]).apply('{:.0%}'.format)
cvt = cvt.sort_values(by=cvt.columns[3], ascending=False)
#################################

north_list = [
    'As Sulaymānīyah',
    'Altameem',
    'Dahūk',
    'Arbīl',
# Keep only the red channel:
# kernel[0, 0] = np.array([1, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(3, 3)
# kernel[1, 0] = np.array([1, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(3, 3)
# kernel[2, 0] = np.array([1, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(3, 3)

print("start")
print(time.localtime())

new_image = np.zeros((cnnLayer1Matrix.shape[0], cnnLayer1Matrix.shape[2], 1))
new_image = re.RELU_3D(conv.convolution_RGB(cnnLayer1Matrix, kernel))
new_image2 = pool.pooling(new_image, 4, 4, 'max')

new_image5 = re.RELU_3D(conv.convolution_nb_kernel(new_image2, kernel2))
new_image6 = pool.pooling(new_image5, 4, 4, 'max')

new_image3 = resha.reshape(new_image2)
b = np.zeros((1, 1000))
new_image4 = multi.matrixMult(new_image3, b)

print("end")
print(time.localtime())

# print("input image")
# print(cnnLayer1Matrix)
# print("sum")
# print(new_image)
# print(new_image.shape)
# print("test")
# print(test_image)
# print(test_image.shape)
def handle_rdd(rdd):
    global ss, now, cnt_in, cnt_out
    if not rdd.isEmpty():
        data_in = "data_in.txt"
        data_out = "data_out.txt"

        def has_column(df, col):
            try:
                df[col]
                return True
            except:
                return False

        dataframe = ss.createDataFrame(rdd, Schema.tweet_data)

        ## data_in count
        with open(data_in, "a") as di:
            cnt_in += dataframe.count()
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            di.write(now + '\n')
            di.write('per 5 seconds: ' + str(dataframe.count()) +
                     ' total count: ' + str(cnt_in) + '\n')

        ## remove the 'matching_rules' column
        dataframe = dataframe.drop('matching_rules')
        # dataframe.show(3)

        ## DataFrame to JSON (list of dicts)
        df = dataframe.toJSON().map(lambda x: json.loads(x)).collect()

        ## reshape the JSON: arguments are the json list and the topic
        reshaped_json = reshape.reshape(df, 'covid-19')

        ## convert the list back to an RDD of JSON strings, then to a DataFrame
        new_df = sc.parallelize(reshaped_json).map(lambda x: json.dumps(x))
        new_df = ss.read.json(new_df)
        new_df.show(3)
        print()

        ## data_out count
        with open(data_out, "a") as do:
            cnt_out += new_df.count()
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            do.write(now + '\n')
            do.write('per 5 seconds: ' + str(new_df.count()) +
                     ' total count: ' + str(cnt_out) + '\n')

        output_file_name = "error_message.txt"
        try:
            new_df.write \
                .format("org.apache.spark.sql.cassandra") \
                .mode('append') \
                .options(table="tweets", keyspace="tweettrend") \
                .save()
        except Exception as e:
            with open(output_file_name, "a", encoding="utf-8") as output_file:
                output_file.write(str(type(e)) + ': ' + str(e) + '\n')
                output_file.write(str(reshaped_json) + '\n')
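# The function above only defines the per-batch handler; the snippet does not
# show how it is attached to a stream. Below is a minimal sketch, assuming a
# 5-second batch interval and a socket text source; the real source, schema,
# and parsing step in the original project are unknown.
import json

from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext

ss = SparkSession.builder.appName("tweet-trend").getOrCreate()
sc = ss.sparkContext
cnt_in = 0
cnt_out = 0

ssc = StreamingContext(sc, 5)                       # 5-second micro-batches
lines = ssc.socketTextStream("localhost", 9009)     # hypothetical source
tweets = lines.map(lambda line: json.loads(line))   # assumed one JSON record per line

tweets.foreachRDD(handle_rdd)                       # run the handler on every batch

ssc.start()
ssc.awaitTermination()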
import csv
import datetime
import os
import shutil

# reshape(), Window, and compare_hour() are project-local helpers
# (their imports are omitted in this snippet).


def main(argv):
    start_time = datetime.datetime.now()

    # Define input, output, and archive directory names.
    input_dir_name = 'reshaped'
    archive_dir_name = 'archived'
    output_dir_name = 'output'

    # Must have two input parameters.
    if len(argv) != 2:
        print 'usage: driver.py <sample size (hours)> <threshold for std>'
        return

    # Save the input parameters.
    sample_size = int(argv[0])
    threshold = int(argv[1])

    # Process raw data files into the reshape format:
    # [datetime, sensor id, value]
    reshape()

    # Define current working directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))

    # Define input, output, and archive directories.
    input_dir = os.path.join(current_dir, input_dir_name)
    archive_dir = os.path.join(input_dir, archive_dir_name)
    output_dir = os.path.join(current_dir, output_dir_name)

    # Scan for all CSV files within the input directory.
    for item in os.listdir(input_dir):
        if item.split('.')[-1].lower() == 'csv':
            windows = [Window('t14'), Window('t15'), Window('t16'),
                       Window('t17'), Window('t18')]

            # Open input and output files.
            with open(os.path.join(input_dir, item)) as input_file:
                input_reader = csv.reader(input_file)
                with open(os.path.join(output_dir, item), 'wb') as output_file:
                    output_writer = csv.writer(output_file)

                    print 'Processing ' + item

                    # Write header into the output file.
                    output_writer.writerow(input_reader.next())

                    for row in input_reader:
                        for window in windows:
                            if row[1] == window.who_am_i():
                                # Add row if the buffer is empty.
                                if window.size() == 0:
                                    window.append(row)
                                    continue

                                # Add rows until the buffer contains points
                                # within the window size.
                                if compare_hour(window.startTime(), row[0]) < sample_size:
                                    window.append(row)
                                    continue

                                # Grow the buffer if the std is below threshold.
                                # This is to capture ranges of bad data that could
                                # be larger than the window. Otherwise, pop the
                                # first item off, write it to the output file, and
                                # shift the window.
                                if window.std() > threshold:
                                    output_writer.writerow(window.pop())
                                    window.append(row)
                                    continue

                                window.append(row)
                                if window.std() > threshold:
                                    print 'Sensor ' + window.who_am_i()
                                    print 'Standard deviation below threshold...'
                                    print 'Discarding ' + str(window.size()) + ' items...'
                                    print 'Starting from ' + window.startTime() + \
                                          ' and ending on ' + window.endTime()
                                    window.empty()
                                    window.append(row)

                    # Check to see if the remaining items are within the
                    # threshold of good data.
                    for window in windows:
                        if window.std() > threshold:
                            while window.size() != 0:
                                output_writer.writerow(window.pop())
                        else:
                            print 'Sensor ' + window.who_am_i()
                            print 'Standard deviation below threshold...'
                            print 'Discarding ' + str(window.size()) + ' items...'
                            print 'Starting from ' + window.startTime() + \
                                  ' and ending on ' + window.endTime()

            # Move input file to archive directory.
            shutil.move(os.path.join(input_dir, item),
                        os.path.join(archive_dir, item))

    end_time = datetime.datetime.now()
    difference = end_time - start_time
    difference = difference.total_seconds() / 60.0 / 60.0
    print 'Run Time: ' + str(difference) + ' hours'