Example #1
# Keras implementation
import numpy as np
from keras.models import Sequential
from keras.layers import GRU

from reshape import reshape  # project-local helper; see the sketch below

batch_size = 1
tsteps = 50
data_dim = 25
# Number of training epochs
epochs = 50
# Split into training and validation sets
# (data_source and data_target are loaded outside this snippet).
data_source_train = data_source[:100000, :]
data_source_valid = data_source[100000:, :]
data_target_train = data_target[:100000, :]
data_target_valid = data_target[100000:, :]

# Zero-pad and reshape to (batches, tsteps, data_dim).
# Source data
data_source_train = reshape(data_source_train, tsteps, data_dim)
data_source_valid = reshape(data_source_valid, tsteps, data_dim)
# Target data
data_target_train = reshape(data_target_train, tsteps, data_dim)
data_target_valid = reshape(data_target_valid, tsteps, data_dim)
# Check the resulting shapes.
print('source data', np.shape(data_source_train))
print('target data', np.shape(data_target_train))
# Model definition
model = Sequential()
model.add(
    GRU(units=70,
        batch_input_shape=(batch_size, tsteps, data_dim),
        return_sequences=True,
        stateful=True))
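
Most snippets on this page import a project-local `reshape` helper rather than `numpy.reshape`, and its implementation is not shown (Example #5 even imports a class of the same name from a different project). A minimal sketch consistent with Example #1's zero-padding comment and the `reshape(data, tsteps, data_dim)` call, offered as an assumption rather than the actual helper:

import numpy as np

def reshape(data, tsteps, data_dim):
    # Hypothetical: zero-pad the row count up to a multiple of tsteps, then
    # fold the 2-D array (rows, data_dim) into (batches, tsteps, data_dim).
    rows = data.shape[0]
    padded_rows = int(np.ceil(rows / float(tsteps))) * tsteps
    padded = np.zeros((padded_rows, data_dim))
    padded[:rows, :] = data
    return padded.reshape(-1, tsteps, data_dim)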
Example #2
import csv
import os
import shutil

# Window, compare_minute, and reshape are project-local helpers; a sketch
# of them follows this example.


def main(argv):
    # Define input, output, and archive directory names.
    turbine_dir_name = 'reshaped_turbine'
    weather_dir_name = 'raw_weather'
    archive_dir_name = 'archived'
    output_dir_name  = 'output'
    # Process raw turbine files into the reshaped format:
    # [datetime, sensor id, value]
    reshape()
    # Define current working directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    # Define input, output, and archive directories.
    turbine_dir         = os.path.join(current_dir, turbine_dir_name)
    weather_dir         = os.path.join(current_dir, weather_dir_name)
    turbine_archive_dir = os.path.join(turbine_dir, archive_dir_name)
    weather_archive_dir = os.path.join(weather_dir, archive_dir_name)
    output_dir  = os.path.join(current_dir, output_dir_name)
    # Process all of the turbine data.
    # Scan for all CSV files within the turbine data folder.
    for item in os.listdir(turbine_dir):
        if item.split('.')[-1].lower() == 'csv':
            windows = [Window('t14'), Window('t15'), Window('t16'),
                       Window('t17'), Window('t18')]
            # Open input and output files.
            with open(os.path.join(turbine_dir, item)) as input_file:
                input_reader = csv.reader(input_file)
                with open(os.path.join(output_dir, item), 'wb') as output_file:
                    output_writer = csv.writer(output_file)
                    print 'Processing ' + item
                    # Write header into the output file.
                    output_writer.writerow(input_reader.next())
                    for row in input_reader:
                        for window in windows:
                            if row[1] == window.who_am_i():
                                # Add row if the buffer is empty.
                                if window.size() == 0:
                                    window.append(row)
                                    continue
                                # Buffer rows that fall within the first nine
                                # minutes of the current window.
                                if compare_minute(window.startTime(), row[0]) < 9:
                                    window.append(row)
                                    continue
                                # Row lands on the window boundary: write the
                                # average if enough samples arrived, then fall
                                # through so the window is emptied below.
                                if compare_minute(window.startTime(), row[0]) == 9:
                                    window.append(row)
                                    if window.size() >= 7:
                                        output_writer.writerow([window.startTime(), window.who_am_i(), window.average()])

                                # Row is past the window: flush the old window
                                # and start a new one with this row.
                                if compare_minute(window.startTime(), row[0]) > 9:
                                    if window.size() >= 7:
                                        output_writer.writerow([window.startTime(), window.who_am_i(), window.average()])
                                    window.empty()
                                    window.append(row)
                                    continue
                                window.empty()
            shutil.move(os.path.join(turbine_dir, item), os.path.join(turbine_archive_dir, item))
    # Process all of the weather data.
    # Scan for all CSV files within the weather data folder.
    for item in os.listdir(weather_dir):
        if item.split('.')[-1].lower() == 'csv':
            # Same ten-minute windowing as the turbine data, applied to the
            # average wind speed column.
            window = Window('speed_avg')
            # Open input and output files.
            with open(os.path.join(weather_dir, item)) as input_file:
                input_reader = csv.reader(input_file)
                with open(os.path.join(output_dir, item), 'wb') as output_file:
                    output_writer = csv.writer(output_file)
                    print 'Processing ' + item
                    input_reader.next()
                    # Write header into the output file.
                    output_writer.writerow(['datetime', 'value'])
                    for row in input_reader:
                        # Add row if the buffer is empty.
                        if window.size() == 0:
                            window.append([row[0], window.who_am_i(), row[2]])
                            continue
                        if compare_minute(window.startTime(), row[0]) < 9:
                            window.append([row[0], window.who_am_i(), row[2]])
                            continue
                        if compare_minute(window.startTime(), row[0]) == 9:
                            window.append([row[0], window.who_am_i(), row[2]])
                            if window.size() >= 7:
                                output_writer.writerow([window.startTime(), window.average()])

                        if compare_minute(window.startTime(), row[0]) > 9:
                            if window.size() >= 7:
                                output_writer.writerow([window.startTime(), window.average()])
                            window.empty()
                            window.append([row[0], window.who_am_i(), row[2]])
                            continue
                        window.empty()
            shutil.move(os.path.join(weather_dir, item), os.path.join(weather_archive_dir, item))
    return 1
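
`Window`, `compare_minute`, and (in Example #8) `compare_hour` come from this project's own modules and are not shown on the page. A hypothetical sketch that supports every call the examples make; the real implementation may differ:

import math
from datetime import datetime

TIME_FMT = '%Y-%m-%d %H:%M:%S'  # assumed timestamp format

def compare_minute(start, end):
    # Minutes elapsed between two timestamp strings.
    delta = datetime.strptime(end, TIME_FMT) - datetime.strptime(start, TIME_FMT)
    return delta.total_seconds() / 60.0

class Window(object):
    # Buffers [datetime, sensor id, value] rows for a single sensor.
    def __init__(self, sensor_id):
        self.sensor_id = sensor_id
        self.rows = []

    def who_am_i(self):
        return self.sensor_id

    def size(self):
        return len(self.rows)

    def append(self, row):
        self.rows.append(row)

    def pop(self):
        # Remove and return the oldest buffered row.
        return self.rows.pop(0)

    def empty(self):
        self.rows = []

    def startTime(self):
        return self.rows[0][0]

    def endTime(self):
        return self.rows[-1][0]

    def _values(self):
        return [float(row[2]) for row in self.rows]

    def average(self):
        return sum(self._values()) / self.size()

    def std(self):
        # Population standard deviation of the buffered values.
        values = self._values()
        mean = sum(values) / len(values)
        return math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))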
Example #3
import h5py
import numpy as np

from keras.models import model_from_json

from reshape import reshape
from convert_nn import convert_waveform
batch_size = 1
tsteps = 50
data_dim = 25

print('Loading model')
with open('model/BidirLSTM.json', 'r') as model_json:
    model = model_from_json(model_json.read())

model.load_weights('model/BidirLSTM.h5')

f = h5py.File('data/h5/100001.h5', 'r')
data_source_test = f.get('jnt')
data_source_test = np.array(data_source_test)
print(data_source_test.shape)

data_source_test = reshape(data_source_test, tsteps, data_dim)
print('Predicting')
prediction_test = model.predict(data_source_test, batch_size=batch_size)
print(prediction_test.shape)
prediction_test = prediction_test.reshape(-1, data_dim)
print(prediction_test.shape)

convert_waveform(prediction_test, 'bidir_lstm.wav')
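
For reference, the JSON-plus-weights pair loaded above is the standard Keras split-save pattern; a sketch of the counterpart that would have produced those files (the original training script is not shown):

# Hypothetical counterpart that would have written the files loaded above.
with open('model/BidirLSTM.json', 'w') as f:
    f.write(model.to_json())              # architecture only
model.save_weights('model/BidirLSTM.h5')  # weights only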
Example #4
cnnConv2LayerMatrix = re.RELU_3D(
    conv.convolution_RGB(cnnConv1LayerMatrix, kernelConvLayer2))
cnnConv2LayerMatrix = pool.pooling(cnnConv2LayerMatrix,
                                   poolingSizeConvLayer2,
                                   poolingStrideConvLayer2,
                                   poolType='max')

# Fully connected layer 1

# First layer weight matrix (7*7*36, 128)
fcVectorLayer1 = np.zeros((1764, 128))
initVector(fcVectorLayer1, 1)

cnnConv2LayerReshape = rshape.reshape(cnnConv2LayerMatrix)
#print('FullyConnectedLayer0 (reshape) = ',cnnConv2LayerReshape)
cnnFullyConnectedLayer1 = matmult.matrixMult_CHn(cnnConv2LayerReshape,
                                                 fcVectorLayer1)

#print('FullyConnectedLayer1 = ',cnnFullyConnectedLayer1)

# Second layer weight matrix (128, 10)
fcVectorLayer2 = np.zeros((128, 10))
initVector(fcVectorLayer2, 2)

cnnFullyConnectedLayer2 = matmult.matrixMult_CHn(cnnFullyConnectedLayer1,
                                                 fcVectorLayer2)

#print('FullyConnectedLayer2 = ',cnnFullyConnectedLayer2)
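
The (1, 10) output of the second fully connected layer looks like 10-class logits (the 7*7*36 -> 128 -> 10 shapes suggest an MNIST-style classifier). If so, a softmax would typically follow; a small NumPy sketch, offered as an assumption since the original stops at the matrix multiply:

import numpy as np

def softmax(logits):
    # Shift by the max for numerical stability before exponentiating.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=-1, keepdims=True)

# probabilities = softmax(cnnFullyConnectedLayer2)  # shape (1, 10)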
Example #5
import os
import dash_table
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from reshape import reshape
import base64


url = 'https://github.com/Mahyarazad/mahyarazad/raw/master/Base.xlsx'
df = pd.read_excel(url)
mapbox_secret_key = os.environ.get('mapBox')
#####  DataFrame Reshaping #####
sd = reshape(df).summation
city_data = reshape(df).dict_data_gen
cv = reshape(df).geo
cvt = cv.drop(columns=[cv.columns[3], cv.columns[4]])
# Month-over-month change between the last two monthly columns.
cvt['MOM Ratio'] = ((cvt[cvt.columns[-1]] - cvt[cvt.columns[-2]])
                    / cvt[cvt.columns[-2]]).apply('{:.0%}'.format)
cvt = cvt.sort_values(by=cvt.columns[3], ascending=False)
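
# Hypothetical mini-example of the MOM ratio above, assuming the last two
# columns hold consecutive monthly totals (not part of the original app):
_demo = pd.DataFrame({'City': ['A', 'B'], 'Jan': [100, 80], 'Feb': [120, 60]})
_demo['MOM Ratio'] = ((_demo[_demo.columns[-1]] - _demo[_demo.columns[-2]])
                      / _demo[_demo.columns[-2]]).apply('{:.0%}'.format)
# _demo['MOM Ratio'] is now ['20%', '-25%'].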


#################################


north_list = [
                'As Sulaymānīyah',
                'Altameem',
                'Dahūk',
                'Arbīl',
Example #6
# Keep only the red channel
#kernel[0,0]=np.array([1,0,0,0,0,0,0,0,0]).reshape(3,3)
#kernel[1,0]=np.array([1,0,0,0,0,0,0,0,0]).reshape(3,3)
#kernel[2,0]=np.array([1,0,0,0,0,0,0,0,0]).reshape(3,3)

print("start")
print(time.localtime())
new_image = re.RELU_3D(conv.convolution_RGB(cnnLayer1Matrix, kernel))

new_image2 = pool.pooling(new_image, 4, 4, 'max')

new_image5 = re.RELU_3D(conv.convolution_nb_kernel(new_image2, kernel2))
new_image6 = pool.pooling(new_image5, 4, 4, 'max')
# Note: the flatten below reads new_image2, so the second conv/pool pair
# (new_image5/new_image6) is computed but never used.
new_image3 = resha.reshape(new_image2)
b = np.zeros((1, 1000))
new_image4 = multi.matrixMult(new_image3, b)
print("end")
print(time.localtime())

#print("input image")
#print(cnnLayer1Matrix)

#print("somme")
#print(new_image)
#print(new_image.shape)

#print("test")
#print(test_image)
#print(test_image.shape)
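
`pool.pooling` is another project-local module. A minimal NumPy sketch of what 'max' pooling with size 4 and stride 4 could look like, assuming (H, W, C) inputs; the project's real implementation may differ:

import numpy as np

def max_pool(image, size=4, stride=4):
    # image: (H, W, C). Ragged border pixels are dropped by the floor division.
    h, w, c = image.shape
    out = np.zeros((h // stride, w // stride, c))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            patch = image[i * stride:i * stride + size,
                          j * stride:j * stride + size, :]
            out[i, j, :] = patch.max(axis=(0, 1))
    return out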
Example #7
    def handle_rdd(rdd):
        # Relies on module-level ss (SparkSession), sc (SparkContext),
        # Schema, reshape, json, datetime, and the cnt_in/cnt_out counters.
        global ss, now, cnt_in, cnt_out

        if not rdd.isEmpty():

            data_in = "data_in.txt"
            data_out = "data_out.txt"

            def has_column(df, col):
                try:
                    df[col]
                    return True
                except Exception:
                    return False

            dataframe = ss.createDataFrame(rdd, Schema.tweet_data)

            ## data_in count
            with open(data_in, "a") as di:
                batch_count = dataframe.count()  # run the Spark count once
                cnt_in += batch_count
                now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                di.write(now + '\n')
                di.write('per 5 seconds: ' + str(batch_count) +
                         '  total count: ' + str(cnt_in) + '\n')

            ## remove 'matching_rules' column
            dataframe = dataframe.drop('matching_rules')
            #dataframe.show(3)

            ## dataframe to JSON(list)
            df = dataframe.toJSON().map(lambda x: json.loads(x)).collect()

            ## JSON reshape return
            reshaped_json = reshape.reshape(df,
                                            'covid-19')  ## json(list), topic

            ## convert list to RDD to Json
            new_df = sc.parallelize(reshaped_json).map(lambda x: json.dumps(x))

            ## convert Json to Dataframe
            new_df = ss.read.json(new_df)

            new_df.show(3)
            print()

            ## data_out count
            with open(data_out, "a") as do:
                out_count = new_df.count()  # run the Spark count once
                cnt_out += out_count
                now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                do.write(now + '\n')
                do.write('per 5 seconds: ' + str(out_count) +
                         '  total count: ' + str(cnt_out) + '\n')

            output_file_name = "error_message.txt"

            try:
                new_df.write \
                    .format("org.apache.spark.sql.cassandra") \
                    .mode('append') \
                    .options(table="tweets", keyspace="tweettrend") \
                    .save()

            except Exception as e:
                with open(output_file_name, "a",
                          encoding="utf-8") as output_file:
                    output_file.write(str(type(e)) + ': ' + str(e) + '\n')
                    output_file.write(json.dumps(reshaped_json) + '\n')
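
For context, a handler like `handle_rdd` is normally attached to a DStream with `foreachRDD`. A hedged sketch of the surrounding wiring, assuming a socket source and five-second micro-batches (matching the 'per 5 seconds' counters); the original stream setup is not shown:

from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext

sc = SparkContext(appName="tweettrend")
ss = SparkSession(sc)
ssc = StreamingContext(sc, 5)  # 5-second micro-batches

stream = ssc.socketTextStream("localhost", 9999)  # hypothetical source
stream.foreachRDD(handle_rdd)

ssc.start()
ssc.awaitTermination()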
Example #8
import csv
import datetime
import os
import shutil

# Window, compare_hour, and reshape are project-local helpers; compare_hour
# is sketched after this example, Window after Example #2.


def main(argv):
    start_time = datetime.datetime.now()
    # Define input, output, and archive directory names.
    input_dir_name   = 'reshaped'
    archive_dir_name = 'archived'
    output_dir_name  = 'output'
    # Must have two input parameters.
    if len(argv) != 2:
        print 'usage: driver.py <sample size (hours)> <threshold for std>'
        return
    # Save the input parameters.
    sample_size = int(argv[0])
    threshold   = int(argv[1])
    # Process raw data files into the reshaped format:
    # [datetime, sensor id, value]
    reshape()
    # Define current working directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    # Define input, output, and archive directories
    input_dir   = os.path.join(current_dir, input_dir_name)
    archive_dir = os.path.join(input_dir, archive_dir_name)
    output_dir  = os.path.join(current_dir, output_dir_name)
    # Scans for all CSV files within the input directory.
    for item in os.listdir(input_dir):
        if item.split('.')[-1].lower() == 'csv':

            windows = [Window('t14'), Window('t15'), Window('t16'),
                       Window('t17'), Window('t18')]

            # Open input and output files.
            with open(os.path.join(input_dir, item)) as input_file:
                input_reader = csv.reader(input_file)
                with open(os.path.join(output_dir, item), 'wb') as output_file:
                    output_writer = csv.writer(output_file)
                    print 'Processing ' + item
                    # Write header into the output file.
                    output_writer.writerow(input_reader.next())
                    for row in input_reader:
                        for window in windows:
                            if row[1] == window.who_am_i():
                                # Add row if the buffer is empty.
                                if window.size() == 0:
                                    window.append(row)
                                    continue
                                # Add rows until the buffer contains points
                                # within the window size.
                                if compare_hour(window.startTime(), row[0]) < sample_size:
                                    window.append(row)
                                    continue
                                # Grow the buffer if the std is below
                                # threshold. This is to capture ranges of bad
                                # data that could be larger than the window.
                                # Otherwise, pop the first item off and write
                                # it to the output file and shift the window.
                                if window.std() > threshold:
                                    output_writer.writerow(window.pop())
                                    window.append(row)
                                    continue
                                window.append(row)
                                if window.std() > threshold:
                                    print 'Sensor ' + window.who_am_i()
                                    print 'Standard deviation below threshold...'
                                    print 'Discarding ' + str(window.size()) + ' items...'
                                    print 'Starting from ' + window.startTime() + ' and ending on ' + window.endTime()
                                    window.empty()
                                    window.append(row)
                    # Check to see if the remaining items are within the
                    # threshold of good data.
                    for window in windows:
                        if window.std() > threshold:
                            while window.size() != 0:
                                output_writer.writerow(window.pop())
                        else:
                            print 'Sensor ' + window.who_am_i()
                            print 'Standard deviation below threshold...'
                            print 'Discarding ' + str(window.size()) + ' items...'
                            print 'Starting from ' + window.startTime() + ' and ending on ' + window.endTime()
            # Move input file to archive directory.
            shutil.move(os.path.join(input_dir, item), os.path.join(archive_dir, item))
    end_time = datetime.datetime.now()
    difference = end_time - start_time
    difference = difference.total_seconds() / 60.0 / 60.0
    print 'Run Time: ' + str(difference) + ' hours'
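
`compare_hour` is the hour-granularity analogue of `compare_minute`; the Window sketch after Example #2 already covers the std, pop, and endTime methods used here. A hypothetical sketch, assuming the same timestamp format:

from datetime import datetime

TIME_FMT = '%Y-%m-%d %H:%M:%S'  # assumed timestamp format

def compare_hour(start, end):
    # Hours elapsed between two timestamp strings.
    delta = datetime.strptime(end, TIME_FMT) - datetime.strptime(start, TIME_FMT)
    return delta.total_seconds() / 3600.0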