示例#1
0
def open_sim_out_file(chan):
    """Open the similarity-report CSV for channel *chan* for writing and
    publish the handle in the module-global ``out_file`` (the file is
    closed later by ``close_sim_out_file``)."""
    global out_file
    date_suffix = constants.get_date_str()
    sim_graphs_dir = (constants.path_result_dir + date_suffix +
                      constants.path_graphs_dir + constants.path_sim_graphs_dir)
    report_path = sim_graphs_dir + "similarity_report_" + chan + "_" + date_suffix + ".csv"
    out_file = open(report_path, "w")
示例#2
0
def create_graph_directory():
    """Create the dated graphs result directory plus one sub-directory per
    graph type (radar, similarity, scatter, random-forest).

    Uses ``os.makedirs(..., exist_ok=True)`` instead of an exists-check
    followed by ``makedirs`` — that avoids the check-then-create race and
    creates intermediate directories in one call.
    """
    datestr = constants.get_date_str()
    graphs_results_dir = constants.path_result_dir + datestr + constants.path_graphs_dir
    graph_type_dirs = {
        constants.path_radar_graphs_dir,
        constants.path_sim_graphs_dir,
        constants.path_scatered_graphs_dir,
        constants.path_random_forest_graphs_dir,
    }

    os.makedirs(graphs_results_dir, exist_ok=True)
    for dir_name in graph_type_dirs:
        os.makedirs(graphs_results_dir + dir_name, exist_ok=True)
示例#3
0
def create_scatter_chart(dataframe, feature, sorter):
    """Render a scatter chart of *feature* per output channel, colored by
    the *sorter* column, and save it as a PNG in the dated scatter-graphs
    directory.

    dataframe: features frame; the column named by
        ``constants.channel_out_col_title`` supplies the x axis.
    feature: column plotted on the y axis (also the file-name stem).
    sorter: column used to color the points.
    """
    datestr = constants.get_date_str()
    graphs_results_dir = constants.path_result_dir + datestr + constants.path_graphs_dir
    out_dir_path = graphs_results_dir + constants.path_scatered_graphs_dir
    # exist_ok avoids the exists()/makedirs() race and creates parents.
    os.makedirs(out_dir_path, exist_ok=True)

    figscatter = px.scatter(dataframe,
                            x=constants.channel_out_col_title,
                            y=feature,
                            color=sorter,
                            height=1000,
                            width=1600)
    figscatter.update_layout(margin=dict(l=20, r=20, t=20, b=20))
    # NOTE(review): '\\' makes this path Windows-specific — consider
    # os.path.join if the tool must run elsewhere.
    scatter_chart_file_name = out_dir_path + '\\' + feature + '_scatter_chart.png'
    figscatter.write_image(file=scatter_chart_file_name, format='png')
示例#4
0
def create_radar_charts(dataframe, feature, sorter):
    """Draw a polar (radar) scatter chart of *feature* per output channel
    and save it as a PNG in the dated radar-graphs directory.

    dataframe: features frame; the column named by
        ``constants.channel_out_col_title`` supplies the angular axis.
    feature: column plotted on the radial axis (also chart title and
        file-name stem).
    sorter: column used to color the traces.
    """
    datestr = constants.get_date_str()
    graphs_results_dir = constants.path_result_dir + datestr + constants.path_graphs_dir
    out_dir_path = graphs_results_dir + constants.path_radar_graphs_dir
    # exist_ok avoids the exists()/makedirs() race and creates parents.
    os.makedirs(out_dir_path, exist_ok=True)
    fig = px.scatter_polar(dataframe,
                           r=feature,
                           theta=constants.channel_out_col_title,
                           color=sorter,
                           height=1600,
                           width=1800)
    fig.update_traces(mode="lines+markers",
                      marker=dict(symbol="diamond-open", size=6))
    fig.update_layout(polar=dict(radialaxis=dict(visible=True)),
                      showlegend=True,
                      title=feature,
                      font=dict(family="Courier New, monospace",
                                size=32,
                                color="Black"),
                      margin=dict(l=270, r=80, t=80, b=20))
    # NOTE(review): '\\' makes this path Windows-specific — consider
    # os.path.join if the tool must run elsewhere.
    radar_chart_file_name = out_dir_path + '\\' + feature + '_radar_chart.png'
    fig.write_image(file=radar_chart_file_name, format='png')
示例#5
0
def sort_samples(samples_array, sorter):
    """Bucket every non-empty channel of every sample into the module-global
    ``sorted_samples`` dict (sampler_type -> "brand_product" ->
    "card_channel" -> list of channel_data), extract features per channel,
    and flush the collected features to a dated CSV.

    samples_array: iterable of sample objects exposing ID, card, brand,
        product, note, tags, sampler_type and a ``values`` DataFrame whose
        first column is "time" and remaining columns are channel readings.
    sorter: forwarded to feature_extractor.flush_features_data_frame.
    """
    prot = feature_extractor.protocol_attr()
    for sample in samples_array:
        # Lazily create the nesting levels of the global dict.
        if sample.sampler_type not in sorted_samples:
            sorted_samples[sample.sampler_type] = {}
        brand_prod = sample.brand + "_" + sample.product
        if brand_prod not in sorted_samples[sample.sampler_type]:
            sorted_samples[sample.sampler_type][brand_prod] = {}
        # Column 0 is "time"; everything after it is a channel.
        for channel in sample.values.columns[1:]:
            # Skip channels that recorded nothing (all-zero trace).
            if (sample.values[channel] == 0).all():
                continue
            card_channel = sample.card + "_" + channel
            if card_channel not in sorted_samples[
                    sample.sampler_type][brand_prod]:
                sorted_samples[
                    sample.sampler_type][brand_prod][card_channel] = []
            ch_data = channel_data()
            ch_data.sample_id = sample.ID
            ch_data.note = sample.note
            ch_data.tags = sample.tags
            ch_data.values = sample.values[["time", channel]]
            # Re-base the trace on the reading at index 30 — presumably a
            # pre-exposure baseline sample; TODO confirm protocol timing.
            ch_data.values[channel] -= ch_data.values[channel][30]
            ch_data.values[channel] = signal_process.smooth(
                ch_data.values[channel])
            ch_data.derviate_1 = signal_process.get_derivative_1(
                ch_data.values[channel])
            ch_data.derviate_2 = signal_process.get_derivative_2(
                ch_data.values[channel])
            ch_data.picks_list = feature_extractor.get_picks_indexes(
                ch_data, 0, ch_data.values.size)
            ch_data.protocol = prot
            feature_extractor.extract_features(ch_data, prot)
            sorted_samples[
                sample.sampler_type][brand_prod][card_channel].append(ch_data)
    datestr = constants.get_date_str()
    features_results_dir = constants.path_result_dir + datestr + constants.path_features_dir
    features_file_name = features_results_dir + "features_" + "_" + datestr + ".csv"
    if not os.path.exists(features_results_dir):
        os.makedirs(features_results_dir)
    feature_extractor.flush_features_data_frame(features_file_name, sorter)
示例#6
0
def create_mean_graphs(chan_array, sorter):
    """Plot the per-group mean of raw data, 1st and 2nd derivatives for one
    card/channel and save the 3-row figure as a PNG.

    chan_array: non-empty list of channel_data objects for a single
        card/channel, all sharing one time base.
    sorter: sample_file_parser.tags_col_name or product_col_name; controls
        how samples are grouped and how the title/file name are built.

    Fix vs. the previous revision: ``fig.canvas.set_window_title`` was
    deprecated in Matplotlib 3.4 and removed in 3.6; the title is now set
    through the figure manager.
    """
    tags_dict = {}
    first_ch_data = chan_array[0]
    card = sample_file_parser.get_sample_card(first_ch_data.sample_id)
    if sorter == sample_file_parser.tags_col_name:
        # Grouping by tag implies one product for the whole array.
        product = sample_file_parser.get_sample_prod(first_ch_data.sample_id)
    amount_of_samples = len(chan_array)
    channel = first_ch_data.values.columns[1]
    for ch_data in chan_array:
        tagstr = sample_file_parser.get_sample_tag(ch_data.sample_id)
        if sorter == sample_file_parser.product_col_name:
            product = sample_file_parser.get_sample_prod(ch_data.sample_id)
            tags = tagstr + "_" + product
        else:
            tags = tagstr
        if tags not in tags_dict:
            # Insertion order fixes the subplot order: raw, 1st, 2nd.
            tags_dict[tags] = {
                similarity.raw_data_key: [],
                similarity.first_derivative_key: [],
                similarity.second_derivative_key: [],
            }
        tags_dict[tags][similarity.raw_data_key].append(
            np.array(ch_data.values[ch_data.values.columns[1]]))
        tags_dict[tags][similarity.first_derivative_key].append(
            np.array(ch_data.derviate_1))
        tags_dict[tags][similarity.second_derivative_key].append(
            np.array(ch_data.derviate_2))
    # Element-wise average of each group's collected lines.
    mean_lines = {}
    for tags in tags_dict:
        mean_lines[tags] = {
            key: np.mean(tags_dict[tags][key], axis=0)
            for key in tags_dict[tags]
        }
    figW = 18
    figH = 10
    fig, ax = plt.subplots(3, 1, figsize=(figW, figH))
    time_array = get_time_array()
    for tags in mean_lines:
        for i, data_type in enumerate(mean_lines[tags]):
            line = mean_lines[tags][data_type]
            # Only the first 509 points are plotted — presumably the common
            # protocol length; TODO confirm.
            ax[i].plot(time_array[0:509], line[0:509], label=tags)
            ax[i].legend(loc='upper left', bbox_to_anchor=(-0.2, 1.3))

    ax[0].set_title(similarity.raw_data_key)
    ax[1].set_title(similarity.first_derivative_key)
    ax[2].set_title(similarity.second_derivative_key)
    plt.subplots_adjust(hspace=0.3,
                        left=0.18,
                        bottom=1 / figH,
                        top=1 - 1 / figH)
    if sorter == sample_file_parser.tags_col_name:
        suptitle = "card: " + card + ", channel: " + channel + ", Product: " + product + "\n" + "amount of samples = " + str(
            amount_of_samples)
    else:
        suptitle = "card: " + card + ", channel: " + channel + "\n" + "amount of samples = " + str(
            amount_of_samples)
    fig.suptitle(suptitle, y=1)
    fig.canvas.manager.set_window_title("Average graphs")
    datestr = constants.get_date_str()
    graphs_results_dir = constants.path_result_dir + datestr + constants.path_graphs_dir
    out_dir_path = graphs_results_dir + constants.path_average_graphs_dir
    # exist_ok avoids the exists()/makedirs() race and creates parents.
    os.makedirs(out_dir_path, exist_ok=True)
    if sorter == sample_file_parser.tags_col_name:
        card_chan_prod = card + "_" + channel + "_" + product
    else:
        card_chan_prod = card + "_" + channel
    out_file_name = out_dir_path + "average_graph_" + card_chan_prod + "_" + datestr + ".png"
    plt.savefig(out_file_name)
示例#7
0
def create_sim_plots(groups_dict, chan):
    """Plot every similarity group's channel lines (raw, 1st, 2nd
    derivative) on a 3-row figure, write one CSV row per line via
    ``add_row``, and save the figure as a PNG.

    groups_dict: mapping data-type key (raw/1st/2nd derivative) ->
        group name -> group object with ``members_count`` and
        ``chan_data_array``.
    chan: channel name, used for the similarity-report file name.

    Relies on module globals: ``card``/``channel`` are filled in as a side
    effect of ``add_graph_line`` and reused for the title and file name;
    color/marker indices are reset here per figure.
    """
    global time_array, markers_list, markers_list_index, prod_tag_markers
    global card, channel
    global line_color, color_name_index
    figW = 18
    figH = 10
    fig, ax = plt.subplots(3, 1, figsize=(figW, figH))
    # Reset per-figure plotting state held in module globals.
    color_name_index = 0
    prod_tag_markers.clear()
    markers_list_index = 0
    card = ""
    channel = ""
    group_statistics = "\n"
    keys_list = [
        similarity.raw_data_key, similarity.first_derivative_key,
        similarity.second_derivative_key
    ]
    sub_plot_titles_pre_list = [
        "Sample relative data", "1st derivative", "2nd derivative"
    ]
    xpositions = []
    subplot_index = 0
    open_sim_out_file(chan)
    for key in keys_list:
        group_statistics = "\n"
        color_name_index = 0
        for group in groups_dict[key]:
            add_group_data(groups_dict[key][group], key)
            # Advance to the next color, skipping hard-to-see light colors.
            color_name_index += 1
            while names[color_name_index] in light_colors:
                color_name_index += 1
            Marker = ''
            group_statistics += group + " members->" + str(
                groups_dict[key][group].members_count) + "\n"
            for ch_data in groups_dict[key][group].chan_data_array:
                # Compute protocol phase markers once, from the first line.
                if len(xpositions) == 0:
                    xpositions = feature_extractor.calculate_prot_timing(
                        ch_data.protocol)
                add_graph_line(ax, ch_data, key)
                add_row(key, group, ch_data)
        #ax[subplot_index].set_title(sub_plot_titles_pre_list[subplot_index] + group_statistics)
        ax[subplot_index].set_title(sub_plot_titles_pre_list[subplot_index])
        subplot_index += 1
    close_sim_out_file()
    # Mark protocol phase boundaries (red dashed) and the zero line (green).
    for i in range(3):
        #for xc in ch_data.picks_list:
        #ax[i].axvline(x=xc/10, color='k', linestyle='--')
        for xp in xpositions:
            ax[i].axvline(x=xp / 10, color='r', linestyle='--')
        ax[i].axhline(y=0, color='green', linestyle='dotted')
    ax[0].legend(loc='upper left', bbox_to_anchor=(-0.2, 1.3))
    ax[1].legend(loc='upper left', bbox_to_anchor=(0.8, 1.8))
    ax[2].legend(loc='upper left', bbox_to_anchor=(0, 1.8))
    plt.subplots_adjust(hspace=0.3,
                        left=0.18,
                        bottom=1 / figH,
                        top=1 - 1 / figH)
    fig.suptitle("card: " + card + ", channel: " + channel, y=1)
    plt.get_current_fig_manager().full_screen_toggle()
    datestr = constants.get_date_str()
    graphs_results_dir = constants.path_result_dir + datestr + constants.path_graphs_dir
    graph_file_name = "simgraph_" + card + "_" + channel + "_" + datestr + ".png"
    out_dir_path = graphs_results_dir + constants.path_sim_graphs_dir
    plt.savefig(out_dir_path + graph_file_name)
    #plt.show()
    plt.close(fig)
示例#8
0
# Sort samples by product, build the similarity graphs, then dump the
# bad-samples report collected during similarity grouping.
sample.sort_samples(Samples, sample_file_parser.product_col_name)

graphs_creator.create_sim_graphs(graphs_creator.split_by_prod)
#graphs_creator.create_sim_graphs(graphs_creator.split_by_tag)

datestr = constants.get_date_str()

out_dir_path = constants.path_result_dir + datestr
out_file_name = out_dir_path + "/badsamples_report_" + datestr + ".csv"
# Context manager guarantees the report is closed even if a write fails.
with open(out_file_name, "w") as out_file:
    out_file.write("sample id, data type,chan card,tags,refcount\n")
    # Sorted keys give the report a stable, reproducible row order.
    for sample_key in sorted(similarity.bad_sample_dict):
        bad_samp = similarity.bad_sample_dict[sample_key]
        line = str(
            bad_samp.sample_id
        ) + "," + bad_samp.key + "," + bad_samp.chan_card + "," + bad_samp.tags + "," + str(
            bad_samp.refcount) + "\n"
        out_file.write(line)
        print(line)
示例#9
0
def runRandomForest(dataframe):
    """Train a random-forest classifier on per-product rows whose channel
    features are flattened into one wide row each, print the test accuracy,
    and save a bar chart of the 35 most important features.

    dataframe: features frame with product and tags columns followed by the
        per-channel feature columns (one row per sample/channel).

    Fixes vs. the previous revision: drops the unused ``features_list``
    local, drops the no-op ``plt.legend()`` (no labeled artists, so it only
    emitted a warning), and uses ``makedirs(..., exist_ok=True)``.
    """
    tagsCoulmnIndex = dataframe.columns.get_loc(
        sample_file_parser.tags_col_name)
    channels_list = dataframe[constants.channel_out_col_title].unique()

    # Feature columns are everything after the tags column; wide column
    # names are "<channel>_<feature>" for every channel/feature pair.
    feature_cols = dataframe.columns[(tagsCoulmnIndex + 1):]
    featureList = [
        channel + "_" + feature for channel in channels_list
        for feature in feature_cols
    ]
    randomForestDF = pd.DataFrame(
        columns=[sample_file_parser.product_col_name] +
        [sample_file_parser.tags_col_name] + featureList)
    for product in dataframe[sample_file_parser.product_col_name].unique():
        rows_array = dataframe.loc[dataframe[
            sample_file_parser.product_col_name] == product]
        out_rows = []
        i = 0
        for channel in rows_array[constants.channel_out_col_title].unique():
            channel_rows = rows_array.loc[rows_array[
                constants.channel_out_col_title] == channel]
            i = 0
            for index, row in channel_rows.iterrows():
                row_flat = row.values.tolist()
                if len(out_rows) < (i + 1):
                    # First channel for this sample index: keep product/tags,
                    # drop the two leading id columns — TODO confirm layout.
                    out_rows.append(row_flat[2:])
                else:
                    if len(out_rows[i]) == 0:
                        out_rows[i] += row_flat[2:]
                    else:
                        # Later channels append feature values only (skip the
                        # product/tags already present) — TODO confirm the
                        # 4-column offset against the frame layout.
                        out_rows[i] += row_flat[4:]
                i += 1
        # One wide row per sample index counted in the last channel.
        for j in range(i):
            randomForestDF.loc[len(randomForestDF)] = out_rows[j]

    x = randomForestDF[featureList]
    # Classify by product (switch to tags_col_name to classify by tag).
    y = randomForestDF[sample_file_parser.product_col_name]
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1)
    clf = RandomForestClassifier(n_estimators=1000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Model accuracy: fraction of correctly classified test samples.
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

    feature_imp = pd.Series(clf.feature_importances_,
                            index=featureList).sort_values(ascending=False)
    top_feature_imp = feature_imp[0:35]
    y_pos = np.arange(len(top_feature_imp))
    plt.figure(figsize=(20, 10))
    bars = plt.bar(y_pos,
                   top_feature_imp,
                   align='center',
                   alpha=1,
                   color=np.random.rand(len(top_feature_imp), 3))

    # Annotate each bar with its importance score.
    for rect in bars:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2.0,
                 height,
                 '%.2f' % height,
                 ha='center',
                 va='bottom',
                 fontsize=6)
    plt.xticks(y_pos, top_feature_imp.index, rotation='vertical', fontsize=8)
    plt.yticks(np.arange(min(feature_imp), max(feature_imp), step=0.01),
               fontsize=6)
    plt.ylabel('Feature Importance Score')
    plt.xlabel('Features')
    plt.title("Visualizing Important Features")
    plt.subplots_adjust(bottom=0.4)
    datestr = constants.get_date_str()
    graphs_results_dir = constants.path_result_dir + datestr + constants.path_graphs_dir
    graph_file_name = "random_forest_" + datestr + ".png"
    out_dir_path = graphs_results_dir + constants.path_random_forest_graphs_dir
    # exist_ok avoids the exists()/makedirs() race and creates parents.
    os.makedirs(out_dir_path, exist_ok=True)
    plt.savefig(out_dir_path + graph_file_name)
    plt.close()