Exemplo n.º 1
0
def plot_inkml(data, plot=True, save=False, classes=False):
    """Draw the traces of an inkml sample with matplotlib.

    Args:
        data: Either the path of an inkml file (str), which is parsed with
            ``parse_inkml``, or already parsed data exposing a ``'trace'``
            entry (one sequence of ``[x, y]`` points per trace) and, when
            ``classes`` is true, a parallel ``'class'`` entry.
        plot: When true, display the figure with ``plt.show()``.
        save: When true, write the figure as a png. Only honoured when
            ``data`` was given as a filename, because the output path is
            derived from that filename.
        classes: When true, colour each trace by its class using the mapping
            read from the ``colors`` file; otherwise draw everything in black.
    """
    filename = None
    if isinstance(data, str):
        filename = data
        data = parse_inkml(data)

    colors = None
    if classes:
        # The colour table is only needed (and only read) for class colouring.
        # NOTE(review): eval() executes whatever the 'colors' file contains;
        # keep that file trusted, or switch to ast.literal_eval if it only
        # ever holds a plain literal.
        with open('colors', 'r') as c:
            colors = eval(c.read())

    traces = data['trace']
    # Iterate over the values rather than indexing with 0..n-1, so parsed data
    # whose index has gaps (e.g. after rows were dropped) still plots.
    labels = data['class'] if classes else [None] * len(traces)
    for trace, label in zip(traces, labels):
        xs = [point[0] for point in trace]
        ys = [point[1] for point in trace]
        line_color = colors[label] if classes else 'black'
        plt.plot(xs, ys, color=line_color, linewidth=0.6)

    plt.axis('equal')  # Constrain proportions
    plt.axis('off')  # Remove axes from figure

    will_save = save and filename is not None
    if will_save:
        # Save BEFORE showing: with a blocking backend the figure is gone once
        # the window is closed, and a later savefig would write a blank image.
        # The target is '<first path component>_png/<rest of the path>' with
        # 'inkml' replaced by 'png'.
        # NOTE(review): for paths starting with './' the first component is
        # '.', so the target folder becomes '._png' - confirm that is intended.
        folder, _, remainder = filename.partition('/')
        plt.savefig(folder + '_png/' + remainder.replace('inkml', 'png'))

    if plot:
        plt.show()

    if will_save:
        # Clear the axes so the next call does not draw over this sample.
        plt.cla()
def remove_random_elements_inkml(inkml_file, percentage=30):
    """Parse an inkml file and drop a random share of its trace groups.

    Args:
        inkml_file: Path of the inkml file, passed to ``parse_inkml``.
        percentage: Share of the distinct ``group_id`` values to remove. The
            resulting count is rounded up (e.g. 30% of 25 groups removes 8)
            and clamped to the number of groups available.

    Returns:
        The parsed data with every row of the selected groups removed
        (rows are dropped in place on the parsed object).
    """
    # Local import keeps this block self-contained; only `randrange` is known
    # to be imported at file level.
    from random import sample

    data = parse_inkml(inkml_file)

    # Sample from the ids that actually exist, once, up front. Re-deriving the
    # random range from the shrinking frame biased the choice towards low ids
    # and could spin forever once every remaining candidate was already used.
    group_ids = list(data['group_id'].unique())
    how_many = math.ceil(len(group_ids) * percentage / 100)
    how_many = max(0, min(how_many, len(group_ids)))

    doomed_ids = sample(group_ids, how_many)

    # Drop every row that belongs to one of the selected groups.
    doomed_rows = data[data['group_id'].isin(doomed_ids)].index
    data.drop(doomed_rows, inplace=True)
    return data
Exemplo n.º 3
0
def plot_inkml_remove_text(data, save=True):
    """Plot an inkml sample in black, leaving out every trace of class 'Text'.

    Args:
        data: Either the path of an inkml file (str), which is parsed with
            ``parse_inkml``, or already parsed data exposing parallel
            ``'trace'`` and ``'class'`` entries.
        save: When true and ``data`` was given as a filename, save the figure
            as a png under ``path/FCinkML_aug/`` with a renamed file so the
            original images are not overwritten.

    Inputs for which ``'b' in data`` is true are treated as test files and are
    not plotted at all.
    """
    print(data)

    # NOTE(review): for a filename this is a plain substring test on the whole
    # path (any 'b' anywhere marks it as a test file) - confirm it matches the
    # dataset's naming scheme.
    if 'b' in data:
        print('Test File')
        return

    filename = None
    if isinstance(data, str):
        filename = data
        data = parse_inkml(data)

    # Walk the values directly so parsed data whose index has gaps still works.
    for trace, label in zip(data['trace'], data['class']):
        if label == 'Text':
            continue  # Text traces are deliberately not drawn
        xs = [point[0] for point in trace]
        ys = [point[1] for point in trace]
        plt.plot(xs, ys, color='black', linewidth=0.6)

    plt.axis('equal')  # Constrain proportions
    plt.axis('off')  # Remove axes from figure

    if save and filename is not None:
        fig1 = plt.gcf()
        plt.draw()
        # Rename on the way out so the augmented image never overwrites the
        # original one.
        fig1.savefig('path/FCinkML_aug/' + filename.partition('/')[2].replace('FCinkML', 'FCinkML_aug').replace('writer', 'writer_augm_').replace('inkml', 'png'))

        # Clear the axes so the next call does not draw over this sample.
        plt.cla()
Exemplo n.º 4
0
# Demo: convert the sample svg recordings to inkml and plot each converted file
n = 1

for i in range(n + 1):
    demo_stem = './NeoSmartpenM1_demo/neo_smtpen_m1_demo_' + str(i)
    svg_file = demo_stem + '.svg'  # Source svg recording
    inkml_file = demo_stem + '.inkml'  # Destination of the conversion
    svg2inkml(svg_file, inkml_file)
    plot_inkml(inkml_file)

# Further test plots on one file of the dataset: first in plain black, then
# with class colours; each variant is fed once as parsed data and once as a
# filename.
show = True
dataset_sample = './FCinkML/writer3_5.inkml'
for plot_options in ({}, {'classes': True}):
    plot_inkml(parse_inkml(dataset_sample), plot=show, **plot_options)
    plot_inkml(dataset_sample, plot=show, **plot_options)

from draw_bb import draw_bb
draw_bb('./FCinkML/writer3_5.inkml', show=True)
def _non_text_group_extents(data):
    """Return one bounding-box dict per trace group, ignoring 'Text' traces.

    Each dict has the keys 'x_min', 'x_max', 'y_min', 'y_max' and 'class'.
    Groups are returned in ascending ``group_id`` order; groups that contain
    no point outside 'Text' traces produce no entry.
    """
    groups = {}
    for trace, group_id, label in zip(data['trace'], data['group_id'], data['class']):
        if label == 'Text' or len(trace) == 0:
            continue
        entry = groups.setdefault(group_id, {'xs': [], 'ys': [], 'class': label})
        entry['class'] = label  # The last non-Text trace of the group names the box
        entry['xs'].extend(point[0] for point in trace)
        entry['ys'].extend(point[1] for point in trace)

    return [
        {
            'x_min': min(entry['xs']),
            'x_max': max(entry['xs']),
            'y_min': min(entry['ys']),
            'y_max': max(entry['ys']),
            'class': entry['class'],
        }
        for _, entry in sorted(groups.items(), key=lambda item: item[0])
    ]


def bounding_boxes(inkml_file, save=False, plot=False, annotation_file='noText_annotation.csv'):
    """Compute the bounding box of every non-Text trace group of an inkml file.

    Args:
        inkml_file: Path of the inkml file, passed to ``parse_inkml``.
        save: When true, append the pixel-coordinate boxes to
            ``annotation_file`` (csv, no header).
        plot: When true, draw the non-Text traces and the boxes and show the
            figure; box colours come from the ``colors`` file.
        annotation_file: csv file the annotations are appended to.

    Returns:
        ``(pboxs, bboxs)``: ``bboxs`` holds the boxes in data coordinates,
        ``pboxs`` is whatever ``transform_coord`` builds from them plus the
        copied 'class' column (and a leading 'filename' column when
        ``save`` is true).
    """
    data = parse_inkml(inkml_file)

    cols = ['x_min', 'x_max', 'y_min', 'y_max', 'class']  # Columns of the box dataframes

    colors = None
    if plot:
        # NOTE(review): eval() executes whatever the 'colors' file contains;
        # keep that file trusted, or switch to ast.literal_eval if it only
        # ever holds a plain literal.
        with open('colors', 'r') as c:
            colors = eval(c.read())

        # Draw the original figure first, without the Text traces.
        for trace, label in zip(data['trace'], data['class']):
            if label != 'Text':
                xs = [point[0] for point in trace]
                ys = [point[1] for point in trace]
                plt.plot(xs, ys, color='black', linewidth=0.6)
        plt.axis('equal')  # Constrain proportions
        plt.axis('off')  # Remove axes from figure

    # Build the frame in one go: DataFrame.append no longer exists in pandas 2.
    rows = _non_text_group_extents(data)
    bboxs = pd.DataFrame(rows, columns=cols)

    if plot:
        # Outline each box in data coordinates; the first corner is repeated
        # to close the rectangle.
        for row in rows:
            x_coord = [row['x_min'], row['x_max'], row['x_max'], row['x_min'], row['x_min']]
            y_coord = [row['y_max'], row['y_max'], row['y_min'], row['y_min'], row['y_max']]
            plt.plot(x_coord, y_coord, color=colors[row['class']], linewidth=1)

    # Convert the data-coordinate boxes to pixel coordinates, one row at a time.
    pboxs = pd.DataFrame(columns=cols)
    for i in range(len(bboxs)):
        pboxs = transform_coord(data, bboxs, i, pboxs)

    if plot:  # The plot is for debugging only
        plt.show()

    # The labels are taken unchanged from the data-coordinate boxes.
    pboxs['class'] = bboxs[['class']].copy()

    if save:
        source_name = inkml_file.split('/')[-1]
        # Image name that matches the augmented png written for this file.
        filename = source_name.replace('writer', 'writer_augm_').replace('.inkml', '.png')

        pboxs.insert(0, 'filename', 'path/' + filename)

        csv_cols = ['filename', 'x_min', 'y_min', 'x_max', 'y_max', 'class']
        pboxs.to_csv(annotation_file, columns=csv_cols, header=False, index=False, mode='a')

        print('Annotation data for ' + source_name + ' saved to ' + annotation_file + '.')

    return pboxs, bboxs
Exemplo n.º 6
0
from parse_inkml import parse_inkml
from transform_coord import transform_coord

# Command-line entry: compute bounding boxes for one inkml file.
parser = argparse.ArgumentParser(description='Returns bounding boxes pixel coordinates for the specified file.')  # Parser definition

parser.add_argument('-file', action='store', dest='file')  # Declare an argument called '-file' whose value is stored in the 'file' attribute
parser.add_argument('-annotation', action='store', dest='annotation')  # Declare an argument called '-annotation' whose value is stored in the 'annotation' attribute

# NOTE(review): the command line is parsed twice, once per value; both values are None when the flag is omitted.
inkml_file = parser.parse_args().file  # Path of the inkml file to process
annotation_file = parser.parse_args().annotation  # Value of the '-annotation' argument

filename = inkml_file.split('/')[len(inkml_file.split('/'))-1].replace('.inkml', '.png')  # Last path component with '.inkml' replaced by '.png' (the image file name)

print('Working on ' + filename + '... ', end='')  # Progress message, left without a trailing newline

data = parse_inkml(inkml_file)  # Parses inkml file

cols = ['x_min', 'x_max', 'y_min', 'y_max', 'class']  # Columns of bboxs dataframe
bboxs = pd.DataFrame(columns=cols)  # Empty dataframe meant to hold the coordinates and the class of each bounding box

for k in range(0, len(data['group_id'].drop_duplicates())):  # For each unique trace group of the inkml file...

    group_x = []  # Contains all x coordinates of all traces in a specific trace group
    group_y = []  # Contains all y coordinates of all traces in a specific trace group

    for i in range(0, len(data['trace'])):  # ...for each trace in the dataset...
        for j in range(0, len(data['trace'][i])):  # ...for each pair of [x, y] coordinates in trace i...
            if data['group_id'][i] == k:  # ...get the [x, y] coordinates in two separate arrays
                label = data['class'][i]  # Identify the class of the current bounding box
                group_x.append(data['trace'][i][j][0])  # Add all x coordinates of trace i to the group x coordinates
                group_y.append(data['trace'][i][j][1])  # Add all y coordinates of trace i to the group y coordinates