Пример #1
0
def yolo_position_with_noise_generator(yolo_position_folder,
                                       gui_folder=None,
                                       new_positions_folder=None,
                                       new_gui_folder=None,
                                       data_num=500,
                                       multiple=5,
                                       resort=False,
                                       save_origin_file=True):
    """Augment position files by adding Gaussian noise to their coordinates.

    For every index ``i`` in ``range(data_num)`` the file
    ``yolo_position_folder + str(i) + TYPE.TXT`` is read and each line is
    split on whitespace. Column 0 is kept as an integer; every other column
    is parsed as a float, perturbed with noise drawn from ``N(0, 0.0025)``
    and clipped to ``[0, 1]``. Each noisy copy is written to
    ``new_positions_folder`` under the index ``data_num * k + i``.

    Args:
        yolo_position_folder: Path prefix of the source position files.
        gui_folder: Optional path prefix of GUI files. When truthy, the GUI
            file with the same index is written unchanged next to every
            position file that is produced.
        new_positions_folder: Path prefix for the generated position files.
        new_gui_folder: Path prefix for the copied GUI files.
        data_num: Number of source files to process.
        multiple: Upper bound of the copy counter ``k``.
        resort: When true, noisy rows are ordered by column 2, ties broken
            by column 1.
        save_origin_file: When true, the unmodified file is written at index
            ``i`` and noisy copies start at ``k = 1``; otherwise noisy copies
            start at ``k = 0``.
    """
    createFolder(new_positions_folder)
    if new_gui_folder:
        createFolder(new_gui_folder)

    first_copy = 1 if save_origin_file else 0
    for i in range(data_num):
        if gui_folder:
            read_gui = read_file(gui_folder + str(i) + TYPE.GUI, 'noSplit')

        read_positions = read_file(yolo_position_folder + str(i) + TYPE.TXT,
                                   'splitlines')
        positions = [position.split() for position in read_positions]
        print(positions)
        if save_origin_file:
            if gui_folder:
                write_file(read_gui, new_gui_folder + str(i) + TYPE.GUI, 0)
            write_file(positions, new_positions_folder + str(i) + TYPE.TXT, 2)

        # ``np.float`` was only an alias of the builtin ``float`` and has
        # been removed from NumPy, so the builtin is used directly.
        positions = np.array(positions).astype(float)

        for times in range(first_copy, multiple):
            target_index = str(data_num * times + i)
            new_positions = positions.copy()
            if new_positions.shape[0] > 0:
                noise = np.random.normal(
                    0, 0.0025,
                    (new_positions.shape[0], new_positions.shape[1] - 1))
                new_positions[:, 1:] = (new_positions[:, 1:] + noise).clip(
                    0, 1)
                new_positions = new_positions.tolist()
                for position in new_positions:
                    # Column 0 became a float in the array; restore the int.
                    position[0] = int(position[0])
                if resort:
                    # Same ordering as sorting by column 1 and then doing a
                    # stable sort by column 2.
                    new_positions.sort(key=lambda x: (x[2], x[1]))
            if gui_folder:
                write_file(read_gui, new_gui_folder + target_index + TYPE.GUI,
                           0)
            write_file(new_positions,
                       new_positions_folder + target_index + TYPE.TXT, 2)
        if i % 500 == 0:
            print(i)
Пример #2
0
def to_yolo_training_file(img_folder, positions_folder, data_length,
                          target_path):
    """Write one annotation line per image into ``target_path``.

    For every index in ``range(data_length)`` the image
    ``img_folder + str(index) + TYPE.IMG`` is loaded to obtain its pixel
    size, and the position file ``positions_folder + str(index) + TYPE.TXT``
    is read. Each position row is split on whitespace: columns 1 and 2 are
    scaled by the image width and height to give the minimum corner, and
    columns 3 and 4 (scaled the same way) are added to it to give the maximum
    corner. All four values are truncated to integers.

    The output line has the form
    ``<image path> min_x,min_y,max_x,max_y,<column 0> ...``.

    Args:
        img_folder: Path prefix of the images.
        positions_folder: Path prefix of the position files.
        data_length: Number of indexed samples to convert.
        target_path: File to (over)write with the annotation lines.

    Raises:
        FileNotFoundError: If an image cannot be read by ``cv2.imread``.
    """
    print("img_folder: ", img_folder)
    print("positions_folder: ", positions_folder)
    print("target_path: ", target_path)
    with open(target_path, 'w+') as target:
        for index in range(data_length):
            img_path = img_folder + str(index) + TYPE.IMG
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of
                # raising, so report the offending path explicitly.
                raise FileNotFoundError(
                    'cannot read image: {}'.format(img_path))
            img_high, img_width = img.shape[:2]

            read_positions = read_file(
                positions_folder + str(index) + TYPE.TXT, 'splitlines')
            boxs = []
            for read_position in read_positions:
                position = read_position.split()
                min_x = float(position[1]) * img_width
                min_y = float(position[2]) * img_high
                max_x = min_x + float(position[3]) * img_width
                max_y = min_y + float(position[4]) * img_high
                corners = (int(min_x), int(min_y), int(max_x), int(max_y))
                boxs.append(','.join(
                    [str(value) for value in corners] + [position[0]]))

            line = "{} {}".format(img_path, " ".join(boxs))
            target.write(line + "\n")
Пример #3
0
def compare_attr_class(gt_label_file, predit_label_file, y_axis_labels: list,
                       x_axis_labels: list):
    """Count ground-truth/predicted token pairs into a confusion matrix.

    Both files are read line by line and every line is split on whitespace.
    The first field identifies the sample; the remaining fields are its
    tokens. For each predicted line, every ground-truth line with the same
    first field is compared position by position. When one sequence is
    shorter, the missing positions are counted as ``'EOS'``; two empty
    sequences count as a single ``('EOS', 'EOS')`` pair.

    Args:
        gt_label_file: Path of the ground-truth label file.
        predit_label_file: Path of the predicted label file.
        y_axis_labels: Row labels (ground truth). Must contain ``'EOS'``.
        x_axis_labels: Column labels (prediction). Must contain ``'EOS'``.

    Returns:
        A ``len(y_axis_labels) x len(x_axis_labels)`` array of pair counts.
    """
    # Remember to include "EOS" in both label lists.
    confusion = np.zeros((len(y_axis_labels), len(x_axis_labels)))
    print('initial target_array', confusion)
    gt_lines = read_file(gt_label_file, 'splitlines')
    predit_lines = read_file(predit_label_file, 'splitlines')
    gt_rows = [line.split() for line in gt_lines]

    for predit_line in predit_lines:
        predit_row = predit_line.split()
        for gt_row in gt_rows:
            if gt_row[0] != predit_row[0]:
                continue

            gt_tokens = gt_row[1:]
            predit_tokens = predit_row[1:]
            n_gt = len(gt_tokens)
            n_predit = len(predit_tokens)
            print('compare_attr_class', n_gt, n_predit, gt_row, predit_row)

            if n_gt == 0 and n_predit == 0:
                confusion[y_axis_labels.index('EOS'),
                          x_axis_labels.index('EOS')] += 1
                continue

            if n_gt > n_predit:
                marker = ">"
            elif n_gt < n_predit:
                marker = "<"
            else:
                marker = "="

            for pos in range(max(n_gt, n_predit)):
                has_gt = pos < n_gt
                has_predit = pos < n_predit
                gt_token = gt_tokens[pos] if has_gt else 'EOS'
                predit_token = predit_tokens[pos] if has_predit else 'EOS'
                if has_gt and has_predit:
                    # Only genuine pairs are echoed, not the EOS padding.
                    print(marker, gt_token, predit_token)
                confusion[y_axis_labels.index(gt_token),
                          x_axis_labels.index(predit_token)] += 1
    return confusion
def attribute_classification_evaluate(model: Model, start_idx, end_idx,
                                      input_shape, decoder_config):
    """Evaluate ``model`` on a slice of the attribute data set.

    The lines of ``decoder_config['data_path']`` in
    ``[start_idx, end_idx)`` are fed through ``attributes_data_generator``
    in batches of ``BATCH_SIZE``.

    Returns:
        The formatted loss/accuracy summary string, which is also printed.
    """
    all_lines = read_file(decoder_config['data_path'], 'splitlines')
    tokens = decoder_config['token_list']

    batches = attributes_data_generator(all_lines[start_idx:end_idx],
                                        BATCH_SIZE, input_shape, tokens)
    # Always run at least one step, even for a slice smaller than a batch.
    step_count = max(1, (end_idx - start_idx) // BATCH_SIZE)
    loss, acc = model.evaluate_generator(batches, steps=step_count)

    res = "\nLoss: %.4f, Accuracy: %.3f%% \n " % (loss, acc * 100)
    print(res)
    return res
Пример #5
0
def create_attribute_classfication_dataset(attr_positions_folder,
                                           image_folder,
                                           element_folder,
                                           target_path,
                                           record_path,
                                           label_list,
                                           element_start_index,
                                           file_start_index=0,
                                           file_num=1,
                                           balance=True,
                                           initial_each_element=None,
                                           proportion=None):
    """Crop elements out of images and append them to a labelled data set.

    For each file index in ``[file_start_index, file_start_index + file_num)``
    the image and its position file are read. Every position row is split on
    whitespace; the crop returned by ``splitImage`` is saved as
    ``element_folder + str(element_index) + TYPE.IMG`` and a line
    ``"<element file> <columns 5...>"`` is appended to ``target_path``.
    A summary is written to ``record_path`` at the end.

    Args:
        attr_positions_folder: Path prefix of the position files.
        image_folder: Path prefix of the source images.
        element_folder: Path prefix for the cropped element images.
        target_path: Data-set index file, opened in append mode.
        record_path: File that receives the summary record.
        label_list: Unused by this function; kept for interface
            compatibility.
        element_start_index: Index assigned to the first saved element.
        file_start_index: First file index to process.
        file_num: Number of files to process.
        balance: When true, an element of type ``t`` (column 0) is skipped
            while ``count[t] / (element_index + 1)`` exceeds
            ``proportion[t] / sum(proportion)``.
        initial_each_element: Per-type element counts to start from. The
            list is updated in place. Defaults to four zeros, one per type
            listed in the summary record.
        proportion: Target proportion per type. Defaults to ``[1, 1, 1]``;
            supply one entry per type when type 3 elements are balanced.

    Returns:
        The element index following the last saved element.
    """
    # Fresh lists per call: a mutable default would leak counts between
    # calls, and the summary record needs an entry for type 3.
    num_each_element = ([0, 0, 0, 0]
                        if initial_each_element is None else
                        initial_each_element)
    prop_each_element = [1, 1, 1] if proportion is None else proportion
    # The proportions never change inside the loop, so sum them once.
    total_prop = sum(prop_each_element) if balance else 0

    element_index = element_start_index
    with open(target_path, 'a+') as f:
        for file_idx in range(file_start_index, file_start_index + file_num):
            img = cv2.imread(image_folder + str(file_idx) + TYPE.IMG)
            read_positions = read_file(
                attr_positions_folder + str(file_idx) + TYPE.TXT, 'splitlines')
            for read_position in read_positions:
                position = read_position.split()
                if balance:
                    t = int(position[0])
                    over_represented = total_prop > 0 and (
                        num_each_element[t] / (element_index + 1) >
                        prop_each_element[t] / total_prop)
                    if over_represented:
                        continue
                    num_each_element[t] += 1

                sub_img = splitImage(img, position)
                attributes = position[5:]
                element_file_name = element_folder + str(
                    element_index) + TYPE.IMG
                f.write('{} {}\n'.format(
                    element_file_name, ' '.join([str(a) for a in attributes])))
                cv2.imwrite(element_file_name, sub_img)
                element_index += 1
            if file_idx % 10 == 0:
                print(file_idx)

    record = 'number of used file: {}\nnumber of total_elements: {}\nnumber of type 0 [Title]: {}\nnumber of type 1 [Text]: {}\nnumber of type 2 [Btn]: {}\nnumber of type 3 [Text_input]: {}\nprop: {}'.format(
        file_start_index + file_num, element_index, num_each_element[0],
        num_each_element[1], num_each_element[2], num_each_element[3],
        prop_each_element)
    write_file(record, record_path, 0)
    return element_index
Пример #6
0
 def __init__(self, master, buttonList, img_path, positions_path,
              output_position_path):
     """Build the labelling frame for one image and its position file.

     Args:
         master: Parent widget handed to ``ttk.Frame.__init__``.
         buttonList: Forwarded unchanged to ``self.initWindow``.
         img_path: Image loaded with ``cv2.imread``; the part after the
             last backslash is shown in the window title.
         positions_path: File read line by line; each line is split on
             whitespace and stored in ``self.positions``.
         output_position_path: Stored on the instance as
             ``self.output_position_path``.
     """
     ttk.Frame.__init__(self, master)
     self.close_flag = False
     self.output_position_path = output_position_path
     self.grid()
     # The title uses the text after the last backslash of the path.
     self.winfo_toplevel().title("Label GUI-" + img_path.split('\\')[-1])
     self.winfo_toplevel().geometry("1200x900")
     self.img = cv2.imread(img_path)
     self.class_position = []
     self.nowPositionIndex = 0
     read_positions = read_file(positions_path, 'splitlines')
     self.positions = [position.split() for position in read_positions]
     self.len_position = len(self.positions)
     # State above is set before the widgets are built and the image shown.
     self.initWindow(buttonList)
     self.changeImage()
def attribute_classfication_training(train_model: Model,
                                     encoder_config,
                                     decoder_config,
                                     checkpoint_folder,
                                     analysis_saved_folder,
                                     final_model_saved_path,
                                     initial_epoch=0,
                                     keep_ratio=True):
    """Train the attribute classifier and save its model and history.

    The lines of ``decoder_config['data_path']`` are split into the first
    ``encoder_config['num_train']`` lines for training and the following
    ``encoder_config['num_valid']`` lines for validation. Weights are
    checkpointed under ``checkpoint_folder + str(EPOCHES)``, training stops
    early when ``val_loss`` has not improved for 10 epochs, and the loss and
    accuracy plots plus the raw history are written to
    ``analysis_saved_folder``.

    Returns:
        ``train_model`` after training, also saved to
        ``final_model_saved_path``.
    """
    epoch_tag = str(EPOCHES)
    checkpoint_dir = checkpoint_folder + epoch_tag
    createFolder(checkpoint_dir)

    checkpoint_cb = callbacks.ModelCheckpoint(
        checkpoint_dir + '\\attr-classfy-weights{epoch:05d}.h5',
        save_weights_only=True,
        period=MODE_SAVE_PERIOD)
    early_stop_cb = callbacks.EarlyStopping(monitor='val_loss',
                                            min_delta=0,
                                            patience=10,
                                            verbose=1)

    all_lines = read_file(decoder_config['data_path'], 'splitlines')
    num_train = encoder_config['num_train']
    num_valid = encoder_config['num_valid']
    token_list = decoder_config['token_list']
    input_shape = encoder_config['input_shape']

    train_batches = attributes_data_generator(all_lines[:num_train],
                                              BATCH_SIZE,
                                              input_shape,
                                              token_list,
                                              keep_ratio=keep_ratio)
    valid_batches = attributes_data_generator(
        all_lines[num_train:num_train + num_valid], BATCH_SIZE, input_shape,
        token_list)

    # Guarantee at least one step when a split is smaller than one batch.
    history = train_model.fit_generator(
        train_batches,
        steps_per_epoch=max(1, num_train // BATCH_SIZE),
        validation_data=valid_batches,
        validation_steps=max(1, num_valid // BATCH_SIZE),
        epochs=EPOCHES,
        initial_epoch=initial_epoch,
        callbacks=[checkpoint_cb, early_stop_cb])

    showLoss(history, analysis_saved_folder, 'loss' + epoch_tag)
    showAccuracy(history, analysis_saved_folder, 'accuracy' + epoch_tag)
    history_path = analysis_saved_folder + 'history' + epoch_tag + TYPE.TXT
    write_file(history.history, history_path, 'JSON')
    train_model.save(final_model_saved_path)
    return train_model
Пример #8
0
def create_attribute_classfication_dataset_old(positions_folder, image_folder,
                                               target_path):
    """Collect attribute samples from the first three files into JSON.

    For each of the three indexed image/position pairs, the position rows
    are split on whitespace and handed to ``to_attrcate_data`` together with
    the image and the label list. The results are gathered under ``'data'``,
    the sample counters are filled in (80% training, 20% testing, both
    truncated to integers) and the dictionary is dumped to ``target_path``.

    Returns:
        The dictionary that was written to ``target_path``.
    """
    file_count = 3
    target_content = {
        'labels': [
            Font_color.dark.value, Font_color.primary.value,
            Font_color.white.value, Bg_color.primary.value,
            Bg_color.dark.value, Bg_color.success.value,
            Bg_color.warning.value, Bg_color.danger.value
        ],
        'total_data_file_num': file_count,
        'training_data_num': 0,
        'testing_data_num': 0,
        'total_data_num': 0,
        'data': [],
    }
    labels = target_content['labels']
    samples = target_content['data']

    for index in range(file_count):
        prefix_id = str(index)
        img = cv2.imread(image_folder + prefix_id + TYPE.IMG)
        rows = read_file(positions_folder + prefix_id + TYPE.TXT,
                         'splitlines')
        positions = [row.split() for row in rows]
        samples.extend(to_attrcate_data(positions, img, labels))

        if index % 5 == 0:
            print('file: ', index)

    sample_count = len(samples)
    target_content['total_data_num'] = sample_count
    target_content['training_data_num'] = int(sample_count * 0.8)
    target_content['testing_data_num'] = int(sample_count * 0.2)

    with open(target_path, 'w') as f:
        json.dump(target_content, f)
    print('save json file: ', target_path)
    return target_content
Пример #9
0
def to_Seq2Seq_input(encoder_file_folder,
                     decoder_file_folder,
                     encoder_config,
                     decoder_token_list: list,
                     data_num=None,
                     data_start_idx=0):
    """Build the encoder and decoder input tensors for the seq2seq model.

    For each index in ``[data_start_idx, data_start_idx + data_num)`` the
    encoder file ``encoder_file_folder + str(i) + TYPE.TXT`` and the decoder
    file ``decoder_file_folder + str(i) + TYPE.GUI`` are read.

    Every encoder line is split on whitespace. The first
    ``encoder_config['direct_part']`` fields are copied as they are; the
    remaining fields are attribute tokens encoded according to
    ``encoder_config['class_mode']``:

    * truthy: ``encoder_config['token_list']`` is a list of token groups and
      each group gets one slot holding the 1-based position of the token
      found in that group (0 when none is found);
    * falsy: ``encoder_config['token_list']`` is a flat list and each token
      sets its own slot to 1.

    The decoder sequence is ``'START'`` followed by the tokens of the GUI
    file, one-hot encoded; positions past the end of a sequence are filled
    with the ``'EOS'`` token.

    Args:
        encoder_file_folder: Path prefix of the encoder input files.
        decoder_file_folder: Path prefix of the GUI files.
        encoder_config: Mapping with ``'direct_part'``, ``'class_mode'`` and
            ``'token_list'``.
        decoder_token_list: Tokens passed to ``decoder_tokens_list_to_dict``.
        data_num: Number of samples; when ``None`` the number of entries in
            ``encoder_file_folder`` is used.
        data_start_idx: Index of the first sample.

    Returns:
        ``(encoder_input_data, decoder_input_data, decoder_target_tokens,
        max_decoder_len)``.
    """
    num_total_data = data_num
    if data_num is None:
        num_total_data = len(os.listdir(encoder_file_folder))

    decoder_target_tokens = decoder_tokens_list_to_dict(decoder_token_list)
    encoder_direct_part = encoder_config['direct_part']
    class_mode = encoder_config['class_mode']
    if class_mode:
        encoder_tokens = [{e: (i + 1)
                           for i, e in enumerate(c)}
                          for c in encoder_config['token_list']]
    else:
        encoder_tokens = {
            e: i
            for i, e in enumerate(encoder_config['token_list'])
        }

    print('encoder_tokens', encoder_tokens)

    temp_encoder_all_data = []
    temp_decoder_all_data = []
    max_encoder_len = 0
    max_decoder_len = 0
    print('data_start_idx, data_start_idx+num_total_data: ', data_start_idx,
          data_start_idx + num_total_data)
    for i in range(data_start_idx, data_start_idx + num_total_data):
        input_data = read_file(encoder_file_folder + str(i) + TYPE.TXT,
                               'splitlines')
        gui = read_file(decoder_file_folder + str(i) + TYPE.GUI, 'splitBySpec')

        temp_data = []
        for line in input_data:
            fields = line.split()
            attrs = [0] * len(encoder_tokens)
            for attr in fields[encoder_direct_part:]:
                if class_mode:
                    # A token belongs to at most some of the groups; groups
                    # that do not know it keep their current value.
                    for idx, target_list in enumerate(encoder_tokens):
                        if attr in target_list:
                            attrs[idx] = target_list[attr]
                else:
                    attrs[encoder_tokens[attr]] = 1
            temp_data.append(fields[:encoder_direct_part] + attrs)
        temp_encoder_all_data.append(temp_data)
        max_encoder_len = max(max_encoder_len, len(temp_data))

        decoder_sequence = ['START'] + gui
        temp_decoder_all_data.append(decoder_sequence)
        max_decoder_len = max(max_decoder_len, len(decoder_sequence))

    encoder_input_data = np.zeros((num_total_data, max_encoder_len,
                                   len(encoder_tokens) + encoder_direct_part),
                                  dtype='float32')
    decoder_input_data = np.zeros(
        (num_total_data, max_decoder_len, len(decoder_target_tokens)),
        dtype='float32')
    eos_index = decoder_target_tokens['EOS']
    for i, (temp_data,
            gui) in enumerate(zip(temp_encoder_all_data,
                                  temp_decoder_all_data)):
        for j, data in enumerate(temp_data):
            encoder_input_data[i, j] = data
        for j, token in enumerate(gui):
            decoder_input_data[i, j, decoder_target_tokens[token]] = 1
        # Pad the tail with EOS; the start of the tail is taken from the
        # sequence length instead of the leftover loop variable.
        decoder_input_data[i, len(gui):, eos_index] = 1

    return encoder_input_data, decoder_input_data, decoder_target_tokens, max_decoder_len
Пример #10
0
    cnn_model = 'simple_VGG'
    dataset = 'pix2code'
    eva_record_path = path.EVALUATION_ATTR_CLASS_EVALUATION + dataset + "\\"
    eva_record_name = 'simple_VGG(74-224-256-e100)-test1.txt'
    predit_data_path = path.SELF + 'test-predit\\attr-class-predit\\' + dataset + "\\"
    predit_data_name = 'simple_VGG(74-224-256-e100)-test1'
    predit_data_num = 50
    final_model_saved_path = path.CLASS_ATTR_MODEL_PATH + str(
        EPOCHES) + '\\attr_class_model' + TYPE.H5
    # predit_model_path = r'E:\projects\NTUST\webimg-to-code\assets\attr_class-data3\test\simple-VGG\74-112-256\p0\model\100\attr_class_model.h5'
    predit_model_path = final_model_saved_path
    evaluate_model_path = final_model_saved_path

    encoder_config = get_attribute_encoder_config(1)
    decoder_config = get_attribute_decoder_config(1)
    lines = read_file(decoder_config['data_path'], 'splitlines')

    if TRAINING:
        # weight_path=r'E:\projects\NTUST\webimg-to-code\assets\attr_class-pix2code\test\simple-VGG\74-112-256\p0\model\22\attr_class_model.h5'
        weight_path = None
        createFolder(path.CLASS_ATTR_MODEL_PATH + str(EPOCHES))
        createFolder(path.CLASS_ATTR_WEIGHT + str(EPOCHES))
        train_model = attribute_classification_train_model(
            len(decoder_config['token_list']),
            input_shape=encoder_config['input_shape'],
            optimizer='Adadelta',
            cnn_model=cnn_model,
            weight_path=weight_path)
        attribute_classfication_training(
            train_model,
            encoder_config,
Пример #11
0
        weights = [(1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1),
                   (1, 0, 0, 0), (0.5, 0.5, 0, 0), (0.33, 0.33, 0.33, 0),
                   (0.25, 0.25, 0.25, 0.25)]
        labels = [
            'individual_1_gram', 'individual_2_gram', 'individual_3_gram',
            'individual_4_gram', 'cumulative_1_gram', 'cumulative_2_gram',
            'cumulative_3_gram', 'cumulative_4_gram'
        ]
        with open(history_file_name, 'a+') as file:
            file.write('labels list: {}\n'.format(labels))

            for idx in range(DATA_NUM):
                scores = [None] * 8
                input_seq = encoder_input_data[idx:idx + 1]
                reference_gui = read_file(
                    decoder_config[data_folder] + str(START_IDX + idx) +
                    TYPE.GUI, 'splitBySpec')

                decoded_sentence = seq2seq_predit(encoder_model,
                                                  decoder_model,
                                                  input_seq,
                                                  decoder_target_tokens,
                                                  max_decoder_len,
                                                  result_saved_path=None)

                for i in range(len(labels)):
                    scores[i] = sentence_bleu([reference_gui],
                                              decoded_sentence,
                                              weights=weights[i])
                    record_template['BLEU_SCORE'][labels[i]] += scores[i]
Пример #12
0
def manual_class_tag_from_file(img_path, position_path):
    """Run ``manual_class_tag`` on an image and its position file.

    The position file is read line by line and each line is split on
    whitespace before being passed on together with the loaded image.

    Returns:
        The ``(class_position, interrupt)`` pair from ``manual_class_tag``.
    """
    rows = read_file(position_path, 'splitlines')
    parsed_rows = [row.split() for row in rows]
    image = cv2.imread(img_path)
    tagged, was_interrupted = manual_class_tag(parsed_rows, image)
    return tagged, was_interrupted
Пример #13
0
        createFolder(path.CLASS_SEQ2SEQ_PREDIT_GUI_PATH + str(SEQ2SEQ_EPOCHES))
        decoder_target_tokens = decoder_tokens_list_to_dict(decoder_config['token_list'])
        max_decoder_len = 300
        encoder_model, decoder_model = seq2seq_predit_model(
            load_model(predit_model_path), model_type=seq_model_type, layer2_lstm=layer2_lstm)
        # data_folder = 'testing_data_folder' if predit_test_data else 'data_folder'
        for data_folder, predit_data_num in zip(['data_folder', 'testing_data_folder'], predit_data_nums):
            valid_data_num = predit_data_num

            if BLEU_SCORE:
                bleu = Bleu(predit_data_num, 0, encoder_config[data_folder], decoder_config[data_folder], predit_model_path)
            if ERROR_SCORE:
                eva_error = Eva_error(0, encoder_config[data_folder], decoder_config[data_folder], predit_model_path)
            for i in range(predit_data_num):
                reference_gui = None
                input_seqs = read_file(
                    encoder_config[data_folder]+str(i)+TYPE.TXT, 'splitlines')
                if len(input_seqs)==0:
                    valid_data_num -= 1
                    continue
                input_seqs = [seq.split() for seq in input_seqs]
                input_seq = to_Seq2Seq_encoder_input(input_seqs, encoder_config)
                decoded_sentence = seq2seq_predit(encoder_model, decoder_model,
                                                input_seq=input_seq, decoder_tokens=decoder_target_tokens,
                                                max_decoder_seq_length=max_decoder_len,
                                                #   result_saved_path=path.CLASS_SEQ2SEQ_PREDIT_GUI_PATH + str(SEQ2SEQ_EPOCHES)+'\\'+str(i)+TYPE.GUI
                                                )
                
                print('decoded_sentence length: ', i, len(decoded_sentence)) if i%50==0 and BLEU_SCORE else None

                if BLEU_SCORE:
                    reference_gui = read_file(