def padding_data(data, dictionary, params, type):
    """Pad ``data`` and map it through ``dictionary`` for one input kind.

    Args:
        data: Raw input handed to the padding helper for the chosen kind.
        dictionary: Mapping handed to ``mapping_dict_msg`` (for ``'msg'``)
            or ``mapping_dict_code`` (for ``'code'``).
        params: Object providing ``msg_length`` (used for ``'msg'``) and
            ``code_line`` / ``code_length`` (used for ``'code'``).
        type: Either ``'msg'`` or ``'code'``. The name shadows the builtin
            but is kept because callers may pass it by keyword.

    Returns:
        Whatever the corresponding ``mapping_dict_*`` helper returns.

    Raises:
        SystemExit: If ``type`` is neither ``'msg'`` nor ``'code'``. The
            exception carries the error message, so an uncaught failure
            exits with a non-zero status and reports on stderr.
    """
    if type == 'msg':
        pad_msg = padding_message(data=data, max_length=params.msg_length)
        return mapping_dict_msg(pad_msg=pad_msg, dict_msg=dictionary)

    if type == 'code':
        # NOTE(review): the preprocessing script in this file also passes
        # max_file= to padding_commit_code, while this call does not --
        # confirm against padding_commit_code's signature.
        pad_code = padding_commit_code(data=data, max_line=params.code_line,
                                       max_length=params.code_length)
        return mapping_dict_code(pad_code=pad_code, dict_code=dictionary)

    # Raise SystemExit directly instead of print() + exit(): the bare exit()
    # helper is injected by the ``site`` module for interactive use and,
    # called without an argument, ends the process with a success status.
    raise SystemExit('Your type is incorrect -- please correct it')
# Preprocessing script: load the pickled commit data, pad and index the
# messages and code changes, then pickle the result for ``project``.
# ``f`` and ``project`` are defined earlier in the file.

# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only run this on trusted data.
obj = pickle.load(f)
f.close()

messages, codes, labels, ids = obj
labels = convert_label(labels)
codes = clean_and_reformat_code(data=codes)
info_label(data=labels)

print('Number of instances in commit message %i and commit code %i ' % (len(messages), len(codes)))
print('Labels: %i' % (len(labels)))

# Dictionaries are built before padding; the code dictionary is built from
# the combined ``codes`` object, which is split only afterwards.
dict_msg, dict_code = dictionary_commit(data=messages, type_data='msg'), dictionary_commit(data=codes, type_data='code')

pad_msg = padding_message(data=messages, max_length=256)
added_code, removed_code = codes
pad_added_code = padding_commit_code(data=added_code, max_file=3, max_line=10, max_length=256)
pad_removed_code = padding_commit_code(data=removed_code, max_file=3, max_line=10, max_length=256)

# Map the padded inputs through their dictionaries; added and removed code
# share the same code dictionary.
pad_msg = mapping_dict_msg(pad_msg=pad_msg, dict_msg=dict_msg)
pad_added_code = mapping_dict_code(pad_code=pad_added_code, dict_code=dict_code)
pad_removed_code = mapping_dict_code(pad_code=pad_removed_code, dict_code=dict_code)

data = (pad_msg, pad_added_code, pad_removed_code, labels, dict_msg, dict_code, ids)

print('Dictionary message: %i -- Dictionary code: %i' % (len(dict_msg), len(dict_code)))
print('Shape of commit message:', pad_msg.shape)
print('Shape of added code:', pad_added_code.shape)
print('Shape of removed code:', pad_removed_code.shape)
print('Shape of labels:', labels.shape)
print('Ids of projects:', project, len(ids))

# Use a context manager so the output file is flushed and closed even if
# pickling fails; the original opened the handle without closing it here.
with open('../data/jit_' + project + '.pkl', 'wb') as write_data:
    pickle.dump(data, write_data)