def loading_data(project):
    """Load the stored train/test data and the two dictionaries of a project.

    Every artifact is fetched through ``loading_variable`` using ``project``
    concatenated with a fixed suffix. The lookups happen in this order:
    ``_train``, ``_test``, ``_dict_msg``, ``_dict_code``.

    Args:
        project: Name prefix shared by the stored artifacts (for example the
            project name used when they were saved).

    Returns:
        A 3-tuple ``(train, test, dictionary)`` where ``dictionary`` is itself
        the pair of values loaded under ``_dict_msg`` and ``_dict_code``.
    """
    train_split = loading_variable(project + '_train')
    test_split = loading_variable(project + '_test')
    # Keep the two dictionaries bundled as one tuple; callers receive them
    # as a single third element.
    msg_dictionary = loading_variable(project + '_dict_msg')
    code_dictionary = loading_variable(project + '_dict_code')
    return train_split, test_split, (msg_dictionary, code_dictionary)
plt.hist(data, bins=range(0, 40)) plt.title(check) plt.xlabel("Length") plt.ylabel("Frequency") plt.show() return mean(data), stdev(data) elif check == 'Length': new_data = list() for d in data: for f in d: new_data.append(len(f.split())) plt.hist(new_data, bins=range(0, 750)) plt.title(check) plt.xlabel("Length") plt.ylabel("Frequency") plt.show() return mean(new_data), stdev(new_data) if __name__ == '__main__': project = 'openstack' messages, codes = loading_variable(project + '_messages'), loading_variable(project + '_codes') print(type(messages), type(codes)) mean_, std_ = statistic_msg(data=messages) print(mean_, std_) mean_, std_ = statistic_code(data=codes, check='File') print(mean_, std_) mean_, std_ = statistic_code(data=codes, check='Length') print(mean_, std_)
info_label(data=labels_test) ids_train, ids_test = get_index(data=ids, index=train_index), get_index( data=ids, index=test_index) train = (ids_train, labels_train, pad_msg_train, pad_code_train) test = (ids_test, labels_test, pad_msg_test, pad_code_test) dict_msg, dict_code = dictionary_commit( data=pad_msg_train, type_data='msg'), dictionary_commit(data=pad_code_train, type_data='code') return train, test, dict_msg, dict_code if __name__ == '__main__': # project = 'openstack' project = 'qt' messages, codes = loading_variable(project + '_messages'), loading_variable(project + '_codes') ids, labels = loading_variable(project + '_ids'), convert_label( loading_variable(project + '_labels')) info_label(data=labels) print('Number of instances in commit message %i and commit code %i ' % (len(messages), len(codes))) print('Labels: %i' % (len(labels))) train, test, dict_msg, dict_code = folding_data(pad_msg=messages, pad_code=codes, labels=labels, ids=ids, n_folds=5) saving_variable(project + '_train', train) saving_variable(project + '_test', test) saving_variable(project + '_dict_msg', dict_msg)