Exemplo n.º 1
0
 def create_train_data(self):
     """Load all training images and their label masks into memory and
     persist both stacks as .npy files under ``self.npy_path``.

     Images are found via ``self.data_path/*<img_type>``; each label file
     shares the image's base name and lives under ``self.label_path`` with
     extension ``self.annot_img_type``.  Writes ``imgs_train.npy`` and
     ``imgs_mask_train.npy`` as uint8 arrays.
     """
     imgdatas = []
     imglabels = []
     # Full paths of every training image (including directory).
     imgs = glob.glob(self.data_path + "/*" + self.img_type)
     # enumerate replaces the original hand-maintained counter `i`.
     for i, imgname in enumerate(imgs):
         # Base name without directory or extension.
         # NOTE(review): the [:-4] slice assumes a 4-character suffix such
         # as ".png" — confirm that img_type always matches this.
         midname = imgname[imgname.rindex("/") + 1:-4]
         # The helpers also return height/width, which are unused here.
         img, _, _ = util.get_image(self.data_path + "/" + midname +
                                    self.img_type)
         label, _, _ = util.get_label(self.label_path + "/" + midname +
                                      self.annot_img_type)
         imgdatas.append(img)
         imglabels.append(label)
         if i % 100 == 0:
             print('Done:{0}/{1} images'.format(i, len(imgs)))
     imgdatas = np.array(imgdatas, dtype=np.uint8)
     imglabels = np.array(imglabels, dtype=np.uint8)
     print("loading done")
     np.save(self.npy_path + '/imgs_train.npy', imgdatas)
     np.save(self.npy_path + '/imgs_mask_train.npy', imglabels)
     print('Saving to npy files done.')
Exemplo n.º 2
0
    def __init__(self, parent=None):
        """Build the class-view widget: load the ImageNet label table,
        attach the Qt ``.ui`` layout, and initialise widget state."""
        super().__init__(parent)

        # Pickled mapping of ImageNet class indices to human-readable names.
        self.labels = get_label("./labels/imagenet_labels.pkl")

        self.ui = uic.loadUi("./ui/Class_View.ui", self)
        self.cls = -1  # -1 marks "no class selected yet"
        self.initUI()
Exemplo n.º 3
0
    def load_data(self, fname):
        """Load training samples from a JSON-lines file.

        Each line of *fname* is a JSON object carrying a ``"fact"`` text
        field plus label fields decoded through ``util.get_label`` under
        the keys ``"accu"``, ``"law"`` and ``"time"``.

        Returns a 4-tuple ``(facts, accu_label, article_label,
        imprison_label)``: *facts* word-segmented via ``self.cut_all``,
        and the three label sequences wrapped as ``pd.Series``.
        """
        facts = []
        accu_label = []
        article_label = []
        imprison_label = []

        # BUG FIX: the original passed json.loads(line, encoding="utf-8");
        # that keyword was deprecated and removed in Python 3.9, raising
        # TypeError.  The input is already str, so decoding happens at the
        # file boundary via the explicit open(..., encoding=...) below.
        with open(fname, 'r', encoding='utf-8') as f:
            # Iterating the file object is equivalent to the original
            # readline() loop (readline returns "" only at EOF).
            for line in f:
                line_dict = json.loads(line)

                facts.append(line_dict["fact"])
                accu_label.append(util.get_label(line_dict, "accu"))
                article_label.append(util.get_label(line_dict, "law"))
                imprison_label.append(util.get_label(line_dict, "time"))

        if util.DEBUG:
            print("training file loaded.")

        facts = self.cut_all(facts)

        if util.DEBUG:
            print("training data segmented.")

        accu_label = pd.Series(accu_label)
        article_label = pd.Series(article_label)
        imprison_label = pd.Series(imprison_label)

        if util.DUMP:
            self.dump_processed_data_to_file(facts, accu_label, article_label, imprison_label)

        return facts, accu_label, article_label, imprison_label
Exemplo n.º 4
0
# Parse the second data set into a DataFrame.  NOTE(review): set1_df is
# presumably built the same way earlier in the script (not visible here).
set2_df = get_df('data/set2.json')

# Feature/label containers for both sets (the set2 pair is not filled in
# this fragment).
X_set1 = list()
Y_set1 = list()
X_set2 = list()
Y_set2 = list()

# prune, lemmatize and prepare text: build one feature string per row by
# concatenating its source id, the preprocessed text, and its denotations.
for i in range(len(set1_df)):
    denote = ' '.join(set1_df.loc[i]['denotations'])
    string = ' '.join(preprocess_text(set1_df.loc[i]['text']))
    X_set1.append(set1_df.iloc[i]['sourceid'] + string + ' ' + denote)

# Encode labels as integers for cross verification.
le = LabelEncoder()
Y_set1 = le.fit_transform(get_label()[1])
label_set1 = set1_df['sourceid']

# TF-IDF vectorization of the combined feature strings.
vectorizer = TfidfVectorizer(stop_words='english')
hash_matrix = vectorizer.fit_transform(X_set1)

# CLUSTER
# Split data into 6 clusters because data analysis shows an elbow at 6 and we also know that there are 6 labels
kmeans = KMeans(n_clusters=6, init='k-means++', max_iter=1000).fit(hash_matrix)

# Cluster assignment: one integer cluster id per document.
clusters = kmeans.labels_

print("Generated diseases\n\n")
for group in set(clusters):