def reshape(data: dict) -> "tuple[np.ndarray, np.ndarray]":
    """Flatten every picture in *data* and pair it with its label.

    Args:
        data: Mapping of label -> iterable of pictures. Each picture must
            expose a ``flatten()`` method (presumably numpy arrays -- confirm
            against the caller).

    Returns:
        A ``(y, labels)`` pair of numpy arrays. ``y`` holds one flattened
        picture per row and ``labels[k]`` is the key under which row ``k``
        was stored. Rows follow the iteration order of *data* and, within a
        label, the order of its pictures. Both arrays are empty when *data*
        contains no pictures.
    """
    flattened = []
    labels = []
    for label, pictures in data.items():
        for picture in pictures:
            flattened.append(picture.flatten())
            labels.append(label)
    # np.array stacks the rows into a 2-D array when all flattened pictures
    # have the same length.
    return np.array(flattened), np.array(labels)
def _classify_medium(metadata):
    """Label a raw metadata string as 'Painting', 'Drawing' or 'N/A' by keyword."""
    if 'paint' in metadata or 'canvas' in metadata:
        return 'Painting'
    if 'paper' in metadata:
        return 'Drawing'
    return 'N/A'


def process(data):
    """Turn per-era scraped data into a DataFrame with one row per painting.

    Args:
        data: Mapping of era -> sequence whose element 0 is the list of
            painting titles, element 1 the list of image links and element 2
            a flat info list. The info list is read in groups of three per
            painting; only the second item of each group (the metadata
            string) is used.

    Returns:
        A DataFrame with the columns ``era``, ``painting``, ``url``,
        ``painting or drawing``, ``type`` and ``dimensions``, in that order.
        ``type`` and ``dimensions`` are the first and second comma-separated
        fields of the metadata string after newlines and spaces are removed;
        a missing field is NaN. An empty *data* yields an empty DataFrame
        with the same columns.

    Raises:
        IndexError: If an era has fewer links or info groups than titles.
    """
    # Build the per-painting rows straight from the mapping. This avoids an
    # intermediate era-indexed frame and integer-position lookups on a
    # label index, which newer pandas versions no longer support.
    rows = []
    for era, entry in data.items():
        titles, links, info = entry[0], entry[1], entry[2]
        for j, painting in enumerate(titles):
            rows.append([era, painting, links[j], info[3 * j + 1]])

    # Naming the columns up front keeps the schema intact for empty input.
    big_df = pd.DataFrame(rows, columns=['era', 'painting', 'url', 'metadata'])

    # Classify on the raw metadata text, before whitespace is stripped.
    big_df['painting or drawing'] = [
        _classify_medium(metadata) for metadata in big_df['metadata']
    ]

    fields = (
        big_df['metadata']
        .str.replace('\n', '', regex=False)
        .str.replace(' ', '', regex=False)
        .str.split(',')
    )
    # Separate type and dimensions; .str[k] yields NaN when a field is absent.
    big_df['type'] = fields.str[0]
    big_df['dimensions'] = fields.str[1]
    return big_df.drop(columns=['metadata'])
#embedding.append(emb_array) #ID.append(i) #real_ID = ID[i] real_ID = i to_json(i, emb_array) else: data = from_json() for k in range(0, len(data)): dist = np.sqrt( np.sum( np.square( np.subtract( emb_array, list(data.values())[k])))) if (dist < 1.0): to_json(k, emb_array) #embedding[k] = emb_array #real_ID = ID[k] real_ID = k present = True break if (present == False): index = int(len(data) + 1) to_json(index, emb_array) #embedding.append(emb_array) #ID.append(int(len(ID) + 1)) #real_ID = int(len(ID) + 1) real_ID = index