def __getitem__(self, idx):
    idx %= self.size
    # Read image and label as single-channel (grayscale) arrays
    inputs = cv2.imread(PJ(self.imgdir, self.namelist[idx]), 0)
    targets = cv2.imread(PJ(self.labdir, self.namelist[idx]), 0)
    # Stack image and label so they receive identical random transforms
    crop = self.transforms(np.concatenate(
        (inputs[..., np.newaxis], targets[..., np.newaxis]), axis=-1))
    inputs = crop[0:1]
    targets = (crop[1:2] <= self.th).float()
    return inputs, (inputs, targets)
def __init__(self, data_root, data_file, class2idx, transform=None):
    self.data_root = PJ(os.getcwd(), *data_root)
    self.class2idx = class2idx
    self.transform = transform

    # Load data paths
    with open(PJ(self.data_root, data_file), 'r') as f:
        data = f.readlines()
    self.data = [line.strip().split() for line in data]
def __init__(self, split_mode, state):
    self.split_mode = split_mode
    self.root = PJ('./dataset', state['dataset'])
    self.csv_file = PJ(self.root, 'list', state['mode'],
                       self.split_mode.strip("_g") + '.txt')
    self.data = pd.read_csv(self.csv_file, header=None)
    self.img_transform = self.img_transform()
def __init__(self, cfg, mode, transforms=None):
    super(Dataset, self).__init__()
    subname = 'train' if 'train' in cfg['direction'] else 'test'
    self.imgdir = PJ(cfg['direction'], '{}_img'.format(subname))
    self.labdir = PJ(cfg['direction'], '{}_label'.format(subname))
    self.namelist = os.listdir(self.imgdir)
    self.th = cfg['th']
    self.transforms = transforms
    self.size = len(self.namelist)
    self.repeat = cfg['repeat_time']
def _concept_split():
    if state['mode'] == "train_val":
        return yaml.load(
            open(PJ(data_dir, 'train_val',
                    'id_split' + str(val_times) + '.txt')))
    else:
        return yaml.load(
            open(PJ(data_dir, "train_test", 'id_split' + '.txt')))
def __init__(self, split_mode, state):
    self.dataset = state['dataset']
    self.split_mode = split_mode
    self.root = PJ('./dataset', state['dataset'])
    self.file_name = (str(state['val_times']) + '.txt'
                      if state.get('val_times') else '.txt')
    self.csv_file = PJ(self.root, 'list', state['mode'],
                       self.split_mode.strip("_g") + self.file_name)
    self.data = pd.read_csv(self.csv_file, header=None)
    self.img_transform = self.img_transform()
def __getitem__(self, idx):
    idx = idx % self.size
    inputs = cv2.imread(PJ(self.imgdir, self.namelist[idx]), 0)
    targets = cv2.imread(PJ(self.labdir, self.namelist[idx]), 0)
    if self.train:
        # Transform image and label jointly, then binarize the label
        crop = self.transforms(np.concatenate(
            (inputs[..., np.newaxis], targets[..., np.newaxis]), axis=-1))
        inputs = crop[0:1].unsqueeze(0)
        targets = (crop[1:2] <= self.th).float().unsqueeze(0)
    else:
        # Tile the full image into non-overlapping cell x cell patches
        h, w = inputs.shape
        H, W = h // self.cell, w // self.cell
        inputs = self.transforms(inputs)
        targets = (self.transforms(targets) <= self.th).float()
        inputs = (inputs.reshape(H, self.cell, W, self.cell)
                  .permute(0, 2, 1, 3)
                  .reshape(H * W, self.cell, self.cell).unsqueeze(1))
        targets = (targets.reshape(H, self.cell, W, self.cell)
                   .permute(0, 2, 1, 3)
                   .reshape(H * W, self.cell, self.cell).unsqueeze(1))
    return inputs, targets
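# Hedged sketch, not part of the dataset above: it only illustrates how the
# reshape/permute pattern in the test branch tiles an image into non-overlapping
# cell x cell patches. The 512x512 size and cell=256 are assumed example values.
import torch

cell = 256
img = torch.arange(512 * 512, dtype=torch.float32).reshape(1, 512, 512)
H, W = 512 // cell, 512 // cell
tiles = (img.reshape(H, cell, W, cell)
         .permute(0, 2, 1, 3)
         .reshape(H * W, cell, cell))
# Patches come out in row-major order: tiles[0] is the top-left block,
# tiles[1] the block to its right, and so on.
assert torch.equal(tiles[0], img[0, :cell, :cell])
assert torch.equal(tiles[1], img[0, :cell, cell:2 * cell])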
def __getitem__(self, idx):
    image = Image.open(
        PJ(self.root, 'img', self.data.iloc[idx, 0])).convert('RGB')
    label = torch.FloatTensor(self.data.iloc[idx, 1:].tolist())
    sample = {'image': self.img_transform(image), 'label': label}
    return sample
def __init__(self, data_root, data_file, class2idx, transform=None):
    self.data_root = PJ(os.getcwd(), *data_root)
    self.class2idx = class2idx
    self.transform = transform

    # Load data paths by walking the per-class sub-directories
    images = []
    dir = os.path.expanduser(PJ(self.data_root, data_file))
    for target in sorted(self.class2idx.keys()):
        d = os.path.join(dir, target)
        if not os.path.isdir(d):
            continue
        for root, _, fnames in sorted(os.walk(d)):
            for fname in sorted(fnames):
                path = os.path.join(root, fname)
                item = [path, target]
                images.append(item)
    self.data = images
def __getitem__(self, idx):
    image = Image.open(
        PJ(self.root, 'img', self.data.iloc[idx, 0])).convert('RGB')
    image = self.img_transform(image)
    label = torch.LongTensor([self.data.iloc[idx, 1]])
    sample = {'image': image, 'label': label}
    return sample
def __getitem__(self, index):
    """Called by DataLoader (an iterator)."""
    image_path, class_name = self.data[index]

    # Label (torch.LongTensor so it can be fed to the loss directly)
    label = torch.LongTensor([self.class2idx[class_name]])

    # Load image and transform
    image = Image.open(PJ(self.data_root, image_path)).convert('RGB')
    image = self.transform(image) if self.transform else image

    return label, image
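# Hedged usage sketch (the variable name `trainset`, the batch size, and the
# worker count are assumptions, not from the original script): shows how the
# __getitem__ above, which returns (label, image), is typically consumed.
from torch.utils.data import DataLoader

loader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
for labels, images in loader:
    # labels: LongTensor of shape (batch, 1); images: stacked transformed tensors
    pass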
def ConceptSets(state, concepts):
    split_list = state['split_list'] + ['general']
    data_dir = PJ('./dataset', state['dataset'], 'list')
    concept_file = pd.read_csv(
        PJ(data_dir, 'concepts', 'concepts_' + concepts + '.txt'))
    val_times = None if state['mode'] == 'train_test' else state['val_times']

    def _concept_split():
        if state['mode'] == "train_val":
            return yaml.load(
                open(PJ(data_dir, 'train_val',
                        'id_split' + str(val_times) + '.txt')))
        else:
            return yaml.load(
                open(PJ(data_dir, "train_test", 'id_split' + '.txt')))

    def _concept(split_mode, concept_split):
        if split_mode in ['train', 'trainval', 'test_seen']:
            concept_label = concept_split['train_id']
        elif split_mode in ['val', 'test', 'test_unseen']:
            concept_label = concept_split['test_id']
        elif split_mode in ['general']:
            concept_label = list(range(concept_file.shape[1]))
        else:
            assert False, "Split Mode Error"

        concept_vector = [
            torch.cuda.FloatTensor(concept_file.iloc[:, i].values)
            for i in concept_label
        ]
        return {'label': concept_label, 'vector': torch.stack(concept_vector)}

    concept_split = _concept_split()
    return {s: _concept(s, concept_split) for s in split_list}
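# Hedged usage sketch (the `state` values below are illustrative assumptions; a
# CUDA device and the concept file on disk are required): ConceptSets returns,
# for each split, the class ids of that split and the stacked concept matrix.
state = {'split_list': ['trainval', 'test_seen', 'test_unseen'],
         'dataset': 'awa2', 'mode': 'train_test', 'val_times': None}
concept_sets = ConceptSets(state, 'new')
seen = concept_sets['test_seen']
# seen['label'] : list of class ids belonging to the seen split
# seen['vector']: cuda FloatTensor of shape (num classes in split, concept dim)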
def _concept_split():
    if state['mode'] == "train_val":
        # load the original train_id
        ids = yaml.load(
            open(PJ(data_dir, 'train_test', 'id_split' + '.txt')))
        ids = ids['train_id']

        # randomly generate id_split
        val_nums = {'apy': 5, 'awa2': 13, 'cub': 50, 'sun': 65}
        val_num = val_nums[state['dataset']]
        random.shuffle(ids)
        id_split = {
            'train_id': sorted(ids[val_num:]),
            'test_id': sorted(ids[:val_num])
        }
        print(id_split['train_id'])
        print(id_split['test_id'])

        # produce the split data files
        data = pd.read_csv(PJ(data_dir, "train_val", "trainval.txt"),
                           header=None)
        train_data = data[data.iloc[:, 1].isin(id_split['train_id'])]
        train_data.to_csv(PJ(data_dir, "train_val", "train.txt"),
                          index=False, header=False)
        test_data = data[data.iloc[:, 1].isin(id_split['test_id'])]
        test_data.to_csv(PJ(data_dir, "train_val", "val.txt"),
                         index=False, header=False)
        return id_split
    else:
        return yaml.load(
            open(PJ(data_dir, state['mode'], 'id_split' + '.txt')))
import re

import pandas as pd
import numpy as np
import scipy.io as sio
from os.path import join as PJ

DATASET = "awa2"
concept_vec_filename = "concepts_bert.txt"

ROOT = PJ("..", "dataset")
concept_filename = PJ(ROOT, DATASET, "list", "concepts", "concepts.txt")
concept_vec_filename = PJ(ROOT, DATASET, "list", "concepts", concept_vec_filename)
weight_path = PJ(ROOT, "bert", "bert_word_embedding_all.npy")

ATT_SPLITS = sio.loadmat(PJ(ROOT, "xlsa17", "data", DATASET, "att_splits.mat"))

# Output replaced words
if DATASET == 'sun':
    wordlist = [c[0][0] for c in ATT_SPLITS['allclasses_names']]
    with open(concept_filename, "w") as f:
        f.writelines("\n".join(wordlist))
    wordlist = [re.sub("_", " ", word.lower()) for word in wordlist]
elif DATASET == 'cub':
    with open(concept_filename) as f:
        wordlist = [
            re.sub("_", " ", re.sub(r"[\d\s\.]", "", word).lower())
            for word in f.readlines()
        ]
import argparse
import os
import shutil
from os.path import join as PJ

# SummaryWriter import assumed here; the project may use tensorboardX instead
from torch.utils.tensorboard import SummaryWriter

from datasets.Dataset import CIFAR10Dataset
from networks import ResNet18, HaHaNet
from utils import config, data_transform
from test import test

if __name__ == '__main__':

    ########################################
    # Environment and Experiment setting
    ########################################

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp', '-e', type=str, default='exp1_ResNet18')
    args = parser.parse_args()

    # Load experiment config
    config_path = PJ(os.getcwd(), "configs", f"{args.exp}.yaml")
    config = config(config_path)
    exp_name = config['exp_name']
    print(f"EXP: {exp_name}")

    # Create saving directory
    save_root = PJ(os.getcwd(), "results", exp_name)
    if not os.path.exists(save_root):
        os.makedirs(save_root)
        print(f"Create {save_root}")

    # Tensorboard
    writer = SummaryWriter(PJ(os.getcwd(), "results", "logs", exp_name))

    # Save the config file alongside the results
    shutil.copy(config_path, PJ(save_root, f"{exp_name}.yaml"))

    # Show some experiment info
import pymarc  # Please load the supplied version of pymarc (install using pip install ./)
import numpy as np
import ujson
import json
import bgzf
import struct
import os
from tqdm.auto import tqdm
from os.path import join as PJ

# Set to the location where the INSPIRE dump is.
inspireDumpPath = PJ("..", "Data", "INSPIRE", "20210108")
processedDataPath = PJ(inspireDumpPath, "Processed")
os.makedirs(processedDataPath, exist_ok=True)


def getTagSubtag(data, subtag, onErrorParent=True):
    if (isinstance(data, dict)):
        value = None
        # try:
        if (subtag not in data):
            value = None
        else:
            value = data[subtag]
        # except:
def ConceptSets(state, concepts):
    split_list = state['split_list'] + ['general']
    data_dir = PJ('./dataset', state['dataset'], 'list')
    concept_file = pd.read_csv(
        PJ(data_dir, 'concepts', 'concepts_' + concepts + '.txt'))

    def _concept_split():
        if state['mode'] == "train_val":
            # load the original train_id
            ids = yaml.load(
                open(PJ(data_dir, 'train_test', 'id_split' + '.txt')))
            ids = ids['train_id']

            # randomly generate id_split
            val_nums = {'apy': 5, 'awa2': 13, 'cub': 50, 'sun': 65}
            val_num = val_nums[state['dataset']]
            random.shuffle(ids)
            id_split = {
                'train_id': sorted(ids[val_num:]),
                'test_id': sorted(ids[:val_num])
            }
            print(id_split['train_id'])
            print(id_split['test_id'])

            # produce the split data files
            data = pd.read_csv(PJ(data_dir, "train_val", "trainval.txt"),
                               header=None)
            train_data = data[data.iloc[:, 1].isin(id_split['train_id'])]
            train_data.to_csv(PJ(data_dir, "train_val", "train.txt"),
                              index=False, header=False)
            test_data = data[data.iloc[:, 1].isin(id_split['test_id'])]
            test_data.to_csv(PJ(data_dir, "train_val", "val.txt"),
                             index=False, header=False)
            return id_split
        else:
            return yaml.load(
                open(PJ(data_dir, state['mode'], 'id_split' + '.txt')))

    def _concept(split_mode, concept_split):
        if split_mode in ['train', 'trainval', 'test_seen']:
            concept_label = concept_split['train_id']
        elif split_mode in ['val', 'test_unseen']:
            concept_label = concept_split['test_id']
        elif split_mode in ['general']:
            concept_label = list(range(concept_file.shape[1]))
        else:
            assert False, "Split Mode Error"

        concept_vector = {
            i: torch.cuda.FloatTensor(concept_file.iloc[:, i].values)
            for i in concept_label
        }
        id2label = {idx: label for idx, label in enumerate(concept_label)}
        return {
            'concept_label': concept_label,
            'concept_vector': concept_vector,
            'id2label': id2label
        }

    concept_split = _concept_split()
    return {s: _concept(s, concept_split) for s in split_list}
#################################
toImage = Image.new('RGBA', (1280, 960))

for j in range(4):
    print("{:d}dpf".format(j + 1))
    c1 = 0
    c2 = 0
    c3 = 0
    c4 = 0
    arr = arr_txt[j]
    for i in range(file_count[j]):
        b = PJ("fish/test_data", arr_file[j], arr[i])
        img = image.load_img(b, target_size=(800, 800))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        preds = model.predict(x)
        a = np.argmax(preds)
        if a == 0:
            c1 += 1
        elif a == 1:
            c2 += 1
for tn in STATE['split_list']:
    df = datasets[tn].data.iloc[:, 1:].sum(axis=0)
    print(tn + ": " + str(df[df > 0].skew()))

train_loader = DataLoader(datasets['trainval'],
                          batch_size=CONFIG['train_batch_size'],
                          shuffle=True)

test_loaders = {tn: DataLoader(datasets[tn],
                               batch_size=CONFIG['test_batch_size'],
                               shuffle=False)
                for tn in STATE['split_list'][1:]}

##########################################################################################
# experiment for n times
for exp_times in range(CONFIG['exp_times']):

    SAVE_PATH = PJ('.', 'runs_test', DATASET, EXP_NAME, str(exp_times))
    writer = SummaryWriter(PJ(SAVE_PATH))

    # set experiment type: classifier / transformer
    if CONFIG['type'] == "classifier":
        model = classifier(backbone=CONFIG['model'],
                           k=CONFIG['k'],
                           d=CONFIG['d'][CONFIG['concepts']][DATASET],
                           pretrained=CONFIG['pretrained'],
                           freeze=CONFIG['freeze'])
    elif CONFIG['type'] == "transformer":
        model = transformer(backbone=CONFIG['model'],
                            linear=CONFIG['linear'],
                            k=CONFIG['k'],
                            d=CONFIG['d'][CONFIG['concepts']][DATASET],
                            pretrained=CONFIG['pretrained'],
                            freeze=CONFIG['freeze'])
    else:
        assert False, "Must Assign the model type: classifier or transformer"
import os
from os.path import join as PJ
import random

random.seed(1024)

DATASET = "CIFAR10"
DATA_ROOT = PJ(os.path.dirname(os.getcwd()), "datasets", DATASET)

for data_file in ["test", "train"]:
    dir = os.path.expanduser(PJ(DATA_ROOT, data_file))
    class_names = os.listdir(dir)

    if data_file == 'train':
        items = []
        val_items = []
        for c in class_names:
            pathes = [PJ(data_file, c, p) for p in os.listdir(PJ(dir, c))]
            val_pathes = random.sample(pathes, len(pathes) // 10)
            train_pathes = list(set(pathes) - set(val_pathes))
            items += [" ".join([tp, c]) for tp in train_pathes]
            val_items += [" ".join([vp, c]) for vp in val_pathes]
        with open(PJ(DATA_ROOT, "val.txt"), "w") as f:
            f.write('\n'.join(val_items))
    else:
        items = [" ".join([PJ(data_file, c, p), c])
                 for c in sorted(class_names)
                 for p in os.listdir(PJ(dir, c))]
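# Hedged sketch, not part of the split script above: each record written to the
# txt files has the form "<relative image path> <class name>", which is what the
# "line.strip().split()" parsing in the dataset __init__ earlier expects.
with open(PJ(DATA_ROOT, "val.txt")) as f:
    records = [line.strip().split() for line in f]
# e.g. records[0] might look like ['train/airplane/0001.png', 'airplane']
# (illustrative values only)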
import gzip
import os
import sys
import re
from datetime import datetime as dt
from os.path import join as PJ

import matplotlib
import nltk

nltk.download("stopwords")
from nltk.corpus import stopwords

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

MAGPath = PJ("..", "Data", "MAG")  # PATH TO THE MAG DATA
MAGPath = PJ("/gpfs/sciencegenome/AuthorDynamics/MAG/mag-2021-01-05/mag")
dataPath = PJ("..", "Data", "Processed")
figuresPath = PJ("..", "Figures")
graycolor = "#808080"

HCAProjectDataPath = PJ("..", "Data", "Publications", "Biomedical Papers",
                        "HumanCellAtlas.json")
HGPTitles2MAGPath = PJ("..", "Data", "Publications", "Biomedical Papers",
                       "HGP_Publications_MAG.tsv")

minPaperCount = 1
rebuildAll = False

temporaryPath = PJ("..", "Temp")

os.makedirs(dataPath, exist_ok=True)
os.makedirs(temporaryPath, exist_ok=True)
########################################
# Environment and Experiment setting
########################################

# Parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--exp', '-e', type=str, default='exp1_ResNet18')
parser.add_argument('--model_weights', '-w', type=str, default='')
args = parser.parse_args()

# Load experiment config
config_path = f"./configs/{args.exp}.yaml"
config = config(config_path)
exp_name = config['exp_name']
print(f"EXP: {exp_name}")

save_root = PJ("./results", exp_name)

# Show some experiment info
model_weights = args.model_weights
print(f"Model: {config['model']}, Weights: {model_weights}")
print(f"Pretrained: {config['pretrained']}, Freeze: {config['freeze']}")

########################################
# Data loader
########################################

class2idx = config['class2idx']
idx2class = {class2idx[k]: k for k in class2idx.keys()}

# Dataset
transform = data_transform(config, train=False)
testset = CIFAR10Dataset(config['data_root'], config['test_file'],
                         class2idx, transform)
import re
import pandas as pd
import numpy as np
from os.path import join as PJ
import scipy.io as sio
from gensim.models import KeyedVectors

DATASET = "cub"

concept_vec_filename = "concepts_new.txt"
# concept_vec_filename = "concepts_old.txt"

weight_path = "self-enwiki-gensim-normed-win10.bin"
# weight_path = "enwiki-gensim-normed.bin"

ROOT = PJ("..", "dataset")
concept_filename = PJ(ROOT, DATASET, "list", "concepts", "concepts.txt")
concept_vec_filename = PJ(ROOT, DATASET, "list", "concepts", concept_vec_filename)
weight_path = PJ(ROOT, "enwiki", weight_path)

ATT_SPLITS = sio.loadmat(PJ(ROOT, "xlsa17", "data", DATASET, "att_splits.mat"))
RES101 = sio.loadmat(PJ(ROOT, "xlsa17", "data", DATASET, "res101.mat"))

setting = {
    'apy': {
        'replace_word': {
            'diningtable': 'tables',
            'pottedplant': 'houseplant',
            # 'tvmonitor': 'tv'
            'tvmonitor': 'flat_panel_display'
        }
import os
from os.path import join as PJ
import scipy.io as sio
from pandas import DataFrame as df
import yaml
from sklearn.preprocessing import LabelBinarizer

# Setting
DATASET = "awa2"

ROOT = PJ("..", "dataset")
XLSA17 = PJ(ROOT, "xlsa17", "data", DATASET)

CONCEPT = PJ(ROOT, DATASET, "list", "concepts", "concepts.txt")
CONCEPT_LIST = PJ(ROOT, DATASET, "list", "concepts")
SPLIT_LIST = {
    "train_test": PJ(ROOT, DATASET, "list", "train_test"),
    "train_val": PJ(ROOT, DATASET, "list", "train_test")
}

ATT_SPLITS = sio.loadmat(PJ(XLSA17, "att_splits.mat"))
RES101 = sio.loadmat(PJ(XLSA17, "res101.mat"))

####################
# check path exist #
####################
if not os.path.isdir(PJ(ROOT, DATASET, "list")):
    os.makedirs(PJ(ROOT, DATASET, "list"))

if not os.path.isdir(CONCEPT_LIST):
    os.makedirs(CONCEPT_LIST)
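# Hedged sketch (illustrative labels only): LabelBinarizer, imported above, maps
# integer class labels to one-hot rows, which is a common way to build per-image
# label vectors for the list files.
lb = LabelBinarizer()
onehot = lb.fit_transform([0, 2, 1, 2])
# onehot has shape (4, 3): one row per input label, one column per distinct class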
import os
from os.path import join as PJ
import scipy.io as sio
from pandas import DataFrame as df
import yaml

# Setting
DATASET = "apy"

ROOT = PJ("..", "dataset")
XLSA17 = PJ(ROOT, "xlsa17", "data", DATASET)

CONCEPT = PJ(ROOT, DATASET, "list", "concepts", "concepts.txt")
CONCEPT_LIST = PJ(ROOT, DATASET, "list", "concepts")
SPLIT_LIST = PJ(ROOT, DATASET, "list", "train_test")

ATT_SPLITS = sio.loadmat(PJ(XLSA17, "att_splits.mat"))
RES101 = sio.loadmat(PJ(XLSA17, "res101.mat"))

# check path exist
if not os.path.isdir(PJ(ROOT, DATASET, "list")):
    os.makedirs(PJ(ROOT, DATASET, "list"))

if not os.path.isdir(CONCEPT_LIST):
    os.makedirs(CONCEPT_LIST)

if not os.path.isdir(SPLIT_LIST):
    os.makedirs(SPLIT_LIST)

# reorganize data
img_files = [filter(None, i[0][0].split('/')) for i in RES101['image_files']]
img_files = [PJ(*list(i)[5:]) for i in img_files]
from os.path import join as PJ
import scipy.io as sio
from pandas import DataFrame as df
from pandas import read_csv
import cv2
import os

DATASET = "apy"

ROOT = PJ("..", "dataset")
XLSA17 = PJ(ROOT, "xlsa17", "data", DATASET)

ATT_SPLITS = sio.loadmat(PJ(XLSA17, "att_splits.mat"))
RES101 = sio.loadmat(PJ(XLSA17, "res101.mat"))
ORIGIN_ATTR = read_csv(PJ(XLSA17, "origin_attr.txt"), delimiter=" ", header=None)

origin_data = ORIGIN_ATTR.iloc[:, 0:6]
# print(origin_data)

concepts = [label[0][0] for label in ATT_SPLITS['allclasses_names']]

# reorganize data
img_files = [filter(None, i[0][0].split('/')) for i in RES101['image_files']]
img_files = [PJ(*list(i)[5:]) for i in img_files]

labels = RES101['labels'].reshape(-1)
labels = labels - 1

data = df({'img_path': img_files, 'label': labels})
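# Hedged sketch (assumes the standard xlsa17 field names such as 'trainval_loc';
# verify against the loaded .mat file): the 1-based MATLAB indices stored in
# att_splits.mat can be used to slice the DataFrame built above.
trainval_idx = ATT_SPLITS['trainval_loc'].reshape(-1).astype(int) - 1
trainval_data = data.iloc[trainval_idx]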
import os
from os.path import join as PJ
import scipy.io as sio
from pandas import DataFrame as df

# Setting
DATASET = "awa2"

ROOT = PJ("..", "dataset")
XLSA17 = PJ(ROOT, "xlsa17", "data", DATASET)

CONCEPT = PJ(ROOT, DATASET, "list", "concepts", "concepts.txt")
CONCEPT_LIST = PJ(ROOT, DATASET, "list", "concepts")
SPLIT_LIST = PJ(ROOT, DATASET, "list", "train_val")

ATT_SPLITS = sio.loadmat(PJ(XLSA17, "att_splits.mat"))
RES101 = sio.loadmat(PJ(XLSA17, "res101.mat"))

# check path exist
if not os.path.isdir(PJ(ROOT, DATASET, "list")):
    os.makedirs(PJ(ROOT, DATASET, "list"))

if not os.path.isdir(CONCEPT_LIST):
    os.makedirs(CONCEPT_LIST)

if not os.path.isdir(SPLIT_LIST):
    os.makedirs(SPLIT_LIST)

# reorganize data
img_files = [filter(None, i[0][0].split('/')) for i in RES101['image_files']]
img_files = [PJ(*list(i)[5:]) for i in img_files]
results['errors'].append(msg)
# results['brainlife'].append({"type": "error", "msg": msg})
print(msg)
exitApp()


configFilename = "config.json"
argCount = len(sys.argv)
if (argCount > 1):
    configFilename = sys.argv[1]

outputDirectory = "output"
figuresOutputDirectory = os.path.join(outputDirectory, "figures")
outputFile = PJ(outputDirectory, "network.json.gz")

if (not os.path.exists(outputDirectory)):
    os.makedirs(outputDirectory)

with open(configFilename, "r") as fd:
    config = json.load(fd)

# "color-property": "degree",
# "size-property": "degree"
colorProperty = "Degree"
sizeProperty = "Degree"
if __name__ == '__main__':

    from models.network import *
    import argparse
    import os
    from os.path import join as PJ

    from RandAugment import RandAugment
    from custom_data_io import myset
    import torch.optim as optim
    from DML import DML

    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp', type=str, default='exp_1')
    args = parser.parse_args()

    config_path = PJ(os.getcwd(), f"{args.exp}.yaml")
    config = config(config_path)
    exp_name = config['exp_name']
    print(f"EXP: {exp_name}")

    train_transforms = transforms.Compose([
        transforms.Resize((385, 505)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    test_transforms = transforms.Compose([
        transforms.Resize((385, 505)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    train_transforms.transforms.insert(0, RandAugment(2, 14))
    transforms = T.Compose([
        T.ToPILImage(),
        T.ToTensor(),
    ])
    dataset = Dataset(cfg, mode, transforms)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=cfg['batch_size'],
                                         shuffle=cfg['shuffle'],
                                         num_workers=cfg['num_workers'])
    return loader


if __name__ == "__main__":
    mode = 'test'
    direction = {
        'test': PJ("D:\\Data", "ISBI-2012-EM", "new_test_set"),
        'train': PJ("D:\\Data", "ISBI-2012-EM", "new train set")
    }
    cfg = {
        "file_name": "ISBI",
        "num_workers": 1,
        "th": 0.6,
        'repeat_time': 10,
        "batch_size": 1,
        "shuffle": True,
        'direction': direction[mode]
    }
    d = iter(dataloader(cfg, mode))
    for kkk in range(10):
        i, t = next(d)
        print(t.sum() / 512 / 512)