def __init__(self, uriList, streamType, moreVars, expandPatterns=True):
    """Resolve the input file names and, if needed, infer the stream type.

    Args:
        uriList: File names or patterns. When pattern expansion is enabled,
            a leading "file://" is stripped from each entry.
        streamType: Forwarded to DataProvider.__init__. If self._streamType
            is falsy afterwards (presumably set there from this argument),
            the type is inferred from the first file's extension.
        moreVars: Mapping forwarded to DataProvider.__init__. An
            "expandPatterns" key, if present, is deleted from it.
        expandPatterns: Whether to expand patterns. The strings "False" and
            "false" are treated as False, but only when moreVars contains
            an "expandPatterns" key.

    Raises:
        NoFilesSpecified: If no file name patterns were given.
        UnknownExtensionType: If the stream type has to be inferred and
            io_targets.getTypeByExtension returns a falsy value.
    """
    DataProvider.__init__(self, streamType, moreVars)

    self.expandPatterns = expandPatterns
    # NOTE(review): the value stored in moreVars is never read; only the
    # expandPatterns argument is tested -- confirm callers pass the same
    # value in both places.
    if "expandPatterns" in moreVars:
        if self.expandPatterns == "False" or self.expandPatterns == "false":
            self.expandPatterns = False
        del moreVars["expandPatterns"]

    if self.expandPatterns:
        prefix = "file://"
        fileNamePatterns = [
            uri[len(prefix):] if uri.startswith(prefix) else uri
            for uri in uriList
        ]
    else:
        fileNamePatterns = uriList

    if len(fileNamePatterns) == 0:
        raise NoFilesSpecified()

    self.myFileNames = list(
        expandFiles(fileNamePatterns, shouldOpen=False,
                    checkPattern=self.expandPatterns))

    if not self._streamType:
        firstName = self.myFileNames[0]
        colonPos = firstName.rfind(":")
        # A lone colon at index 1 followed by a backslash is a Windows drive
        # prefix ("C:\..."), not a suffix separator. The check must look at
        # the characters of the first file name; the previous code tested
        # the length and third element of the file *list* instead.
        isDrivePrefix = (colonPos == 1 and len(firstName) >= 3
                         and firstName[2] == '\\')
        if colonPos == -1 or isDrivePrefix:
            name = firstName
        else:
            # Drop everything from the last ':' before taking the extension.
            name = firstName[0:colonPos]

        ext = os.path.splitext(name)[1]
        if ext == ".gz":
            # try to get prev extension before gzip
            ext = os.path.splitext(os.path.splitext(name)[0])[1]

        self._streamType = io_targets.getTypeByExtension(ext)
        if not self._streamType:
            raise UnknownExtensionType(name)
def __init__(self, rss_url, max_news=2):
    """Initialise the provider and remember the feed URL and article limit.

    :param rss_url: stored unchanged as ``self.rss_url``.
    :param max_news: forwarded to ``DataProvider.__init__`` as the
        ``max_news`` keyword and stored as ``self.max_new_article``;
        defaults to 2.
    """
    DataProvider.__init__(self, max_news=max_news)

    self.max_new_article = max_news
    self.rss_url = rss_url
def __init__(self, cfg):
    """Load the cell train/test label tables and reset the read cursors.

    Reads ``cell/labels/train.csv`` and ``cell/labels/test.csv`` under
    ``Paths.data_path`` into DataFrames, keeps the row indices of each as
    plain lists, and sets both running indices to 0.

    Args:
        cfg: Configuration object forwarded to DataProvider.__init__.
    """
    DataProvider.__init__(self, cfg)

    # Load training images (path) and labels
    train_path = os.path.join(Paths.data_path, 'cell/labels/train.csv')
    test_path = os.path.join(Paths.data_path, 'cell/labels/test.csv')

    # Use the builtin types: np.str / np.int were plain aliases for them,
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
    data_type = {'image_name': str, 'label': int}
    self._train_df = pd.read_csv(train_path, dtype=data_type)
    self._test_df = pd.read_csv(test_path, dtype=data_type)

    self._train_list = list(self._train_df.index)
    # NOTE: the training order is left as read from the CSV; shuffling is
    # disabled here.
    # random.shuffle(self._train_list)
    self._test_list = list(self._test_df.index)
    self._test_size = len(self._test_list)

    # Positions of the next train/test sample to serve.
    self._train_index = 0
    self._test_index = 0
def __init__(self, cfg):
    """Load the thyroid-nodule annotation tables and reset the read cursors.

    Reads ``train.csv`` and ``test.csv`` from
    ``<Paths.data_path>/thyroid nodule/annotations/<cfg dir_name>`` into
    DataFrames, keeps the row indices of each as plain lists (the training
    list is shuffled in place), and sets both running indices to 0.

    Args:
        cfg: Configuration object forwarded to DataProvider.__init__;
            ``self._cfg.dir_name`` is read afterwards (presumably
            ``self._cfg`` is set by the base initialiser -- confirm).
    """
    DataProvider.__init__(self, cfg)

    csv_dir = os.path.join(Paths.data_path, 'thyroid nodule/annotations',
                           self._cfg.dir_name)
    train_path = os.path.join(csv_dir, 'train.csv')
    test_path = os.path.join(csv_dir, 'test.csv')

    # Use the builtin types: np.str / np.int were plain aliases for them,
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
    # x1/y1/x2/y2 are presumably bounding-box corners -- confirm with the
    # annotation format.
    data_type = {
        'image_name': str,
        'x1': int,
        'y1': int,
        'x2': int,
        'y2': int,
        'label': int,
    }
    self._train_df = pd.read_csv(train_path, dtype=data_type)
    self._test_df = pd.read_csv(test_path, dtype=data_type)

    self._train_list = list(self._train_df.index)
    random.shuffle(self._train_list)
    self._test_list = list(self._test_df.index)
    self._test_size = len(self._test_list)

    # Positions of the next train/test sample to serve.
    self._train_index = 0
    self._test_index = 0
def __init__(self, data_split, batchsize=1):
    """Initialise the provider by delegating to DataProvider.__init__.

    Args:
        data_split: Forwarded unchanged as the first positional argument.
        batchsize: Forwarded unchanged as the second positional argument;
            defaults to 1.
    """
    DataProvider.__init__(self, data_split, batchsize)