def load_data(self, need_shuffle: bool = True):
    """Read the whole data file at once and return the processed result.

    The file at ``self.data_path`` is parsed as a ``#``-delimited table whose
    first row is the header. The ``target`` column is forced to ``str`` so
    pandas does not infer a numeric dtype for it.

    Args:
        need_shuffle: When true (default), the loaded frame is passed through
            ``shuffle`` before processing.

    Returns:
        Whatever ``self.__process_chunk`` returns for the unpacked output of
        ``self.__extract_xy`` applied to the full frame.

    Raises:
        Whatever ``raise_if_not_found`` raises for ``self.data_path``
        (presumably when the file is missing -- confirm against the helper).
    """
    raise_if_not_found(self.data_path)
    # Builtin ``str`` replaces the ``np.str`` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the two were always the same object.
    frame = pd.read_csv(
        self.data_path,
        header=0,
        delimiter="#",
        dtype={'target': str},
    )
    if need_shuffle:
        frame = shuffle(frame)
    return self.__process_chunk(*self.__extract_xy(frame))
def gen_data(self, need_shuffle: bool = True):
    """Lazily yield processed batches read from the data file.

    The file at ``self.data_path`` is parsed as a ``#``-delimited table whose
    first row is the header, in chunks of ``self.batch_size`` rows. Each chunk
    is optionally shuffled (within the chunk only) and then processed.

    Args:
        need_shuffle: When true (default), every chunk is passed through
            ``shuffle`` before processing.

    Yields:
        For each chunk, whatever ``self.__process_chunk`` returns for the
        unpacked output of ``self.__extract_xy``.

    Raises:
        Whatever ``raise_if_not_found`` raises for ``self.data_path``
        (presumably when the file is missing -- confirm against the helper).
        Because this is a generator, the check runs on first iteration, not
        when ``gen_data`` is called.
    """
    raise_if_not_found(self.data_path)
    # Pin ``target`` to ``str`` exactly as ``load_data`` does for the same
    # file. Without it pandas infers the dtype chunk by chunk, so the column
    # type could differ from ``load_data`` and even between chunks.
    reader = pd.read_csv(
        self.data_path,
        header=0,
        delimiter="#",
        chunksize=self.batch_size,
        dtype={'target': str},
    )
    for chunk in reader:
        if need_shuffle:
            chunk = shuffle(chunk)
        yield self.__process_chunk(*self.__extract_xy(chunk))
def load_d6table(self):
    """Load the table stored at ``self.naics_codes_path`` as a DataFrame.

    The file is parsed as a ``#``-delimited table whose first row is the
    header. The ``code`` column is forced to ``str`` so pandas does not infer
    a numeric dtype for it.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.

    Raises:
        Whatever ``raise_if_not_found`` raises for ``self.naics_codes_path``
        (presumably when the file is missing -- confirm against the helper).
    """
    raise_if_not_found(self.naics_codes_path)
    # Builtin ``str`` replaces the ``np.str`` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the two were always the same object.
    return pd.read_csv(
        self.naics_codes_path,
        header=0,
        delimiter="#",
        dtype={"code": str},
    )