def _fit_child(self, skf, xs_train, y_train):
    """Build stage0 models from the training set (xs_train, y_train).

    Parameters
    ----------
    skf : StratifiedKFold-like iterator
        Used for cross-validation blending.
    xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.
    y_train : array-like, shape = [n_samples]
        The target values (class labels in classification).

    Returns
    -------
    blend_train : array-like, shape = [n_samples]
        For stage1 model training.
    blend_test : array-like, shape = [n_samples]
        If you use TwoStageKFold, blended samples for the test set
        will be prepared.
    """
    blend_train = None
    blend_test = None
    for j, clf in enumerate(self.clfs):
        self._out_to_console('Training classifier [{0}]'.format(j), 0)
        all_learner_key = str(type(clf)) + str(j)
        self.all_learner[all_learner_key] = []
        blend_train_j = None
        for i, (train_index, cv_index) in enumerate(skf):
            now_learner = clone(clf)
            self.all_learner[all_learner_key].append(now_learner)
            xs_now_train = xs_train[train_index]
            y_now_train = y_train[train_index]
            xs_cv = xs_train[cv_index]
            # y_cv = y_train[cv_index] is unused.

            if not hasattr(now_learner, 'id'):
                now_learner.id = self.get_stage0_id(now_learner)
            dump_file = util.get_cache_file(now_learner.id,
                                            cv_index,
                                            suffix='pkl')
            if self.save_stage0 and self._is_saved(now_learner, cv_index):
                print('Prediction cache exists: skip fitting.')
                now_learner = joblib.load(dump_file)
                self.all_learner[all_learner_key][-1] = now_learner
            else:
                self._out_to_console('Fold [{0}]'.format(i), 0)
                now_learner.fit(xs_now_train, y_now_train)
                if self.save_stage0:
                    joblib.dump(now_learner, dump_file, compress=True)

            if blend_train_j is None:
                blend_train_j = self._get_blend_init(y_train, now_learner)
            # Predict only on the held-out fold, so the stage1 features
            # are built out-of-fold (no leakage from the training folds).
            blend_train_j[cv_index] = self._get_child_predict(
                now_learner, xs_cv, cv_index)
        blend_train = numpy_c_concatenate(blend_train, blend_train_j)
    return blend_train, blend_test

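# The out-of-fold blending above can be sketched standalone with plain
# scikit-learn parts. This is an illustrative sketch, not this library's
# API: the helper name, the binary predict_proba column, and the numpy
# array inputs are all assumptions made for the example.
def _blend_sketch(clfs, xs_train, y_train, n_splits=5):
    import numpy as np
    from sklearn.base import clone
    from sklearn.model_selection import StratifiedKFold

    skf = StratifiedKFold(n_splits=n_splits)
    columns = []
    for clf in clfs:
        # One out-of-fold prediction column per stage0 learner.
        blend_j = np.zeros(len(y_train))
        for train_index, cv_index in skf.split(xs_train, y_train):
            learner = clone(clf)
            learner.fit(xs_train[train_index], y_train[train_index])
            # Assumes a binary classifier; column 1 is P(y == 1).
            blend_j[cv_index] = learner.predict_proba(xs_train[cv_index])[:, 1]
        columns.append(blend_j)
    # Stack the columns the way numpy_c_concatenate does in _fit_child.
    return np.column_stack(columns)
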
def scrape_all():
    with open(util.get_cache_file(CACHE, URL), "r") as file:
        data = yaml.safe_load(file)
    for mode in data["values"]:
        char = mode.get("char", "")
        name = mode.get("name", "")
        origin = mode.get("origin", "")
        comment = mode.get("comment", "")
        yield char, name, origin, comment

def scrape_os(os_name, github_path, cache_file, errors=None):
    # Accumulate into a shared dict so several OSes can each fill their
    # own column of the same table; a mutable default argument would be
    # shared across all calls, so default to None instead.
    if errors is None:
        errors = {}
    for line in open(util.get_cache_file(cache_file, GITHUB + github_path)):
        define = DEFINE.match(line)
        if define:
            name, code, message = define.group(1), define.group(2), define.group(3)
            errors[name] = errors.get(name, [""] * len(COLUMNS))
            errors[name][COLUMNS.index("Name")] = name
            errors[name][COLUMNS.index("Message")] = message
            errors[name][COLUMNS.index(os_name)] = code
    return errors

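# A hedged usage sketch for scrape_os: the first call creates the table,
# later calls fill in their own OS column. The paths and cache names
# below are placeholders, not the project's real ones.
def _scrape_all_oses_sketch():
    errors = scrape_os("Linux", "example/linux-errno.h", "linux.h")
    scrape_os("Darwin", "example/darwin-errno.h", "darwin.h", errors)
    return [errors[name] for name in sorted(errors)]
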
def fit(self, xs_train, y_train, index=None):
    dump_file = ""
    if index is not None:
        dump_file = util.get_cache_file(self.estimator.id,
                                        index,
                                        cache_dir=self.cache_dir,
                                        suffix='pkl')
    if self.skip_refit and os.path.isfile(dump_file):
        # A fitted estimator for this fold is already cached: reload it
        # instead of refitting.
        if index is not None:
            self.estimator = joblib.load(dump_file)
    else:
        self.estimator.fit(xs_train, y_train)
        if index is not None:
            joblib.dump(self.estimator, dump_file, compress=True)
    return self

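# The caching in fit follows the usual joblib persist-and-reload pattern.
# A minimal standalone sketch, assuming a scikit-learn style estimator;
# the function and file names here are illustrative, not this library's
# API.
def _cached_fit_sketch(estimator, X, y, dump_file="estimator_fold0.pkl"):
    import os
    import joblib

    if os.path.isfile(dump_file):
        # A previous run already fitted on this data: reuse the result.
        return joblib.load(dump_file)
    estimator.fit(X, y)
    joblib.dump(estimator, dump_file, compress=True)
    return estimator
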
def scrape():
    area = newcode = oldcodes = None
    for line in open(util.get_cache_file(CACHE1, URL1)):
        new = NEW.match(line)
        if new:
            area = new.group(2)
            newcode = new.group(1)
        old = OLD.match(line)
        if old:
            assert area and newcode and not oldcodes
            # Normalize the old-code list: drop commas and the Finnish
            # word "ja" ("and"), then collapse whitespace.
            oldcodes = old.group(1).replace(",", "").replace(" ja ", " ")
            oldcodes = " ".join(oldcodes.split())
            yield area, newcode, oldcodes
            area = newcode = oldcodes = None
    # No partially scraped record may be left over at the end.
    assert not (area or newcode or oldcodes)

def scrape_all():
    with open(util.get_cache_file(CACHE_FILE, URL), newline="") as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row
        for row in reader:
            description = row[4].split("\n", 1)[0].strip()
            typeranges = row[1]
            typerange = HEXRANGE.match(typeranges)
            if typerange:
                # A range spec yields one row per code in the range.
                mintype = int(typerange.group(1), 16)
                maxtype = int(typerange.group(2), 16)
                for typecode in range(mintype, maxtype + 1):
                    yield "0x{:04X}".format(typecode), description
            else:
                typecode = int(typeranges, 16)
                yield "0x{:04X}".format(typecode), description

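# The range-expansion branch above relies on HEXRANGE, which is defined
# elsewhere in this module. A self-contained sketch of the same technique
# with an assumed, illustrative pattern (not the module's real one):
def _expand_hex_range_sketch(spec, description):
    import re

    hexrange = re.compile(r"^0x([0-9A-Fa-f]+)-0x([0-9A-Fa-f]+)$")
    m = hexrange.match(spec)
    if m:
        lo, hi = int(m.group(1), 16), int(m.group(2), 16)
        for code in range(lo, hi + 1):
            yield "0x{:04X}".format(code), description
    else:
        yield "0x{:04X}".format(int(spec, 16)), description
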
def scrape():
    current = None
    for line in open(util.get_cache_file(CACHE, URL)):
        rfc = RFC.match(line)
        desc = None
        if rfc:
            # A new RFC entry starts; continuation lines may follow.
            current = [rfc.group(1), ""]
            desc = rfc.group(2)
        elif current:
            desc = line
        if desc:
            # "(Format: ...)" marks the end of an entry's description.
            pivot = desc.find("(Format:")
            was_last = pivot >= 0
            if was_last:
                desc = desc[:pivot]
            current[1] += desc
            if was_last:
                yield current[0], fix_description(current[1])
                current = None

def scrape_all():
    ports = {}
    with open(util.get_cache_file(CACHE_FILE, URL), newline="") as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row
        for row in reader:
            service_name = row[0]
            description = row[3]
            port_spec = row[1]
            port_range = PORT_RANGE.match(port_spec)
            if port_range:
                minport = int(port_range.group(1))
                maxport = int(port_range.group(2))
                for port_number in range(minport, maxport + 1):
                    scrape_port(port_number, service_name, description, ports)
            elif port_spec:
                port_number = int(port_spec)
                scrape_port(port_number, service_name, description, ports)
    return [row for _, row in sorted(ports.items())]

def slang_names():
    slang = [""] * 128
    field = None
    i = 0
    nametable = (tarfile.open(util.get_cache_file(TARFILE, URL))
                 .extractfile(TARSTEM + "/nametable")
                 .read().decode("ascii").splitlines())
    for line in nametable:
        if line == "%%":
            # "%%" separates the per-character records.
            i += 1
            continue
        m = re.match(r"^([A-Za-z]+):\s*(.*?)\s*$", line)
        if m:
            # A "Field: value" line starts a new field.
            field = m.group(1)
            value = m.group(2)
        else:
            # An indented line continues the current field.
            m = re.match(r"^\s+(.*?)\s*$", line)
            if m:
                value = m.group(1)
        # Guard on m so that lines matching neither pattern cannot
        # re-append a stale value.
        if m and field == "Synonyms":
            slang[i] += value
    for i in range(len(slang)):
        fields = re.sub(r'["]', " ", slang[i]).split(",")
        slang[i] = " / ".join(filter(None, map(str.strip, fields)))
    return slang

def scrape():
    for line in open(util.get_cache_file(CACHE, URL)):
        m = re.match(r"^;\{\{.*?\}\}(\d{3}) (.*?)\s*$", line)
        if m:
            yield m.group(1), m.group(2).replace("[[", "").replace("]]", "")

def scrape():
    for line in open(util.get_cache_file(CACHE, URL)):
        abbrev, state = ABBREV.search(line), STATE.search(line)
        # Compare the captured text, not the match object, against "US"
        # (a match object never equals a string, so the old test was a
        # no-op).
        if abbrev and state and abbrev.group(1) != "US":
            yield abbrev.group(1), state.group(1)

def scrape():
    parser = Parser()
    parser.feed(open(util.get_cache_file(CACHE, URL)).read())
    return [(a, b, c) for a, (b, c) in sorted(parser.colors.items())]

def _is_saved(self, model, index):
    model_id = self.get_stage0_id(model)
    return os.path.isfile(util.get_cache_file(model_id, index))

def scrape():
    for line in open(util.get_cache_file(CACHE, URL)):
        if re.match(r"\| \[\[\.[a-z]+\]\] \|\|", line):
            columns = line[1:].split("||")
            yield list(map(cleanup_table_column, columns[:2]))

def scrape():
    parser = Parser()
    parser.feed(open(util.get_cache_file(CACHE, URL)).read())
    return parser.rows