Example #1
    def _fit_child(self, skf, xs_train, y_train):
        """Build stage0 models from the training set (xs_train, y_train).

        Parameters
        ----------
        skf : StratifiedKFold-like iterator
            Used for cross-validation blending.

        xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y_train : array-like, shape = [n_samples]
            The target values (class labels in classification).

        Returns
        -------
        blend_train : array-like, shape = [n_samples]
            For stage1 model training.
        blend_test : array-like, shape = [n_samples]
            Blended samples for the test set; only prepared when
            TwoStageKFold is used.
        """
        blend_train = None
        blend_test = None
        for j, clf in enumerate(self.clfs):
            self._out_to_console('Training classifier [{0}]'.format(j), 0)
            all_learner_key = str(type(clf)) + str(j)
            self.all_learner[all_learner_key] = []
            blend_train_j = None
            for i, (train_index, cv_index) in enumerate(skf):
                now_learner = clone(clf)
                self.all_learner[all_learner_key].append(now_learner)
                xs_now_train = xs_train[train_index]
                y_now_train = y_train[train_index]
                xs_cv = xs_train[cv_index]
                # y_cv = y_train[cv_index]  # not needed here

                if not hasattr(now_learner, 'id'):
                    now_learner.id = self.get_stage0_id(now_learner)

                dump_file = util.get_cache_file(now_learner.id,
                                                cv_index,
                                                suffix='pkl')
                if self.save_stage0 and self._is_saved(now_learner, cv_index):
                    print('Prediction cache exists: skip fitting.')
                    now_learner = joblib.load(dump_file)
                    self.all_learner[all_learner_key][-1] = now_learner
                else:
                    self._out_to_console('Fold [{0}]'.format(i), 0)
                    now_learner.fit(xs_now_train, y_now_train)
                    if self.save_stage0:
                        joblib.dump(now_learner, dump_file, compress=True)

                if blend_train_j is None:
                    blend_train_j = self._get_blend_init(y_train, now_learner)
                blend_train_j[cv_index] = self._get_child_predict(
                    now_learner, xs_cv, cv_index)
            blend_train = numpy_c_concatenate(blend_train, blend_train_j)
        return blend_train, blend_test
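In examples #1, #4, and #13, util.get_cache_file acts as a path builder for pickled per-fold models. A minimal sketch of such a helper, assuming the path is derived from the model id and a digest of the fold's index array (the real implementation may differ):

import hashlib
import os

def get_cache_file(model_id, index, cache_dir='cache', suffix='pkl'):
    # Hypothetical sketch, not the actual util.get_cache_file: build a
    # deterministic file name from the model id and the fold indices.
    digest = hashlib.md5(repr(list(index)).encode('utf-8')).hexdigest()[:8]
    return os.path.join(cache_dir,
                        '{0}_{1}.{2}'.format(model_id, digest, suffix))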
Example #2
def scrape_all():
    with open(util.get_cache_file(CACHE, URL), "r") as file:
        data = yaml.safe_load(file)
        for mode in data["values"]:
            char = mode.get("char", "")
            name = mode.get("name", "")
            origin = mode.get("origin", "")
            comment = mode.get("comment", "")
            yield char, name, origin, comment
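In the scraping examples (#2, #3, #5 through #12, #14, #15) the same name plays a different role: get_cache_file(cache_name, url) returns a local file path, fetching the URL first when no cached copy exists. A minimal sketch under that assumption:

import os
import urllib.request

def get_cache_file(cache_name, url, cache_dir='cache'):
    # Hypothetical sketch: download url into cache_dir on the first call,
    # then keep returning the cached local path.
    os.makedirs(cache_dir, exist_ok=True)
    path = os.path.join(cache_dir, cache_name)
    if not os.path.isfile(path):
        urllib.request.urlretrieve(url, path)
    return path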
Example #3
def scrape_os(os_name, github_path, cache_file, errors=None):
    # A mutable default dict would be shared across calls; start fresh
    # unless the caller passes a dict in to accumulate across OSes.
    if errors is None:
        errors = {}
    for line in open(util.get_cache_file(cache_file, GITHUB + github_path)):
        define = DEFINE.match(line)
        if define:
            name, code, message = define.group(1, 2, 3)
            errors[name] = errors.get(name, [""] * len(COLUMNS))
            errors[name][COLUMNS.index("Name")] = name
            errors[name][COLUMNS.index("Message")] = message
            errors[name][COLUMNS.index(os_name)] = code
    return errors
Example #4
 def fit(self, xs_train, y_train, index=None):
     dump_file = ""
     if index is not None:
         dump_file = util.get_cache_file(self.estimator.id,
                                         index,
                                         cache_dir=self.cache_dir,
                                         suffix='pkl')
     if self.skip_refit and os.path.isfile(dump_file):
         if index is not None:
             self.estimator = joblib.load(dump_file)
     else:
         self.estimator.fit(xs_train, y_train)
         if index is not None:
             joblib.dump(self.estimator, dump_file, compress=True)
     return self
Example #5
def scrape():
    area = newcode = oldcodes = None
    for line in open(util.get_cache_file(CACHE1, URL1)):
        new = NEW.match(line)
        if new:
            area = new.group(2)
            newcode = new.group(1)
        old = OLD.match(line)
        if old:
            assert area and newcode and not oldcodes
            oldcodes = old.group(1).replace(",", "").replace(" ja ", " ")
            oldcodes = " ".join(oldcodes.split())
            yield area, newcode, oldcodes
            area = newcode = oldcodes = None
    assert not (area or newcode or oldcodes)
Example #6
def scrape_all():
    with open(util.get_cache_file(CACHE_FILE, URL), newline="") as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row
        for row in reader:
            description = row[4].split("\n", 1)[0].strip()
            typeranges = row[1]
            typerange = HEXRANGE.match(typeranges)
            if typerange:
                mintype = int(typerange.group(1), 16)
                maxtype = int(typerange.group(2), 16)
                for typecode in range(mintype, maxtype + 1):
                    yield "0x{:04X}".format(typecode), description
            else:
                typecode = int(typeranges, 16)
                yield "0x{:04X}".format(typecode), description
Example #7
def scrape():
    current = None
    for line in open(util.get_cache_file(CACHE, URL)):
        rfc = RFC.match(line)
        desc = None
        if rfc:
            current = [rfc.group(1), ""]
            desc = rfc.group(2)
        elif current:
            desc = line
        if desc:
            pivot = desc.find("(Format:")
            was_last = pivot >= 0
            desc = desc[:pivot] if was_last else desc
            current[1] += desc
            if was_last:
                yield current[0], fix_description(current[1])
                current = None
Example #8
def scrape_all():
    ports = {}
    with open(util.get_cache_file(CACHE_FILE, URL), newline="") as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row
        for row in reader:
            service_name = row[0]
            description = row[3]
            port_spec = row[1]
            port_range = PORT_RANGE.match(port_spec)
            if port_range:
                minport = int(port_range.group(1))
                maxport = int(port_range.group(2))
                for port_number in range(minport, maxport + 1):
                    scrape_port(port_number, service_name, description, ports)
            elif port_spec:
                port_number = int(port_spec)
                scrape_port(port_number, service_name, description, ports)
    return [row for _, row in sorted(ports.items())]
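scrape_port is a helper defined elsewhere in the same project; judging only from the call site, it records one row per port number in the ports dict. A hypothetical sketch:

def scrape_port(port_number, service_name, description, ports):
    # Hypothetical sketch of the unshown helper: keep the first entry
    # seen for each port number.
    if port_number not in ports:
        ports[port_number] = [str(port_number), service_name, description]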
Example #9
def slang_names():
    slang = [""] * 128
    field = None
    i = 0
    tar = tarfile.open(util.get_cache_file(TARFILE, URL))
    nametable = tar.extractfile(TARSTEM + "/nametable").read().decode("ascii")
    for line in nametable.splitlines():
        if line == "%%":
            i += 1
            continue
        m = re.match(r"^([A-Za-z]+):\s*(.*?)\s*$", line)
        if m:
            field = m.group(1)
            value = m.group(2)
        m = re.match(r"^\s+(.*?)\s*$", line)
        if m:
            value = m.group(1)
        if field == "Synonyms":
            slang[i] += value
    for i in range(len(slang)):
        fields = re.sub(r'["]', " ", slang[i]).split(",")
        slang[i] = " / ".join(filter(None, map(str.strip, fields)))
    return slang
Example #10
def scrape():
    for line in open(util.get_cache_file(CACHE, URL)):
        m = re.match(r"^;\{\{.*?\}\}(\d{3}) (.*?)\s*$", line)
        if m:
            yield m.group(1), m.group(2).replace("[[", "").replace("]]", "")
Example #11
def scrape():
    for line in open(util.get_cache_file(CACHE, URL)):
        abbrev, state = ABBREV.search(line), STATE.search(line)
        if abbrev and state:
            abbrev, state = abbrev.group(1), state.group(1)
            # Compare the captured string, not the match object: a match
            # object never equals "US", so the original test could not
            # skip the US entry.
            if abbrev != "US":
                yield abbrev, state
Example #12
def scrape():
    parser = Parser()
    parser.feed(open(util.get_cache_file(CACHE, URL)).read())
    return [(a, b, c) for a, (b, c) in sorted(parser.colors.items())]
Example #13
 def _is_saved(self, model, index):
     model_id = self.get_stage0_id(model)
     return os.path.isfile(util.get_cache_file(model_id, index))
Example #14
def scrape():
    for line in open(util.get_cache_file(CACHE, URL)):
        if re.match(r"\| \[\[\.[a-z]+\]\] \|\|", line):
            columns = line[1:].split("||")
            yield list(map(cleanup_table_column, columns[:2]))
Example #15
def scrape():
    parser = Parser()
    parser.feed(open(util.get_cache_file(CACHE, URL)).read())
    return parser.rows