Пример #1
0
    def print_model_scores(kfold_results):
        """Print the mean AUC of each model across all k-fold runs.

        Args:
            kfold_results: iterable of per-run results; the last element of
                each entry is a dict mapping model name -> fold scores.
        """
        score_dicts = [entry[-1] for entry in kfold_results]

        # Union of all model names seen in any fold's score dict.
        model_names = set()
        for scores in score_dicts:
            model_names.update(scores.keys())

        for model in model_names:
            per_fold = flatten([scores[model] for scores in score_dicts])
            model_mean = np.mean(per_fold)
            print(f"{model} AUC: \t {model_mean}")
Пример #2
0
    def __init__(self,
                 features: List[Union[Feature, List[Feature]]],
                 cache_table: Union[str, None] = None):
        """Store the flattened feature list to be used by this builder.

        Args:
            features: a list of features; entries may themselves be lists of
                features, which are flattened into a single flat list.
            cache_table: Optional; if specified, calculated features will be
                looked up in / saved to this cache table.
        """
        # flatten() collapses one level of nesting into a flat feature list.
        self.features = flatten(features)

        self.cache_table = cache_table
Пример #3
0
def main():
    """Build the bokeh dashboard document and register it with curdoc().

    NOTE(review): ``task`` is assigned ``None`` below but is dereferenced a
    few lines later (``task.embedder.features``) — as written this raises
    AttributeError. Presumably a task-loading call was removed or is still
    TODO; confirm before running.
    """
    # Fix seeds so the dashboard contents are reproducible between runs.
    random.seed(42)
    numpy.random.seed(42)
    # Force all feature computation to run serially in this process.
    multiproc_util.force_serial = True
    task = None  # TODO(review): placeholder — must be a real task object


    # get the feature names
    feature_factory = PostgresFeatureFactory(task.embedder.features, input_gs=None)
    all_feat_names = flatten([feat.feature_names for feat in feature_factory.features])

    # create bokeh tabs
    tabs = []
    # tabs += [Panel(child=FeatureDashboard(all_feat_names).main_panel, title="Feature Exploration")]
    tabs += [Panel(child=BuildingTaskDashboard(task).main_panel, title="Task")]  # TODO: doesn't work for some reason

    tabs = Tabs(tabs=tabs)
    curdoc().add_root(tabs)
Пример #4
0
 def all_feat_names(self) -> List[str]:
     """Return the flat list of feature names across all of this
     object's features."""
     names_per_feature = [feat.feature_names for feat in self.features]
     return flatten(names_per_feature)
Пример #5
0
def parmap(f: Callable,
           X: List[object],
           nprocs=multiprocessing.cpu_count(),
           force_parallel=False,
           chunk_size=1,
           use_tqdm=False,
           keep_child_tqdm=True,
           **tqdm_kwargs) -> list:
    """
    Utility function for doing parallel calculations with multiprocessing.

    Splits the inputs into chunks (if requested) and maps ``f`` over them
    in a process pool. Equivalent to ``list(map(f, X))``.

    Args:
        f: The function to apply to each element.
        X: The list of inputs (one element per call of ``f``).
        nprocs: Number of worker processes (defaults to all cores).
        force_parallel: If True, use the pool even when nprocs == 1.
        chunk_size: Optional chunk size for workers to process per task.
        use_tqdm: Whether to show a tqdm progress bar (default False).
        keep_child_tqdm: Passed through to each worker alongside its input.
        tqdm_kwargs: Extra kwargs forwarded to tqdm.

    Returns:
        The list of results after applying ``f`` to each element of ``X``.

    Notes:
        - Runs serially when the module-level ``force_serial`` flag is set,
          or when pool creation fails with AssertionError (e.g. called from
          a daemonic process — see the except clause below).
        - Using ``self.*`` inside ``f`` causes ``self`` to be pickled and
          sent to workers, which can fail or be slow.
    """
    if len(X) == 0:
        return []  # like map: empty input -> empty output
    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # too few items to fill a chunk per proc; shrink chunks instead

    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1 proc
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap too much procs")
        nprocs = len(X)  # never more procs than items

    # Each worker receives a (element, f, index, keep_child_tqdm) tuple.
    args = zip(X, [f] * len(X), range(len(X)), [keep_child_tqdm] * len(X))
    if chunk_size > 1:
        args = list(chunk_iterator(args, chunk_size))
        s_fun = _chunk_spawn_fun  # spawn fun that unpacks a whole chunk
    else:
        s_fun = _spawn_fun  # spawn fun for single elements

    if (nprocs == 1 and not force_parallel
        ) or force_serial:  # we want it serial (maybe for profiling)
        return list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))

    try:  # try-catch hides bugs
        # proc_count is a module-level global — presumably tracks current
        # worker count for nested calls; restored below. TODO confirm.
        global proc_count
        old_proc_count = proc_count
        proc_count = nprocs
        p = Pool(nprocs)
        # NOTE(review): restart(force=True) is not a stdlib Pool method —
        # looks like a pathos pool; confirm against the module's imports.
        p.restart(force=True)
        # can throw if current proc is daemon
        if use_tqdm:
            retval_par = tqdm(p.imap(lambda arg: s_fun(arg), args),
                              total=int(len(X) / chunk_size),
                              **tqdm_kwargs)
        else:
            # import  pdb
            # pdb.set_trace()
            retval_par = p.map(lambda arg: s_fun(arg), args)

        retval = list(retval_par)  # make it like the original map
        if chunk_size > 1:
            retval = flatten(retval)  # undo the chunking of results

        p.terminate()
        proc_count = old_proc_count
        # NOTE(review): module-level counter; purpose unclear from here —
        # requires a global ``i`` to exist or this raises NameError.
        global i
        i += 1
    except AssertionError as e:
        # if e == "daemonic processes are not allowed to have children":
        retval = list(map(f,
                          tqdm(X, disable=not use_tqdm,
                               **tqdm_kwargs)))  # can't have pool inside pool
    return retval
Пример #6
0
    # ROADS
    (MAJOR_ROAD, OSM_LINE_TABLE, "major_road", all_radii_up_to(100, 1000),
     relevant_feat_types("line_length"), 1),
    (MINOR_ROAD, OSM_LINE_TABLE, "minor_road", all_radii_up_to(50, 250),
     relevant_feat_types("line_length"), 1),

    # BUILDING
    (BUILDING, OSM_POLYGON_TABLE, "building", all_radii_up_to(50, 250),
     relevant_feat_types("polygon_area"), 1),
    # (BUILDING, OSM_POLYGON_TABLE, "building", all_radii_up_to(0, 250), [Heights], 2),  # doesn't work for normal osm
    (BUILDING, OSM_POLYGON_TABLE, "building", all_radii_up_to(max_radius=0),
     [AreaOf], 3),
    # (JUNCTIONS, JUNCTIONS_TABLE, "junction", all_radii_up_to(max_radius=250), [OSMRoute]),
]

# Flatten all feature bundles into one list (currently only the karka
# bundle; add further bundles to the list below to include them).
all_bundle_features = flatten([karka_bundle_features])


# TODO: bad name
# TODO: bad name
def create_building_features(elements=None, level_importance=10):
    """Instantiate feature objects from bundle element specs.

    Args:
        elements: iterable of (filter, table_name, obj_name, radii,
            feature_types, importance) tuples; defaults to
            ``all_bundle_features``.
        level_importance: only elements with ``importance`` at or below
            this level are instantiated (default 10).

    Returns:
        A flat list with one feature instance per (radius, feature type)
        combination of every selected element.
    """
    if elements is None:
        elements = all_bundle_features

    # One feature per (radius, type) pair of each sufficiently important element.
    return [
        feat_type(filt, table_name, obj_name, radius=radius)
        for filt, table_name, obj_name, radii, features_types, importance in elements
        if importance <= level_importance
        for radius, feat_type in product(radii, features_types)
    ]