def __init__(self, option: int, partition: int, combined_data: pd.DataFrame, effects: List[pd.DataFrame],
             view: View, look_similar=None):
    """
    Build a partitioned option from its combined data and its individual effects.

    Each effect is stored together with its empirical probability, i.e. the fraction of all
    effect samples that belong to it.

    :param option: the option index
    :param partition: the partition index
    :param combined_data: all the data (including probabilistic effects) concatenated
    :param effects: the individual effects (each effect is a stochastic transition)
    :param view: the view
    :param look_similar: a set of other partition indices that look identical to this one (but are in fact not)
    """
    # the view decides which next-state column is counted and which mask column is kept
    if view == View.PROBLEM:
        state_column, mask_column = 'next_state', 'mask'
    else:
        state_column, mask_column = 'next_agent_state', 'agent_mask'
    kept_columns = ['state', 'agent_state', 'reward', 'next_state', 'next_agent_state', mask_column]

    self._option = option
    self._view = view
    self._partition = partition
    self._states = pd2np(combined_data['state'])
    self._agent_states = pd2np(combined_data['agent_state'])

    # probability of an effect = its sample count / total sample count
    sizes = [len(effect[state_column]) for effect in effects]
    total_samples = sum(sizes)
    self._effects = [(size / total_samples, effect[kept_columns]) for size, effect in zip(sizes, effects)]

    # other partitions that look similar but are not
    self._look_similar = set() if look_similar is None else look_similar
    self._combined_data = combined_data
def _is_overlap_init(A: pd.DataFrame, B: pd.DataFrame, **kwargs):
    """
    Decide whether the 'state' data of two frames overlap.

    The pooled states are clustered, and so is each frame on its own. The frames are deemed
    to overlap when pooling them does not produce more clusters than the larger of the two
    individual cluster counts.

    :param A: the first frame (must have a 'state' column)
    :param B: the second frame (must have a 'state' column)
    :param kwargs: 'init_epsilon' (default 0.05) and 'init_min_samples' (default 5) are passed to the clustering
    :return: True if the frames are considered to overlap
    """
    eps = kwargs.get('init_epsilon', 0.05)
    min_points = kwargs.get('init_min_samples', 5)

    first = pd2np(A['state'])
    second = pd2np(B['state'])

    pooled_count = _num_clusters(np.concatenate((first, second)), eps, min_points)
    separate_count = max(_num_clusters(first, eps, min_points), _num_clusters(second, eps, min_points))
    return pooled_count <= separate_count
def _merge(existing_cluster: pd.DataFrame, new_cluster: pd.DataFrame, verbose=False,
           **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    """
    Given an existing and new cluster, determine whether there is any overlap in their initiation sets.
    Overlapping data should be extracted and put into its own cluster

    :param existing_cluster: the existing cluster
    :param new_cluster: the new cluster
    :param verbose: the verbosity level
    :param kwargs: 'init_epsilon' (default 0.05) and 'init_min_samples' (default 5) configure DBSCAN
    :return: two boolean arrays specifying, for the existing and new cluster, which data should be extracted out into
    its own cluster
    """
    # TODO: this code could be improved/optimised, but will do that another time
    epsilon = kwargs.get('init_epsilon', 0.05)
    min_samples = kwargs.get('init_min_samples', 5)

    # We check the problem-space information regardless of the view, because if we did not (and the
    # option was not in fact stochastic), we'd have to correct it later on. So just do it here.
    # (A view-dependent column used to be computed here and then immediately overwritten.)
    column = 'state'

    X = pd2np(existing_cluster[column])
    Y = pd2np(new_cluster[column])
    data = np.concatenate((X, Y))

    # cluster both data sets jointly, then split the labels back per data set
    labels = DBSCAN(eps=epsilon, min_samples=min_samples).fit_predict(data)
    existing_labels = labels[:len(X)]  # labels of the existing partition data
    new_labels = labels[len(X):]  # labels of the new partition data

    existing_labels_set = set(existing_labels)
    new_labels_set = set(new_labels)
    shared_labels = existing_labels_set.intersection(new_labels_set)
    shared_labels.discard(-1)  # remove noise if present
    shared = list(shared_labels)  # np.isin does not accept sets, so convert once

    existing_shared = np.isin(existing_labels, shared)
    new_shared = np.isin(new_labels, shared)

    # Handle "noise" - count as intersected if the whole group has been subsumed.
    # TODO is this actually necessary?
    if -1 in existing_labels_set and existing_labels_set.issubset(new_labels_set):
        existing_shared[existing_labels == -1] = True  # all points classified as noise
    if -1 in new_labels_set and new_labels_set.issubset(existing_labels_set):
        new_shared[new_labels == -1] = True  # all points classified as noise

    # a mask holding both True and False means only part of the cluster is split off
    show("Splitting data from old cluster", verbose and len(np.unique(existing_shared)) > 1)
    show("Splitting data from new cluster", verbose and len(np.unique(new_shared)) > 1)
    return existing_shared, new_shared
def effects(self, view=None):
    """
    Iterate over the stored effects.

    :param view: the view whose state columns should be returned. Defaults to the view stored on this object
    :return: a generator of (probability, states, rewards, next states, mask) tuples, with the mask cast to int
    """
    if view is None:
        view = self._view

    if view == View.PROBLEM:
        state_modifier = ''
    else:
        state_modifier = 'agent_'

    # NOTE: the mask column follows the view stored on this object, not the requested view
    if self._view == View.PROBLEM:
        mask_modifier = ''
    else:
        mask_modifier = 'agent_'

    state_key = '{}state'.format(state_modifier)
    next_state_key = 'next_{}state'.format(state_modifier)
    mask_key = '{}mask'.format(mask_modifier)

    for probability, frame in self._effects:
        states = pd2np(frame[state_key])
        rewards = pd2np(frame['reward'])
        next_states = pd2np(frame[next_state_key])
        masks = pd2np(frame[mask_key]).astype(int)
        yield probability, states, rewards, next_states, masks
def find_goal_symbols(factors: List[List[int]], vocabulary: Iterable[Proposition], transition_data: pd.DataFrame,
                      verbose=False, **kwargs) -> Tuple[float, List[Proposition]]:
    """
    Find the set of symbols that best describes the goal condition. In the data, the goal being achieved is
    specified by the 'goal_achieved' column

    :param factors: the domain factorisation
    :param vocabulary: the list of symbols
    :param transition_data: the transition data
    :param verbose: the verbosity level
    :return: the probability of the symbols modelling the goal, and the list of symbols themselves. If no
    combination of symbols has a probability above zero, the score is 0 and the list is empty
    """
    show("Searching for goal symbols", verbose)

    # the goal states
    column = get_column_by_view('next_state', kwargs)
    positive_samples = pd2np(transition_data.loc[transition_data['goal_achieved'] == True][column])
    negative_samples = pd2np(transition_data.loc[transition_data['goal_achieved'] == False][column])

    # fit a classifier to the data
    svm = _learn_precondition(positive_samples, negative_samples, verbose=verbose, **kwargs)

    # Find the existing symbols that best match the goal precondition
    show("Finding matching symbols", verbose)
    precondition_factors = _mask_to_factors(svm.mask, factors)

    # the vocabulary is scanned once per factor, so materialise it in case it is a one-shot iterable
    vocabulary = list(vocabulary)
    candidates = list()
    for factor in precondition_factors:
        factor_variables = set(factor)
        candidates.append([proposition for proposition in vocabulary
                           if set(proposition.mask) == factor_variables])

    # one proposition per factor; materialised so that the count can be reported
    combinations = list(itertools.product(*candidates))
    show("Searching through {} candidates...".format(len(combinations)), verbose)

    best_score = 0
    best_candidates = None
    for count, combination in enumerate(combinations):
        show("Checking candidate {}".format(count), verbose)
        if _masks_overlap(combination):
            # This should never happen, but putting a check to make sure
            warn("Overlapping candidates in PDDL building!")
            continue
        # probability of propositions matching classifier
        precondition_prob = _probability_in_precondition(combination, svm)
        if precondition_prob > best_score:
            best_score = precondition_prob
            best_candidates = combination

    if best_candidates is None:
        # nothing scored above zero (or there was nothing to search); previously this crashed with a TypeError
        warn("No symbols found that match the goal condition!")
        return best_score, []

    show("Best candidates with probability {}: \n{}".format(best_score, ' '.join([str(x) for x in best_candidates])),
         verbose)
    return best_score, list(best_candidates)
def _cluster_effects(samples: pd.DataFrame, mask: List[int], verbose=False, **kwargs) -> List[pd.DataFrame]:
    """
    Cluster samples based on their effects

    :param samples: the samples
    :param mask: the state variables modified by the option
    :param verbose: the verbosity level
    :param kwargs: 'effect_epsilon' (default 0.05) and 'effect_min_samples' (default 5) configure DBSCAN
    :return: a list of data frames, with each element in the list representing a single cluster
    """
    epsilon = kwargs.get('effect_epsilon', 0.05)
    min_samples = kwargs.get('effect_min_samples', 5)

    data = pd2np(samples['next_state'])  # convert to numpy
    if len(mask) == 0:
        # No state variable changed, so there is nothing to cluster on: treat everything as one class,
        # unless there are too few samples, in which case everything is noise
        labels = np.full(len(data), -1 if len(data) < min_samples else 0, dtype=int)
    else:
        masked_data = data[:, mask]  # cluster only on state variables that changed
        labels = DBSCAN(eps=epsilon, min_samples=min_samples).fit(masked_data).labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)

    clusters = list()
    for label in np.unique(labels):  # ascending label order
        if label == -1:
            # noise
            continue
        # Select rows with a positional boolean mask, so the result does not depend on the frame's index
        # labels; then reset the index back to zero based (not in place)
        clusters.append(samples.loc[labels == label].reset_index(drop=True))
    return clusters
def _generate_start_symbols(transition_data: pd.DataFrame, factors: List[List[int]], verbose=False,
                            **kwargs) -> List[StateDensityEstimator]:
    """
    Generate symbols from the start states, taken as the 'state' of the first row of each episode

    :param transition_data: the transition data (must have 'episode' and 'state' columns)
    :param factors: the domain factorisation
    :param verbose: the verbosity level
    :return: whatever _generate_symbols produces for the start states
    """
    show("Generating start state symbols...", verbose)
    # one row per episode: the first one encountered in each group
    first_transitions = transition_data.groupby('episode').nth(0)
    initial_states = pd2np(first_transitions['state'])
    return _generate_symbols(initial_states, factors, verbose=verbose, **kwargs)
def __init__(self, option: int, partition: int, init_cluster: pd.DataFrame, effects: List[pd.DataFrame]):
    """
    Build a partitioned option from its initiation data and its effects.

    Each effect is stored with its empirical probability, i.e. its share of all effect samples.

    :param option: the option index
    :param partition: the partition index
    :param init_cluster: the data whose 'state' column gives the states of this partition
    :param effects: the individual effects
    """
    self._option = option
    self._partition = partition
    self._states = pd2np(init_cluster['state'])

    kept_columns = ['state', 'reward', 'next_state', 'mask']
    counts = [len(effect['next_state']) for effect in effects]
    total_samples = sum(counts)
    self._effects = [(count / total_samples, effect[kept_columns]) for count, effect in zip(counts, effects)]
def _cluster_data(samples: pd.DataFrame, column_name: str, epsilon: float, min_samples: int,
                  verbose=False) -> List[pd.DataFrame]:
    """
    Cluster the samples with DBSCAN based on the data in one column

    :param samples: the samples
    :param column_name: the column holding the data to cluster on
    :param epsilon: the DBSCAN eps parameter
    :param min_samples: the DBSCAN min_samples parameter
    :param verbose: the verbosity level
    :return: a list of data frames, one per cluster, each with a zero-based index. Noisy samples are dropped
    """
    data = samples[column_name]  # TODO how to get a non object dtype out of pandas???
    labels = DBSCAN(eps=epsilon, min_samples=min_samples).fit(pd2np(data)).labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)

    clusters = list()
    for label in np.unique(labels):  # ascending label order
        if label == -1:
            # noise
            continue
        # Select rows with a positional boolean mask, so the result does not depend on the frame's index
        # labels; then reset the index back to zero based (not in place)
        clusters.append(samples.loc[labels == label].reset_index(drop=True))
    return clusters
def _learn_preconditions(init_data: pd.DataFrame, partitioned_options: List[PartitionedOption],
                         all_partitions: Dict[int, List[PartitionedOption]], verbose=False,
                         **kwargs) -> Dict[Tuple[int, int], PreconditionClassifier]:
    """
    Learn a precondition classifier for every partitioned option

    :param init_data: the initiation data, with 'option' and 'can_execute' columns plus the state column
    :param partitioned_options: the partitioned options to learn preconditions for
    :param all_partitions: for each option, all of its partitions (used to augment the negative samples)
    :param verbose: the verbosity level
    :param kwargs: 'view' selects the state column; 'augment_negative' (default True) toggles augmentation.
    All keyword arguments are also forwarded to _learn_precondition
    :return: a mapping from (option, partition) to the learned precondition
    """
    if kwargs.get('view', View.PROBLEM) == View.PROBLEM:
        state_column = 'state'
    else:
        state_column = 'agent_state'
    augment = kwargs.get('augment_negative', True)

    learned = dict()
    cached_option = None
    cached_negatives = None
    for partitioned in partitioned_options:
        option = partitioned.option

        # the raw negative data depends only on the option, so reload it only when the option changes
        if option != cached_option:
            # must do equals False because Pandas!
            failed = (init_data['option'] == option) & (init_data['can_execute'] == False)
            cached_negatives = pd2np(init_data.loc[failed][state_column])

        show('Learning precondition for option {}, partition {}'.format(option, partitioned.partition), verbose)

        negative_samples = cached_negatives
        if augment:
            # augment negative samples from the initiation sets of the other partitions
            negative_samples = _augment_negative(cached_negatives, partitioned.partition, all_partitions[option])

        # this property gets either agent or problem-space states, whichever was used to partition in the first place
        positive_samples = partitioned.states

        show("Calculating mask for option {}, partition {} ...".format(partitioned.option, partitioned.partition),
             verbose)
        learned[(option, partitioned.partition)] = _learn_precondition(positive_samples, negative_samples,
                                                                       verbose=verbose, **kwargs)
        cached_option = option

    return learned
def _cluster_effects(samples: pd.DataFrame, mask: List[int], verbose=False, **kwargs) -> List[pd.DataFrame]:
    """
    Cluster samples based on their effects

    :param samples: the samples
    :param mask: the state variables modified by the option
    :param verbose: the verbosity level
    :param kwargs: 'effect_epsilon' (default 0.05) and 'effect_min_samples' (default 5) configure DBSCAN;
    'view' selects whether the agent-space or problem-space next state is clustered
    :return: a list of data frames, with each element in the list representing a single cluster
    """
    epsilon = kwargs.get('effect_epsilon', 0.05)
    min_samples = kwargs.get('effect_min_samples', 5)
    column = 'next_agent_state' if kwargs.get('view', View.PROBLEM) == View.AGENT else 'next_state'

    data = pd2np(samples[column])  # convert to numpy
    if len(mask) == 0:
        # we're just going to assume that everything is one class! (or all noise if there are too few samples)
        labels = np.full(len(data), -1 if len(data) < min_samples else 0, dtype=int)
    else:
        masked_data = data[:, mask]  # cluster only on state variables that changed
        labels = DBSCAN(eps=epsilon, min_samples=min_samples).fit(masked_data).labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)

    clusters = list()
    for label in np.unique(labels):  # ascending label order
        if label == -1:
            # noise
            continue
        # Select rows with a positional boolean mask, so the result does not depend on the frame's index
        # labels; then reset the index back to zero based (not in place)
        clusters.append(samples.loc[labels == label].reset_index(drop=True))
    return clusters
def _generate_goal_symbols(transition_data: pd.DataFrame, factors: List[List[int]], verbose=False,
                           **kwargs) -> List[KernelDensityEstimator]:
    """
    Generate symbols from the goal states, taken as the 'next_state' of every row flagged as done

    :param transition_data: the transition data (must have 'done' and 'next_state' columns)
    :param factors: the domain factorisation
    :param verbose: the verbosity level
    :return: whatever _generate_symbols produces for the goal states
    """
    show("Generating goal symbols...", verbose)
    finished = transition_data['done'] == True
    final_transitions = transition_data.loc[finished]
    goal_states = pd2np(final_transitions['next_state'])
    return _generate_symbols(goal_states, factors, verbose=verbose, **kwargs)
def effects(self):
    """
    Iterate over the stored effects.

    :return: a generator of (probability, states, rewards, next states, mask) tuples, with the mask cast to int
    """
    for probability, frame in self._effects:
        states = pd2np(frame['state'])
        rewards = pd2np(frame['reward'])
        next_states = pd2np(frame['next_state'])
        masks = pd2np(frame['mask']).astype(int)
        yield probability, states, rewards, next_states, masks
def find_closest_start_partition(problem_symbols: QuickCluster, transition_data: pd.DataFrame):
    """
    Look up the entry of problem_symbols that corresponds to the average start state

    :param problem_symbols: the object queried (via its get method) with the mean start state
    :param transition_data: the transition data (must have 'episode' and 'state' columns)
    :return: the result of problem_symbols.get for the mean of the first state of each episode
    """
    # one row per episode: the first one encountered in each group
    first_transitions = transition_data.groupby('episode').nth(0)
    start_states = pd2np(first_transitions['state'])
    mean_start_state = np.mean(start_states, axis=0)
    return problem_symbols.get(mean_start_state)
def _generate_goal_symbols(transition_data: pd.DataFrame, factors: List[List[int]], verbose=False,
                           **kwargs) -> List[StateDensityEstimator]:
    """
    Generate symbols from the goal states, taken from every row flagged as done

    :param transition_data: the transition data (must have a 'done' column and the view's next-state column)
    :param factors: the domain factorisation
    :param verbose: the verbosity level
    :param kwargs: passed to get_column_by_view to pick the next-state column, and forwarded to _generate_symbols
    :return: whatever _generate_symbols produces for the goal states
    """
    show("Generating goal symbols...", verbose)
    column = get_column_by_view('next_state', kwargs)
    finished = transition_data['done'] == True
    final_transitions = transition_data.loc[finished]
    goal_states = pd2np(final_transitions[column])
    return _generate_symbols(goal_states, factors, verbose=verbose, **kwargs)