def findingshapelets(self, data, target):
    """
    Searches for a shapelet classifier for each label.

    :param data: list of training examples
    :type data: np.array
    :param target: list of event labels for each training example
    :type target: np.array
    :return: dict with label as key and (classifier, binary_target) as value
    :rtype: dict
    """
    self.data = data
    self.target = target
    self.windows = self.calc_windows(self.sl_max, self.N_max)
    self.estimate_sigma_min()
    self.unique_labels = self.get_unique_targets(target)
    bsf_classifier = defaultdict(lambda: None)
    self.shapelets = dict()
    # All non-empty subsets of the dimensions (the [1:] drops the empty set).
    self.dimensions_subsets = list(powerset(range(data[0].shape[1])))[1:]
    self.precompute_z_norm(data)

    c = Counter(len(self.dimensions_subsets) * len(self.windows),
                prefix="generating shapelets")
    for i, dimension_subset in enumerate(self.dimensions_subsets):
        for j, window in enumerate(self.windows):
            shapelets = self.prune_shapelet_candidates(window, dimension_subset)
            for label in shapelets:
                self.shapelets[label, dimension_subset, window] = shapelets[label]
            c.printProgress(j + (i * len(self.windows)) + 1)

    self.precompute_bmd(data)

    for label in self.unique_labels:
        # One-vs-rest target: 1 if the example carries this label, else 0.
        binary_target = np.array([int(label in x) for x in target])
        c = Counter(len(self.dimensions_subsets) * len(self.windows),
                    prefix=label)
        c.printProgress(0)
        for ds_i, dimension_subset in enumerate(self.dimensions_subsets):
            for w_i, window in enumerate(self.windows):
                key = (label, dimension_subset, window)
                classifier_candidates = self.build_classifier(
                    self.shapelets[key], binary_target, label, dimension_subset)
                for classifier in classifier_candidates:
                    # bsf_classifier[label] is None until the first candidate
                    # is accepted; cmp_classifier raises AttributeError on None.
                    try:
                        if self.cmp_classifier(bsf_classifier[label],
                                               classifier) > 0:
                            bsf_classifier[label] = classifier
                    except AttributeError:
                        bsf_classifier[label] = classifier
                c.printProgress(ds_i * len(self.windows) + w_i + 1)
        bsf_classifier[label] = bsf_classifier[label], binary_target
    return bsf_classifier
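
# A minimal, self-contained sketch (an assumption, not this module's actual
# import) of the `powerset` helper used above, following the itertools
# documentation recipe. It shows why slicing with [1:] drops the empty subset,
# so every dimension_subset seen by the loops is non-empty.
from itertools import chain, combinations

def powerset(iterable):
    # powerset([0, 1]) -> (), (0,), (1,), (0, 1)
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))

subsets = list(powerset(range(3)))[1:]
assert () not in subsets and (0, 1, 2) in subsets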
def precompute_bmd(self, data):
    """
    Calculates the BMD between all shapelet candidates and all training
    examples.

    :param data: list of training examples
    :type data: np.array
    """
    self.dist_shapelet_ts = dict()
    c = Counter(data.shape[0], prefix="calculating min dist")
    c.printProgress(0)
    for ts_id in range(data.shape[0]):
        for axis in self.dimensions_subsets:
            for shapelet_length in self.windows:
                # Stack the candidates of every label into one array so a
                # single distance computation covers all of them.
                candidates = np.concatenate([
                    self.shapelets[label, axis, shapelet_length]
                    for label in self.unique_labels
                ])
                # Restrict the z-normalized windows to the current dimensions.
                ts = np.concatenate([
                    self.z_data[ts_id, shapelet_length][:, :, a][..., None]
                    for a in axis
                ], axis=-1)
                # Best-match distance: minimum over all windows of the series.
                d_m = distance_matrix3D(candidates, ts).min(axis=1)
                i = 0
                for label in self.unique_labels:
                    key = (label, axis, shapelet_length)
                    for shapelet_id, _ in enumerate(self.shapelets[key]):
                        self.dist_shapelet_ts[ts_id, shapelet_id, label,
                                              shapelet_length, axis] = d_m[i]
                        i += 1
        c.printProgress(ts_id + 1)
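
# Hedged sketch of the best-match distance that distance_matrix3D(...).min(axis=1)
# is assumed to compute above: for each candidate, the smallest Euclidean
# distance to any window of the series. Shapes and the function name here are
# illustrative, not the library's API.
import numpy as np

def min_dist_per_candidate(candidates, windows):
    # candidates: (n_candidates, L, d), windows: (n_windows, L, d)
    diffs = candidates[:, None, :, :] - windows[None, :, :, :]
    dists = np.sqrt((diffs ** 2).sum(axis=(2, 3)))  # (n_candidates, n_windows)
    return dists.min(axis=1)                        # one BMD per candidate

assert min_dist_per_candidate(np.zeros((2, 4, 3)),
                              np.ones((5, 4, 3))).shape == (2,)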
def run_driver(v, D, U, H, verbose=False):
    # Generate small examples to walk through
    test_int, mu, x = alg.initialize(D, U, H)
    eintervals = [test_int]
    evals, evects = [], []
    RQI_iters, bisec_iters, stable, inverse, solve_count = [], [], [], [], []
    n = len(D)
    for it in range(n):
        assert sanity_check(eintervals, n - it)
        counter = Counter()
        lamb, y = driver_back.find_pair(eintervals, D, U, H, mu, x,
                                        counter=counter, version=v)
        evals.append(lamb)
        evects.append(y)
        # Optional debug checks (disabled):
        # D_hat = core.ldl_fast(D - lamb, U, H)
        # inertia = utils.inertia_ldl(D_hat)
        # assert inertia[1] == 1
        # assert np.allclose(0, utils.comp_error(D, U, H, lamb, y))
        RQI_iters.append(counter.rqi_count)
        bisec_iters.append(counter.bisec_count)
        stable.append(counter.stable_count)
        inverse.append(counter.inverse_count)
        solve_count.append(counter.solve_count)
        if it != n - 1:
            # Restart from the midpoint of the first remaining interval and a
            # random unit vector; this could be initialized smarter.
            mu = (eintervals[0].low + eintervals[0].high) / 2
            x = np.random.randn(n)
            x = x / np.linalg.norm(x)

    RQI_iters = np.array(RQI_iters)
    bisec_iters = np.array(bisec_iters)
    stable = np.array(stable)
    print("Max number of RQI iterations = %d, that of bisection = %d." %
          (max(RQI_iters), max(bisec_iters)))
    print("Total number of RQI iterations = %d, that of bisection = %d." %
          (sum(RQI_iters), sum(bisec_iters)))
    print("Average number of RQI iterations = %.2f, that of bisection = %.2f." %
          (np.mean(RQI_iters), np.mean(bisec_iters)))
    print("Total number of inverse iterations = %d." % sum(inverse))
    print("Total number of stable QR calls = %d." % sum(stable))
    print("Total number of bisections that needed a solve = %d." %
          sum(solve_count))

    # Verify every computed pair satisfies A y = lambda * y.
    A = form_A(D, U, H)
    for i in range(n):
        assert np.allclose(np.dot(A, evects[i]), evals[i] * evects[i])
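
# Hedged sketch of a single Rayleigh quotient iteration step, the kind of
# update driver_back.find_pair is assumed to perform internally (the counters
# above track rqi_count); `rqi_step` is a hypothetical name, not the driver's
# actual API.
import numpy as np

def rqi_step(A, x):
    mu = (x @ A @ x) / (x @ x)                         # Rayleigh quotient
    y = np.linalg.solve(A - mu * np.eye(len(A)), x)    # shifted inverse step
    return mu, y / np.linalg.norm(y)

A = np.diag([1.0, 2.0, 5.0])
mu, x = 0.0, np.array([0.1, 0.1, 1.0])
for _ in range(3):
    mu, x = rqi_step(A, x)
print(mu)  # converges rapidly toward the eigenvalue 5.0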
def run(self):
    # create general data structures and queues
    input_queue = Queue()
    output_queue = Queue()
    processes = []
    # create the should-terminate event
    event = Event()
    finished_slaves_counter = Counter()

    # spawn the worker processes
    for i in range(self.context['number']):
        p = Process(target=process,
                    args=[input_queue, output_queue, event,
                          finished_slaves_counter])
        p.start()
        processes.append(p)

    # get the queries and push them into the queue
    queue_thread = Process(target=fill_queue,
                           args=[input_queue, event, self.context])
    queue_thread.daemon = True
    queue_thread.start()

    # empty the output_queue
    result_count = 0
    num_result_count_unchanged = 0
    while True:
        try:
            result = output_queue.get_nowait()
            # do some dummy calculation; md5 needs bytes, so encode strings
            if isinstance(result, str):
                result = result.encode()
            hashlib.md5(result).hexdigest()
            result_count += 1
            num_result_count_unchanged = 0
        except Empty:
            num_result_count_unchanged += 1
            # stop once every worker has reported done, the termination event
            # is set, and either all results arrived or the queue stayed empty
            # for 1000 consecutive polls
            all_done = (finished_slaves_counter.value() ==
                        self.context['number'])
            all_results = (result_count == self.context['query_number'] or
                           num_result_count_unchanged >= 1000)
            if all_done and all_results and event.is_set():
                break
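
# Minimal self-contained sketch of the worker/queue pattern above, using a
# hypothetical `work` function in place of the real `process`/`fill_queue`
# helpers, and a poison pill instead of the event/counter shutdown protocol.
from multiprocessing import Process, Queue

def work(inq, outq):
    for item in iter(inq.get, None):  # None acts as the poison pill
        outq.put(item * 2)

if __name__ == "__main__":
    inq, outq = Queue(), Queue()
    p = Process(target=work, args=(inq, outq))
    p.start()
    for i in range(5):
        inq.put(i)
    inq.put(None)                     # signal the worker to stop
    results = [outq.get() for _ in range(5)]
    p.join()
    print(sorted(results))            # [0, 2, 4, 6, 8]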
def calculate_grid(self) -> None:
    """
    Calculates the values of the grid based on current information.
    """
    self._verboseprint("Reading data...")
    self._initialize_data()

    self._verboseprint("Initializing map grid generation...")
    # initial grid
    grid_width = ceil((self._lon_max - self._lon_min) / self.scale)
    grid_height = ceil((self._lat_max - self._lat_min) / self.scale)
    self._verboseprint(("Map Parameters\n"
                        "--------------\n"
                        "Lat Min: {}\n"
                        "Lat Max: {}\n"
                        "Lat Grid Height: {}\n"
                        "Lon Min: {}\n"
                        "Lon Max: {}\n"
                        "Lon Grid Width: {}\n"
                        "Grid Dimensions: ({}, {})\n").format(
                            self._lat_min, self._lat_max, grid_height,
                            self._lon_min, self._lon_max, grid_width,
                            grid_width, grid_height))
    grid = np.full((grid_height, grid_width), 0.0)

    self._verboseprint("Determining grid coordinates of points...")
    x_coords, y_coords, remove = [], [], []
    for i in range(len(self._names)):
        lat, lon = self._lats[i], self._lons[i]
        value, name = self._values[i], self._names[i]
        # drop points that lie outside the map (plus the influence radius)
        if (lon < self._lon_min - self.radius or
                lon > self._lon_max + self.radius or
                lat < self._lat_min - self.radius or
                lat > self._lat_max + self.radius):
            remove.append(i)
            continue
        grid_x = ceil((lon - self._lon_min) / self.scale)
        grid_y = ceil((lat - self._lat_min) / self.scale)
        x_coords.append(grid_x)
        y_coords.append(grid_y)
        # y comes first in the way the grid displays the map, which is why it
        # is reversed in this fashion. Debug output stays in x-y order to match
        # conventional thinking about the actual map.
        value_text = ("{} ({})".format(value, self._legend[value])
                      if self._mode == MODES[0] else value)
        self._verboseprint(("{} -> Map Coords: ({}, {}) || "
                            "Grid Coords: ({}, {}) || "
                            "Value: {}").format(name, lat, lon, grid_x, grid_y,
                                                value_text))
    # pop from the back so earlier indices stay valid
    remove.sort(reverse=True)
    for i in remove:
        self._lats.pop(i)
        self._lons.pop(i)
        self._values.pop(i)
        self._names.pop(i)
    # recompute after removal so indexing stays aligned with x_coords/y_coords
    item_count = len(self._names)

    self._verboseprint("Filling in the grid...")
    try:
        import progressbar  # displays progress nicely if installed
        prog_bar = progressbar.ProgressBar()
    except ImportError:
        prog_bar = lambda l: l
    radius = self.radius / self.scale
    for i in prog_bar(range(grid_height)):
        for j in range(grid_width):
            # distance (in grid cells) from every point to this cell
            vicinity = [[point_i,
                         sqrt((x_coords[point_i] - j) ** 2 +
                              (y_coords[point_i] - i) ** 2)]
                        for point_i in range(item_count)]
            if [item for item in vicinity if item[1] <= radius]:
                # keep only points within the radius, weighted by closeness
                vicinity = [[point_i, 0.999 - point_dist / radius]
                            for point_i, point_dist in vicinity
                            if point_dist <= radius]
                # influence mode
                if self._mode == MODES[0]:
                    weights = Counter()
                    for point_i, weighted_dist in vicinity:
                        weights[self._values[point_i]] += weighted_dist
                    weights = list(weights.items())
                    weights.sort(key=lambda item: item[1], reverse=True)
                    d_value, d_weight = weights[0]
                    # sum of the other weights
                    rest = sum(weight for value, weight in weights[1:])
                    if not d_weight < rest:
                        total_weight = d_weight - rest
                        grid[i][j] = (self._legend[d_value]
                                      if total_weight >= 0.999
                                      else self._legend[d_value] -
                                      (0.999 - total_weight))
                # weighted mode
                elif self._mode == MODES[1]:
                    total_count = 0
                    for point_i, weighted_dist in vicinity:
                        total_count += weighted_dist * self._values[point_i]
                    grid[i][j] = total_count
    self.grid = grid
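
# Tiny worked example of the influence-mode rule above (values and weights are
# made up): each nearby point votes for its value with weight
# 0.999 - dist / radius, and the dominant value wins only if its total weight
# is at least the sum of all the others.
from collections import Counter

weights = Counter()
for value, weighted_dist in [("A", 0.8), ("B", 0.3), ("A", 0.2)]:
    weights[value] += weighted_dist
(d_value, d_weight), *rest = weights.most_common()
rest_weight = sum(w for _, w in rest)
if not d_weight < rest_weight:
    print(d_value, d_weight - rest_weight)  # "A" dominates with margin 0.7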