def c_int_ext(k: int, aff: np.ndarray, adj_mat: np.ndarray, threads_nb=1) -> "tuple[float, float]":
    """
    Calculate the mean intra-cluster and inter-cluster densities of a clustering.

    The densities follow the definitions given in Santo Fortunato,
    Community Detection in Graphs, Physics Reports, 486, 75-174 (2010).

    The module-level arrays ``int_sigmas`` and ``ext_sigmas`` are reset to
    zeros of length ``k`` and ``calculate_int_ext_edges`` is run in worker
    threads over contiguous node ranges. This function never fills the arrays
    itself, so the workers are presumably what accumulate the per-cluster
    edge counts into those globals (verify against the worker).

    Parameters
    ----------
    k : int
        The number of clusters
    aff : np.ndarray
        A 1-D array containing the assignment of each node to its cluster
    adj_mat : np.ndarray
        Adjacency matrix
    threads_nb : int, optional
        Requested number of worker threads. It is capped at one thread per
        ten nodes and never drops below one.

    Returns
    -------
    float, float
        sum(sigma_int) / k and sum(sigma_ext) / k, i.e. the mean internal and
        external densities over the ``k`` clusters.

    Raises
    ------
    ZeroDivisionError
        If ``k`` is 0.
    """
    global int_sigmas
    global ext_sigmas

    # Fresh accumulators for this call.
    int_sigmas = np.zeros(k)
    ext_sigmas = np.zeros(k)

    # Number of nodes
    n = len(aff)

    # Cap the worker count at one per ten nodes, but always keep at least one
    # worker: with zero workers no edge would ever be counted and every
    # density would silently come out as 0 for graphs with fewer than 10 nodes.
    if n / 10 < threads_nb:
        threads_nb = int(n / 10)
    threads_nb = max(1, threads_nb)

    # Split the nodes into contiguous ranges, one per worker. Integer
    # arithmetic makes the last range end exactly at n (float rounding could
    # otherwise leave the final node out).
    threads = []  # type: list[Thread]
    for i in range(threads_nb):
        from_i = i * n // threads_nb
        to_i = (i + 1) * n // threads_nb
        t = Thread(target=calculate_int_ext_edges,
                   args=(adj_mat, aff, from_i, to_i))
        threads.append(t)
        t.start()

    # Wait for all workers to finish before reading the accumulators
    for t in threads:
        t.join()

    labels = np.asarray(aff)

    # Turn the per-cluster counts into densities
    for i in range(k):
        nb_c_i = int(np.count_nonzero(labels == i))
        if nb_c_i <= 1:
            # An empty or single-node cluster has no possible internal pair.
            int_sigmas[i] = 0
            ext_sigmas[i] = 0
            continue

        # Internal density: divide by the number of possible internal pairs
        int_sigmas[i] /= (nb_c_i * (nb_c_i - 1) / 2)

        # External density: divide by the number of possible
        # (inside, outside) pairs. When the cluster holds every node there is
        # no outside node, so the density is 0 rather than a division by zero.
        nb_outside = n - nb_c_i
        if nb_outside == 0:
            ext_sigmas[i] = 0
        else:
            ext_sigmas[i] /= (nb_c_i * nb_outside)

    # Mean density over all the clusters
    return sum(int_sigmas) / k, sum(ext_sigmas) / k
def pet(temperature_celsius: np.ndarray,
        latitude_degrees: float,
        data_start_year: int) -> np.ndarray:
    """
    Compute potential evapotranspiration (PET) using Thornthwaite's equation.

    :param temperature_celsius: an array of average temperature values,
        in degrees Celsius
    :param latitude_degrees: the latitude of the location, in degrees north,
        must be strictly between -90.0 and 90.0 (the poles themselves are
        rejected), otherwise a ValueError is raised. If an array holding more
        than one value is passed, only its first element is used.
    :param data_start_year: the initial year of the input dataset
    :return: an array of PET values, of the same size and shape as the input
        temperature values array, in millimeters/time step. If the input is
        fully masked or all-NaN it is returned unchanged.
    :rtype: 1-D numpy.ndarray of floats
    :raises ValueError: if the latitude is None, NaN, or not strictly between
        -90.0 and 90.0
    """

    # Nothing to compute when every temperature is missing: either a masked
    # array with no unmasked element, or an array holding only NaNs. In both
    # cases hand the input straight back as the PET result.
    fully_masked = (np.ma.isMaskedArray(temperature_celsius)
                    and temperature_celsius.count() == 0)
    if fully_masked or np.all(np.isnan(temperature_celsius)):
        return temperature_celsius

    # If we've been passed an array of latitude values then just use
    # the first one -- useful when applying this function with xarray.GroupBy
    # or numpy.apply_along_axis() where we've had to duplicate values in a 3-D
    # array of latitudes in order to correspond with a 3-D array of temperatures.
    if isinstance(latitude_degrees, np.ndarray) and (latitude_degrees.size > 1):
        latitude_degrees = latitude_degrees.flat[0]

    # Reject a missing, NaN or out-of-range latitude. The None test must come
    # first because np.isnan() cannot handle None.
    latitude_ok = ((latitude_degrees is not None)
                   and not np.isnan(latitude_degrees)
                   and (-90.0 < latitude_degrees < 90.0))
    if not latitude_ok:
        # The bounds are exclusive, so the message says so (it used to claim
        # "inclusive" while exactly +/-90.0 was being rejected).
        message = ("Invalid latitude value: " + str(latitude_degrees) +
                   " (must be in degrees north, between -90.0 and " +
                   "90.0 exclusive)")
        _logger.error(message)
        raise ValueError(message)

    # compute and return the PET values using Thornthwaite's equation
    return eto.eto_thornthwaite(temperature_celsius,
                                latitude_degrees,
                                data_start_year)
def calculate_conditional_probabilities(attributes: np.ndarray,
                                        data: np.ndarray,
                                        output: np.ndarray) -> dict:
    """
    Build a table of relative frequencies keyed by attribute name and then by
    output value.

    Each row of ``data`` is paired with the name at the same position in
    ``attributes``. For every distinct output value, the row is restricted to
    the samples having that output and a relative frequency of an attribute
    value within that subset is stored. Finally the entry for the last
    attribute name is replaced by the relative frequency of each output value.

    NOTE(review): only one float is kept per (attribute, output value) pair.
    It is overwritten for every distinct attribute value, so the surviving
    number belongs to whichever value the set iteration yields last. This
    looks unintended (a per-value mapping was probably wanted) but is kept
    as-is because changing it would alter the returned structure.

    :param attributes: attribute names; the last one names the output column
    :param data: one row per attribute, one column per sample
    :param output: output (class) value of each sample
    :return: dictionary ``{attribute: {output_value: probability}}``
    """
    table = {name: dict() for name in attributes}
    classes = set(output)

    for row_number, row in enumerate(data):
        for label in classes:
            per_label = table[attributes[row_number]]
            per_label[label] = 0.0

            # Boolean mask selecting the samples whose output equals `label`
            mask = [bool(sample_output == label) for sample_output in output]
            selected = list(row[mask])
            subset_size = len(selected)

            # NOTE(review): every pass overwrites the same slot, so only the
            # last value yielded by the set iteration is kept
            for value in set(selected):
                per_label[label] = float(selected.count(value) / subset_size)

    # Frequencies of the output values themselves go under the last name
    all_outputs = list(output)
    sample_count = len(all_outputs)
    table[attributes[-1]] = {
        label: float(all_outputs.count(label) / sample_count)
        for label in classes
    }
    return table
def is_data_valid(data: np.ndarray) -> bool:
    """
    Tell whether an array can be processed further.

    An input is considered valid when it is a supported array type and holds
    at least one usable element:

    * masked array: at least one element is unmasked
    * plain ``numpy.ndarray``: at least one element is not NaN

    Any other type is reported with a warning and treated as invalid.

    :param data: data object, expected as either numpy.ndarray or
        numpy.ma.MaskedArray
    :return: True if the input is a supported array type with at least one
        usable element, False otherwise
    :rtype: boolean
    """
    # Masked arrays are tested first: they are ndarray subclasses too
    if np.ma.isMaskedArray(data):
        unmasked_total = data.count()
        return bool(unmasked_total)

    if isinstance(data, np.ndarray):
        every_value_is_nan = np.isnan(data).all()
        return not every_value_is_nan

    _logger.warning("Invalid data type")
    return False