示例#1
0
def HARMONIC_MEAN(df, n, price='Close'):
    """
    Rolling/expanding harmonic mean of a price series.

    df    : mapping with an int-indexable, sliceable sequence under ``price``
    n     : window length; if equal to the series length, an expanding
            window anchored at the first non-NaN value is used instead
    price : column name, defaults to 'Close'

    Returns a list the same length as the series; positions with
    insufficient data (or NaN input) hold NaN.
    """
    series = df[price]
    total = len(series)
    out = []
    if n == total:
        anchor = None
        for idx in range(total):
            value = series[idx]
            if value != value:  # NaN is never equal to itself
                out.append(float('NaN'))
                continue
            if anchor is None:
                anchor = idx  # anchor the expanding window at first non-NaN
            out.append(statistics.harmonic_mean(series[anchor:idx + 1]))
    else:
        for idx in range(total):
            if idx + 1 < n:
                # not enough points yet for a full window
                out.append(float('NaN'))
            else:
                out.append(statistics.harmonic_mean(series[idx + 1 - n:idx + 1]))
    return out
示例#2
0
def processTestGrouping(KB, test, preprocessing, similarity):
    """
    Receive a KB and test set, and using the preprocessing and
    similarity functions given, process the test set using a retrieval
    based method with KB as the knowledge base.
    Results are returned in a list and written to a file resultados.txt.
    """
    resultsList = []
    # FIX: open the results file with a context manager so the handle is
    # closed even if preprocessing/similarity/harmonic_mean raises (the
    # original leaked the descriptor on any exception).
    with open('resultados.txt', "w+") as results:
        for test_question in test:
            # preprocess test question
            preproc_question = preprocessing(test_question)
            # best match so far; a LOWER similarity value wins (distance-like)
            best_question_id = -1
            best_similarity_value = math.inf
            # KB rows are grouped by answer id (column 1); each group's
            # similarities are aggregated with a harmonic mean
            a_id = KB[0, 1]
            similarity_faq = []
            for kb_question in KB:
                if kb_question[1] != a_id:
                    # group boundary: score the finished group
                    stat = statistics.harmonic_mean(similarity_faq)
                    if stat < best_similarity_value:
                        best_question_id = a_id
                        best_similarity_value = stat
                    a_id = kb_question[1]
                    similarity_faq = []

                value = similarity(kb_question[0], preproc_question)
                similarity_faq.append(value)

            # flush the final group
            stat = statistics.harmonic_mean(similarity_faq)
            if stat < best_similarity_value:
                best_question_id = a_id
                best_similarity_value = stat
            # threshold: reject weak matches ('edit' distance is exempt)
            if similarity.__name__ != 'edit' and best_similarity_value > THRESHOLD:
                best_question_id = '0'
            # write to results file
            results.write(str(best_question_id) + '\n')
            # append to results list
            resultsList.append(best_question_id)
    return resultsList


# GROUP
# mean 0.919
# median 0.878 1by1 91.1
# harmonic_mean 93.5 91.1
# median_low 0.86 91.1
# median_high 86.2 91.1
# median grouped-1 .44
示例#3
0
    def calculate_averages(self, coeffs):
        """
        Compute micro/macro precision, recall and F1 from per-class
        (tp, fp, fn) coefficient triples.

        coeffs : sequence of (true_pos, false_pos, false_neg) tuples.
        Returns (micro_prec, micro_rcll, micro_f1,
                 macro_prec, macro_rcll, macro_f1).
        """
        # FIX: transpose once instead of re-zipping the coefficient list
        # six times as the original did.
        tp, fp, fn = (list(col) for col in zip(*coeffs))
        micro_avg_prec = sum(tp) / (sum(tp) + sum(fp))
        micro_avg_rcll = sum(tp) / (sum(tp) + sum(fn))
        # F1 is the harmonic mean of precision and recall.
        micro_f1 = stats.harmonic_mean([micro_avg_prec, micro_avg_rcll])
        macro_avg_prec = stats.mean([cf[0] / (cf[0] + cf[1]) for cf in coeffs])
        macro_avg_rcll = stats.mean([cf[0] / (cf[0] + cf[2]) for cf in coeffs])
        macro_f1 = stats.harmonic_mean([macro_avg_prec, macro_avg_rcll])

        return micro_avg_prec, micro_avg_rcll, micro_f1, macro_avg_prec, macro_avg_rcll, macro_f1
示例#4
0
def normal_ratio(df, marker, num_std, reverse=False):
    """
    Column-wise ratio of marker counts to totals plus a harmonic-mean
    based threshold.

    df      : 2-D array-like of totals
    marker  : 2-D array-like of marked counts (same columns as df)
    num_std : number of standard deviations added to the harmonic mean
    reverse : if True, return the element-wise inverse of the ratio

    Returns (ratio_array, threshold) with both rounded to 4 decimals.
    """
    # FIX: both branches of the original computed this identically;
    # compute the shared ratio once.
    ratio = np.array(np.sum(marker, axis=0) / np.sum(df, axis=0),
                     dtype=float).round(decimals=4)
    # NOTE: as in the original, the reversed ratio is inverted AFTER
    # rounding, so the inverse itself is not re-rounded.
    test = ratio if not reverse else 1 / ratio
    std_bn_samples = (statistics.harmonic_mean(test) +
                      np.std(test) * num_std).round(decimals=4)
    return test, std_bn_samples
 def fight(self, other):
     """Compare two fighters by the harmonic mean of power and wisdom.

     Prints the winner's name and returns True if self wins (strictly
     greater score), False otherwise (ties go to other).
     """
     # Harmonic mean balances the two attributes: it is dominated by the
     # weaker one, so a fighter cannot win on a single huge stat.
     fight1 = statistics.harmonic_mean([self.power, self.wisdom])
     fight2 = statistics.harmonic_mean([other.power, other.wisdom])
     # FIX: removed the two print() calls after the return statements —
     # they were unreachable dead code — and the commented-out formulas.
     if fight1 > fight2:
         print(f"{self.name}, you are the winner!")
         return True
     print(f"{other.name}, you are the winner!")
     return False
示例#6
0
    def record_statistics(self) -> None:
        """
        Record statistics.

        Summarises the raw samples gathered during the run into
        ``self.stats``: totals, averages, extrema and medians for request
        durations, response content lengths, queue wait times, queue
        sizes and request latencies, plus the count of URLs seen.

        NOTE(review): every *_AVG entry uses harmonic_mean, which is
        dominated by the smallest samples — confirm this is intentional
        rather than an arithmetic mean.
        """
        # Request durations — truncated to whole units with int().
        self.stats[Stats.REQUESTS_DURATION_TOTAL] = int(
            sum(self._stats_request_durations))
        self.stats[Stats.REQUESTS_DURATION_AVG] = int(
            harmonic_mean(self._stats_request_durations))
        self.stats[Stats.REQUESTS_DURATION_MAX] = int(
            max(self._stats_request_durations))
        self.stats[Stats.REQUESTS_DURATION_MIN] = int(
            min(self._stats_request_durations))
        self.stats[Stats.REQUESTS_DURATION_MEDIAN] = int(
            median(self._stats_request_durations))

        # Response content lengths — truncated to int.
        self.stats[Stats.CONTENT_LENGTH_TOTAL] = int(
            sum(self._stats_response_content_lengths))
        self.stats[Stats.CONTENT_LENGTH_AVG] = int(
            harmonic_mean(self._stats_response_content_lengths))
        self.stats[Stats.CONTENT_LENGTH_MAX] = int(
            max(self._stats_response_content_lengths))
        self.stats[Stats.CONTENT_LENGTH_MIN] = int(
            min(self._stats_response_content_lengths))
        self.stats[Stats.CONTENT_LENGTH_MEDIAN] = int(
            median(self._stats_response_content_lengths))

        # Unique URLs, counted via the duplicate filter's fingerprints.
        self.stats[Stats.URLS_SEEN] = len(self._duplicate_filter.fingerprints)

        # Queue wait times — kept as-is (no int() truncation here).
        self.stats[Stats.QUEUE_WAIT_AVG] = harmonic_mean(
            self._stats_queue_wait_times)
        self.stats[Stats.QUEUE_WAIT_MIN] = min(self._stats_queue_wait_times)
        self.stats[Stats.QUEUE_WAIT_MAX] = max(self._stats_queue_wait_times)
        self.stats[Stats.QUEUE_WAIT_MEDIAN] = median(
            self._stats_queue_wait_times)

        # Queue sizes — max kept exact; avg/median truncated to int.
        self.stats[Stats.QUEUE_SIZE_MAX] = max(self._stats_queue_sizes)
        self.stats[Stats.QUEUE_SIZE_AVG] = int(
            harmonic_mean(self._stats_queue_sizes))
        self.stats[Stats.QUEUE_SIZE_MEDIAN] = int(
            median(self._stats_queue_sizes))

        # Request latencies — avg kept untruncated, the rest int().
        self.stats[Stats.REQUESTS_LATENCY_AVG] = harmonic_mean(
            self._stats_request_latencies)
        self.stats[Stats.REQUESTS_LATENCY_MAX] = int(
            max(self._stats_request_latencies))
        self.stats[Stats.REQUESTS_LATENCY_MIN] = int(
            min(self._stats_request_latencies))
        self.stats[Stats.REQUESTS_LATENCY_MEDIAN] = int(
            median(self._stats_request_latencies))
        self.stats[Stats.REQUESTS_LATENCY_TOTAL] = int(
            sum(self._stats_request_latencies))
示例#7
0
def get_average_nlg(nlg_global_scores):
    """
    Aggregate individual NLG metrics into two harmonic-mean summaries.

    Adds "avg_overlap" (harmonic mean of METEOR/ROUGE_L/CIDEr/SPICE) and
    "avg_all" (harmonic mean of avg_overlap and BERTScore) to the dict,
    then returns it.
    """
    chosen_metrics = ["METEOR", "ROUGE_L", "CIDEr", "SPICE"]
    overlap_values = [nlg_global_scores[name] for name in chosen_metrics]
    nlg_global_scores["avg_overlap"] = statistics.harmonic_mean(overlap_values)
    nlg_global_scores["avg_all"] = statistics.harmonic_mean(
        [nlg_global_scores["avg_overlap"], nlg_global_scores["BERTScore"]]
    )
    return nlg_global_scores
示例#8
0
def learn(data):
    """
    Estimate a (multiplier, addend) pair from (example, target) pairs.

    For each pair the multiplier guess is example // target and the
    addend guess is the remainder example - (example // target) * target.
    Returns the harmonic means of the two guess lists.
    """
    # FIX: removed the dead commented-out argument-validation and
    # return-variant code; replaced append loops with comprehensions.
    multipliers = [example // target for example, target in data]
    addends = [example - (example // target) * target for example, target in data]
    return statistics.harmonic_mean(multipliers), statistics.harmonic_mean(addends)
示例#9
0
 def step(self, actions_selected):
     """Advance the environment one day.

     Applies the selected transfer actions to every demand point,
     accumulates their states/rewards/feasibility flags, and returns
     (state, reward, done).
     """
     done = False
     state = np.array([])
     reward_inputs = []
     feasible_transfers = []
     self.get_inventory_transfers(actions_selected)
     self.get_vehicles_transfers(actions_selected)
     vehicles_transfers_matrix = self.vehicles_transfers_tensor[self.day]
     inventory_transfers_matrix = self.inventory_transfers_tensor[self.day]
     for i in range(len(self.demand_points)):
         a, b, c, done = self.demand_points[i].step(
             vehicles_transfers_matrix, inventory_transfers_matrix)
         state = np.concatenate((state, c))
         reward_inputs.append(b)
         feasible_transfers.append(a)
     self.day = self.day + 1
     # FIX: the original removed items from reward_inputs while iterating
     # it, which skips the element following each removal and could leave
     # negative values in the harmonic-mean input. Partition instead.
     negatives = [x for x in reward_inputs if x < 0]
     reward = sum(negatives)
     reward_inputs = [x for x in reward_inputs if x >= 0]
     if reward == 0:
         # no penalties: reward is the harmonic mean of the point rewards
         reward = statistics.harmonic_mean(reward_inputs)
     if sum(feasible_transfers) < len(feasible_transfers):
         # any infeasible transfer voids the episode reward
         reward = -100000
     if reward < 0 or self.day == self.inventory_transfers_tensor.shape[
             0] - 1:
         done = True
     return state, reward, done
示例#10
0
 def calculate_score_for_label(label_words, query_synsets):
     """Harmonic mean of the non-zero per-word scores; 0.0 when none remain."""
     word_scores = [SemScore.calculate_score_for_label_word(word, query_synsets)
                    for word in label_words]
     nonzero = [score for score in word_scores if score]
     try:
         return statistics.harmonic_mean(nonzero)
     except statistics.StatisticsError:
         # all scores were falsy -> empty input to harmonic_mean
         return 0.0
def stats():
    """Interactively read a list of integers and apply a chosen statistic."""
    listNum = int(input('How many numbers in your list?:'))
    nums = [int(input("")) for _ in range(listNum)]

    op = input('What kind of operation would you like to do?\
        \nChoose between "mode, mean, gmean, hmean, median, range, stdev" : ')

    # Dispatch table replaces the if/elif chain; an unknown op falls
    # through and returns None, exactly as before.
    operations = {
        'mode': statistics.multimode,
        'mean': statistics.mean,
        'gmean': statistics.geometric_mean,
        'hmean': statistics.harmonic_mean,
        'median': statistics.median,
        'stdev': statistics.stdev,
        'range': lambda ns: max(ns) - min(ns),
    }
    if op in operations:
        return operations[op](nums)
示例#12
0
    def __str__(self):
        """Render a text summary and an ASCII histogram of the tx-rate data."""
        lines = []

        def add_title(text):
            lines.append(f"{text:-^42}")

        def add_stat(label, number):
            lines.append(f"--- {label:>20}: {number:>12.2f} ---")

        add_title("Summary")
        for label, number in (
            ("mean", mean(self.tx_rates_data)),
            ("harmonic mean", harmonic_mean(self.tx_rates_data)),
            ("standard deviation", pstdev(self.tx_rates_data)),
            ("median", median(self.tx_rates_data)),
            ("max", max(self.tx_rates_data)),
            ("min", min(self.tx_rates_data)),
        ):
            add_stat(label, number)

        add_title("Histogram")
        buckets = {tuple(entry[0]): entry[1]
                   for entry in self.histogram_data["buckets"]}
        lines.append(
            f"({sum(buckets.values())} samples in {len(buckets)} buckets)")
        max_count = max(buckets.values())
        for bounds, count in sorted(buckets.items()):
            # bar is capped both at the raw count and at a 60-char scale
            bar = "*" * min(count, (60 * count // max_count))
            lines.append("{:>12}: {}".format(f"{bounds[0]}-{bounds[1]}", bar))

        return "\n".join(lines)
 def _statistic_features(values):
     max_value = max(values)
     mean = statistics.mean(values)
     variance = statistics.variance(values) if len(values) > 1 else 0
     harmonic = statistics.harmonic_mean(values)
     median_high = statistics.median_high(values)
     return [max_value, mean, variance, harmonic, median_high]
示例#14
0
def b_cubed(
    key: ty.Sequence[ty.Set], response: ty.Sequence[ty.Set]
) -> ty.Tuple[float, float, float]:
    r"""
    Compute the B³ `$(R, P, F₁)$` scores for a `#response` clustering given a `#key` clustering,
    that is
    ```math
    R &= \frac{∑_{k∈K}∑_{r∈R}\frac{(\#k∩r)²}{\#k}}{∑_{k∈K}\#k}\\
    P &= \frac{∑_{r∈R}∑_{k∈K}\frac{(\#r∩k)²}{\#r}}{∑_{r∈R}\#r}\\
    F &= 2*\frac{PR}{P+R}
    ```
    """
    key_size = sum(len(k) for k in key)
    response_size = sum(len(r) for r in response)
    # Empty sides contribute a score of 0 rather than dividing by zero.
    if key_size == 0:
        R = 0.0
    else:
        R = math.fsum(
            len(k.intersection(r)) ** 2 / len(k) for k in key for r in response
        ) / key_size
    if response_size == 0:
        P = 0.0
    else:
        P = math.fsum(
            len(r.intersection(k)) ** 2 / len(r) for r in response for k in key
        ) / response_size
    return R, P, harmonic_mean((R, P))
示例#15
0
 def _query(self, ages, save: bool = True, log: bool = True) -> float:
     """Estimate a single age from a list of ages via the regression model.

     :param ages: list with ages
     :param save: Need to save model or not
     :param log: Need to log or not
     :return: estimated age (rounded to 2 decimals), or -1 for empty input
     """
     if not ages:
         return -1
     # Feature vector for the regressor: central tendencies, spread, count.
     avg = round(st.mean(ages), 2)
     med = round(st.median(ages), 2)
     hmean = round(st.harmonic_mean(ages), 2)
     mode = round(find_average_mode(ages), 2)
     spread = round(np.array(ages).std(), 2)
     features = [avg, hmean, mode, med, spread, len(ages)]
     predicted = round(self.reg.predict(features), 2)
     if log and settings.log_needed:
         logging.info(
             f"query^Predicted successfully. Mean: {avg}. HMean: {hmean}. Mode: {mode}. Median: {med}. Std: {spread}. Result: {predicted}."
         )
     if save:
         self.save_model(filename=settings.neural_network_file)
     return predicted
def gather_dataset_stats(dataset, dataset_name):
    """
    dataset : list of dicts - singular dataset in format {"input":x,"label":x}
    --------------------------------------------------------------------
    output : dict - statistics of dataset
    --------------------------------------------------------------------
    Calculates label counts and input-length statistics for a dataset.
    """
    # FIX: the original docstring opened with four quotes (""""), leaving
    # a stray quote character in the documentation.
    lens = sorted(len(sample["input"]) for sample in dataset)
    labels = [sample["label"] for sample in dataset]

    return {
        "dataset": dataset_name,
        # list.count(x) replaces the len([None for i in ... if i == x]) idiom
        "negative": labels.count(-1),
        "neutral": labels.count(0),
        "positive": labels.count(1),
        "total": len(dataset),
        "min_length": lens[0],
        "max_length": lens[-1],
        "arithmetic_mean": statistics.mean(lens),
        "harmonic_mean": statistics.harmonic_mean(lens),
        "median": statistics.median(lens),
        "mode": statistics.mode(lens),
        "stdev": statistics.stdev(lens),
        "variance": statistics.variance(lens)
    }
示例#17
0
def main():
    """Compute descriptive statistics for the CO/SO2/NO2 columns of each CSV and plot them."""
    # metric label -> statistics function; the labels (including the
    # original 'media' key) are kept exactly as downstream code expects.
    metric_funcs = {
        'mean': statistics.mean,
        'harmonic_mean': statistics.harmonic_mean,
        'media': statistics.median,
        'low_median': statistics.median_low,
        'high_median': statistics.median_high,
        'population_std_dev': statistics.pstdev,
        'population_variance': statistics.pvariance,
        'sample_std_dev': statistics.stdev,
        'sample_variance': statistics.variance,
    }
    statistics_data = {}
    for file_name in file_names:
        file_data = pd.read_csv(file_name)
        file_data.dropna(subset=['CO', 'SO2', 'NO2'], inplace=True)
        statistics_data[file_name] = {
            col_name: {label: func(file_data[col_name])
                       for label, func in metric_funcs.items()}
            for col_name in ['CO', 'SO2', 'NO2']
        }
    create_plots(statistics_data)
示例#18
0
def ceaf(
    key: ty.Sequence[ty.Set],
    response: ty.Sequence[ty.Set],
    score: ty.Callable[[ty.Set, ty.Set], float],
) -> ty.Tuple[float, float, float]:
    r"""
    Compute the CEAF `$(R, P, F₁)$` scores for a `#response` clustering given a `#key` clustering
    using the `#score` alignment score function, that is
    ```math
    R &= \frac{∑_{k∈K}C(k, A(k))}{∑_{k∈K}C(k, k)}\\
    P &= \frac{∑_{r∈R}C(r, A⁻¹(r))}{∑_{r∈R}C(r, r)}\\
    F &= 2*\frac{PR}{P+R}
    ```
    Where `$C$` is `#score` and `$A$` is a one-to-one mapping from key clusters to response
    clusters that maximizes `$∑_{k∈K}C(k, A(k))$`.
    """
    if not key or not response:
        return 0.0, 0.0, 0.0
    # Optimal one-to-one cluster alignment: maximise the total score by
    # minimising its negation with the Hungarian algorithm.
    costs = np.array([[-score(k, r) for r in response] for k in key])
    # TODO: See https://github.com/allenai/allennlp/issues/2946 for ideas on speeding
    # the next line up
    rows, cols = linear_sum_assignment(costs)
    total_score = -costs[rows, cols].sum()
    R = total_score / math.fsum(score(k, k) for k in key)
    P = total_score / math.fsum(score(r, r) for r in response)
    return R, P, harmonic_mean((R, P))
示例#19
0
    def split(self, text):
        """
        Split the input text in two while trying to maximize the availability of parts in a defined corpus.

        Every candidate split point is scored by the harmonic mean of the
        base-word corpus frequency and the number of corpus words ending
        with the affix.

        :param text: input word
        :return: a dictionary containing `debug`, `base`, and `affix`.

        NOTE(review): for len(text) < 2 the loop never runs, ``best_split``
        stays None and the subscripts in the return raise TypeError —
        confirm callers never pass such input.
        """
        debug = []
        max_score = 0
        best_split = None
        for i in range(1, len(text)):
            s1 = text[:i]
            s2 = text[i:]
            # assumes self.data['dist'] tolerates unseen keys (e.g. a
            # Counter/defaultdict) — a plain dict would raise KeyError
            word_freq_s1 = self.data['dist'][s1]
            # Check base word (with ්) if s2 != ්
            if not s2.startswith('්'):
                s3 = text[:i] + '්'
                word_freq_s3 = self.data['dist'][s3]
                # prefer the hal-kirima variant when it is more frequent
                if word_freq_s3 > word_freq_s1:
                    word_freq_s1 = word_freq_s3
                    s1 = s3
            # add-one smoothing so harmonic_mean never sees a zero
            word_freq_s1 += 1
            count_words_ends_with_s2 = len(
                [w for w in self.data['words'] if w.endswith(s2)]) + 1
            score = harmonic_mean([word_freq_s1, count_words_ends_with_s2])
            if score > max_score:
                max_score = score
                best_split = (s1, s2)
            debug.append([s1, s2, score])
        return {
            'debug': debug,
            'base': best_split[0],
            'affix': best_split[1],
        }
示例#20
0
def feature_extraction_01(features, features_transformed, overlapped_window, window_length):
    """
    Slide a window over the first 9 signal columns of ``features`` and
    write 7 statistics per column (63 values per window) into
    ``features_transformed``, one row per window position.

    NOTE(review): the ``overlapped_window`` argument is immediately
    overwritten by window_calculation(...) below (kept from the
    original), so the passed-in value is ignored — confirm intent.
    """
    rows = features.shape[0]
    count = 0
    # FIX: removed the unused local `frequency` from the original.

    # Recompute the window step; this clobbers the parameter (see note).
    overlapped_window = window_calculation(overlapping=90, sampling_rate=25,
                                           window_length=window_length)

    for i in range(0, rows, overlapped_window):
        row_list = []
        for j in range(0, 9):
            data_window = features.iloc[i:i + window_length, j]
            # Feature 1: mean (MAV)
            mean = calculateMAV(data_window)
            # Feature 2: variance
            var = calculateVariance(data_window)
            # Feature 3: harmonic mean of absolute values
            hm = s.harmonic_mean(abs(data_window))
            # Feature 4: peaks above absolute threshold
            peaks = calculatePeaks(data_window)
            # Feature 5: skew
            skew = calculateSkew(data_window)
            # Feature 6: energy
            energy = calculateEnergy(data_window)
            # Feature 7: spectral centroid
            sd = spectral_centroid(data_window, samplerate=25)

            row_list = row_list + [mean, var, hm, peaks, skew, energy, sd]
        features_transformed.loc[count, 0:63] = row_list
        count = count + 1

    return features_transformed
示例#21
0
def answer_rank(id, shared, sent, talker, expanded=0):
    '''ranks answer sentence id using several parameters

    id       : sentence id to rank
    shared   : words shared between query and sentence (empty -> rank 0)
    sent     : the candidate sentence (only its length is used)
    talker   : provides db, pr (per-sentence rank scores) and avg_len
    expanded : non-zero halves the final rank
    '''

    lshared = len(shared)
    if not lshared: return 0

    sent_count = len(talker.db[0])
    #word_count=len(talker.db[1])

    lsent = len(sent)
    lavg = talker.avg_len
    srank = talker.pr.get(id)

    # length-normalised sentence rank; 0 means "not rankable"
    nrank = normalize_sent(srank, lsent, lavg)

    if nrank == 0: return 0

    def get_occ_count(x):
        # occurrence count of word x in the index
        # NOTE(review): .get(x) returns None for unknown x, which would
        # make len() raise — presumably every shared word is indexed.
        return len(talker.db[1].get(x))

    # rarity of the shared words: rarer words push the sigmoid toward 1
    unusual = sigmoid(1 - stat.harmonic_mean(get_occ_count(x)
                                             for x in shared) / sent_count)

    important = math.exp(nrank)

    # #r=stat.harmonic_mean((lshared,important,unusual))
    r = lshared * important * unusual

    # expanded matches carry a 50% penalty
    if expanded: r = r / 2

    #ppp('RANKS:',10000*srank,'-->',10000*nrank,lsent,lavg)
    #ppp('HOW  :', id, lshared, unusual, important, shared,'--->',r)

    #r=math.tanh(r)
    return r
示例#22
0
def process_data(avg_score: list):
    """
    Down-sample avg_score (truncated to SIZE) into grouped harmonic means.

    Returns (xs, scores, errors): sample indices, harmonic-mean scores and
    population-stdev error bars, emitted at every group boundary.
    """
    scores = []
    errors = []
    xs = []
    series = avg_score[:SIZE]
    group_size = 3
    for idx, _ in enumerate(series):
        if group_size <= 1:
            # No grouping: pass values through with zero error.
            scores.append(series[idx])
            xs.append(idx)
            errors.append(0)
        elif idx > 0 and idx % group_size == 0:
            # Window covers the previous group (tail may be shorter).
            if len(series[idx:]) < group_size:
                window = series[idx - group_size:]
            else:
                window = series[idx - group_size:idx + 1]
            scores.append(statistics.harmonic_mean(window))
            errors.append(statistics.pstdev(window))
            xs.append(idx)
    return xs, scores, errors
示例#23
0
def _iavg(*data: _num.Number) -> float:
    """
    Intuitive AVeraGe
    Returns the best average for the data set
    Use fraction.Fraction instances for ratios
    Ignores mode; nobody likes it

    NOTE(review): the doctest examples call ``iavg`` although this
    function is named ``_iavg`` — presumably a public alias exists.

    >>> iavg(3, 4, 5)
    4
    >>> from fractions import Fraction as F
    >>> iavg(F(60, 20), F(200, 50), F(5, 1))
    Fraction(180, 47)
    """

    # Accept both iavg(1, 2, 3) and iavg([1, 2, 3]).
    if len(data) == 1 and isinstance(data[0], (list, tuple)):
        data = data[0]

    # Standard 1.5*IQR outlier fence.
    target = 1.5 * iqr(*data)
    q = qdict(*data)
    for item in data:
        # Any outlier -> robust median; any Fraction -> harmonic mean.
        if item < q['q1'] - target or item > q['q3'] + target:
            return stat.median(data)
        elif isinstance(item, fraction.Fraction):
            return stat.harmonic_mean(data)
    else:
        # Loop finished without returning: plain arithmetic (float) mean.
        return stat.fmean(data)
示例#24
0
def muc(
    key: ty.Sequence[ty.Set], response: ty.Sequence[ty.Set]
) -> ty.Tuple[float, float, float]:
    r"""
    Compute the MUC `$(R, P, F₁)$` scores for a `#response` clustering given a `#key` clustering,
    that is
    ```math
    R &= \frac{∑_{k∈K}(\#k-\#p(k, R))}{∑_{k∈K}(\#k-1)}\\
    P &= \frac{∑_{r∈R}(\#r-\#p(r, K))}{∑_{r∈R}(\#r-1)}\\
    F &= 2*\frac{PR}{P+R}
    ```
    with `$p(x, E)=\{x∩A|A∈E\}$`.

    In the edge case where all clusters in either `#key` or `#response` are singletons, `$P$`, `$R$`
    and `$F$` are defined to be `$0$`, following the reference implementation (since singleton
    clusters where not considered in Vilain et al. (1995).

    Note: This implementation is significantly different from the reference one (despite
    implementing the formulae from Pradahan et al. (2014) in that the reference use the ordering of
    mentions in documents to consistently assign a non-problematic spanning tree (viz. a chain) to
    each cluster, thus avoiding the issues that led Vilain et al. (1995) to define MUC by the
    formulae above.
    """
    # Edge case: all-singleton sides are scored 0 by convention.
    if all(len(k) == 1 for k in key) or all(len(r) == 1 for r in response):
        return 0.0, 0.0, 0.0

    def _side_score(clusters, others):
        # Σ_c (#c - #partitions of c induced by others) / Σ_c (#c - 1)
        numerator = sum(
            len(c) - sum(1 for _ in trace(c, others)) for c in clusters)
        denominator = sum(len(c) - 1 for c in clusters)
        return numerator / denominator

    R = _side_score(key, response)
    P = _side_score(response, key)
    return R, P, harmonic_mean((R, P))
示例#25
0
def compute_cutoff(HAM_values, MVM_values):
    """
    Classify instances as fault-prone or clean.

    An instance's MIVS is the sum of its metric votes; the cutoff blends
    the harmonic mean of (mean of distinct MIVS, median MIVS) with the
    rate of instances whose MIVS exceeds half the metric count.
    Returns a list of dicts: Component / Threshold / Cutoff / Status.
    """
    number_of_metrics = len(list(HAM_values.keys()))
    number_of_instances = len(list(MVM_values.keys()))

    mivs_values = {}
    possible_defects = 0
    for instance, votes in MVM_values.items():
        score = sum(votes)
        mivs_values[instance] = score
        # more than half the metrics voting marks a possible defect
        if score > (number_of_metrics / 2):
            possible_defects += 1

    mivs_list = mivs_values.values()
    amivs = stats.mean(mivs_list)            # average of mivs
    amivs_plus = stats.mean(set(mivs_list))  # average of distinct mivs
    mivs_median = stats.median(mivs_list)    # median of mivs
    # harmonic mean of amivs+ and the median
    hmivs_mean = stats.harmonic_mean([amivs_plus, mivs_median])

    possible_defects_rate = possible_defects / number_of_instances

    if possible_defects_rate > 0.5:
        cutoff = hmivs_mean * possible_defects_rate + (
            number_of_metrics - amivs) * (1 - possible_defects_rate)
    else:
        cutoff = 2 * number_of_metrics * (
            1 - possible_defects_rate) * hmivs_mean / number_of_metrics * (
                1 - possible_defects_rate) + hmivs_mean

    return [
        {
            "Component": instance,
            "Threshold": value,
            "Cutoff": cutoff,
            "Status": "Fault_proner" if value > cutoff else "Clean",
        }
        for instance, value in mivs_values.items()
    ]
示例#26
0
 def get_F1Scores(self, fine_index):
     """Return macro F1 scores (harmonic mean of precision and recall)
     for a fine label: within-coarse, out-of-coarse and overall."""
     pr = self.get_precision_recall(fine_index)

     def _f1(precision_key, recall_key):
         return statistics.harmonic_mean([pr[precision_key], pr[recall_key]])

     return {
         "f1_macro_within_coarse": _f1("Precision_within_coarse",
                                       "Recall_within_coarse"),
         "f1_macro_out_of_coarse": _f1("Precision_out_of_coarse",
                                       "Recall_out_of_coarse"),
         "f1_macro": _f1("Precision", "Recall"),
     }
示例#27
0
def calc_all(vals):
    """Return a dict of six central-tendency/spread summaries of vals."""
    summaries = (
        ("mean", mean),
        ("gmean", geometric_mean),
        ("hmean", harmonic_mean),
        ("median", median),
        ("mid-range", mid_range),
        ("rms", root_mean_square),
    )
    return {label: fn(vals) for label, fn in summaries}
示例#28
0
def MediaHarmonica(value):
    """
    Parse a ';'-separated string of numbers and return its harmonic mean.

    Returns {"Resultado": <harmonic mean>} on success, or a Portuguese
    error-message string when the input cannot be parsed as numbers.
    """
    try:
        numbers = [float(token) for token in value.split(';')]
    # FIX: catch only parse failures (ValueError for bad tokens,
    # AttributeError for non-string input) instead of the original bare
    # except, which also swallowed KeyboardInterrupt/SystemExit.
    except (ValueError, AttributeError):
        return 'Os valores devem ser apenas numeros.'

    return {"Resultado": harmonic_mean(numbers)}
示例#29
0
def compress(df):
    """Aggregate per-row confusion counts and derive precision,
    sensitivity and F1 (harmonic mean of the two)."""
    totals = df.loc[:, ['true_positive', 'false_positive',
                        'false_negative']].sum()
    tp, fp, fn = totals
    totals['precision'] = tp / (tp + fp)
    totals['sensitivity'] = tp / (tp + fn)
    totals['f1_score'] = harmonic_mean(
        [totals['precision'], totals['sensitivity']])
    return totals
示例#30
0
 def get_F1Scores(self, coarse_index):
     """Return the overall macro F1 (harmonic mean of precision and
     recall) for a coarse label."""
     pr = self.get_precision_recall(coarse_index)
     f1 = statistics.harmonic_mean([pr["Precision"], pr["Recall"]])
     return {"f1_macro": f1}
 def mean_harmonic(self):
     """Return the harmonic mean of DataStruct.price, grouped by code."""
     def _hmean(series):
         return statistics.harmonic_mean(series)
     return self.price.groupby('code').apply(_hmean)
示例#32
0
 def mean_harmonic(self):
     """Return the harmonic mean of this instance's price series."""
     prices = self.price
     return statistics.harmonic_mean(prices)