def build_feature(df):
    """Build the feature table for ``df``.

    The words returned by ``get_words(df)`` are counted, each count is
    turned into a weight via ``get_weight``, and a ``word_shares`` column is
    written onto ``df`` (in place) by applying ``word_shares`` row by row.
    The result of ``get_feature(df)`` is returned with missing values
    replaced by 0.
    """
    word_counts = Counter(get_words(df))
    weights = {}
    for word, count in word_counts.items():
        weights[word] = get_weight(count)

    def _row_word_shares(row):
        # Row-wise adapter so every row is scored against the same weights.
        return word_shares(row, weights)

    df['word_shares'] = df.apply(_row_word_shares, axis=1)
    return get_feature(df).fillna(0)
def getIndividualSupport(dataDictionary, totalTransactions, threshold=2):
    """Return ``(item, support)`` pairs for items seen at least ``threshold`` times.

    ``dataDictionary`` is iterated in sorted order and its elements are
    counted; the support of an item is its count divided by
    ``totalTransactions``. Pairs are returned in sorted item order.
    """
    frequencies = Counter(sorted(dataDictionary))
    return [
        (element, occurrences / totalTransactions)
        for element, occurrences in frequencies.items()
        if occurrences >= threshold
    ]
예제 #3
0
파일: 29-3.py 프로젝트: Cenibee/PYALG
    def numJewelsInStones(self, jewels: str, stones: str) -> int:
        """Return how many characters of ``stones`` appear in ``jewels``.

        Each character of ``jewels`` contributes the number of times it
        occurs in ``stones``.
        """
        stone_counts = Counter(stones)
        # Counter returns 0 for characters that never occur in ``stones``.
        return sum(stone_counts[jewel] for jewel in jewels)
예제 #4
0
def count_distances(coords: List[Coord], treshold: int) -> int:
    """Count grid locations whose summed distance to all coords is below ``treshold``.

    The grid comes from ``build_grid(coords)``; for every location it
    yields, ``loc.distance(coord)`` is accumulated over all ``coords``.
    """
    grid = build_grid(coords)
    distance_sums = Counter()
    for location in grid.all_locations():
        for coord in coords:
            distance_sums[location] += location.distance(coord)
    # Only locations that received at least one distance have an entry.
    return sum(1 for total in distance_sums.values() if total < treshold)
예제 #5
0
 def __post_init__(self):
     """Reject duplicate item keys after initialisation.

     Counts ``item.type.value`` over ``self.items`` and raises ``KeyError``
     for any key that occurs more than once, unless this object is top level
     or the key is listed in ``PLURAL_KEYS``.
     """
     type_counts = Counter(item.type.value for item in self.items)
     for key, count in type_counts.items():
         # Duplicates are tolerated at top level and for plural keys.
         if self.top_level or count <= 1 or key in PLURAL_KEYS:
             continue
         raise KeyError(
             f'Key "{key}" already exists in tree and would overwrite the '
             "existing value."
         )
예제 #6
0
 def _used_properties_with_type(
         self, property_type: PropertyType) -> Counter[PropertyIdentifier]:
     """Return the usage counts of filter properties of one type.

     Selects the entries of ``self.properties_used_in_filter`` whose key
     (a ``(name, type, group_type_index)`` triple) has a type equal to
     ``property_type`` and returns them as a new ``Counter``.
     """
     selected = Counter()
     for identifier, count in self.properties_used_in_filter.items():
         name, kind, group_type_index = identifier
         if kind == property_type:
             selected[(name, kind, group_type_index)] = count
     return selected
예제 #7
0
 def __init__(self, parallelism: int = PARALLELISM):
     """Initialise the instance with empty bookkeeping containers.

     :param parallelism: stored unchanged on ``self.parallelism``; defaults
         to the module-level ``PARALLELISM`` constant.
         NOTE(review): presumably the cap on concurrently running tasks --
         confirm against the code that reads this attribute.
     """
     super().__init__()
     self.parallelism: int = parallelism
     # Insertion-ordered mapping keyed by task instance key, initially empty.
     self.queued_tasks: OrderedDict[TaskInstanceKey,
                                    QueuedTaskInstanceType] = OrderedDict()
     # Set of task instance keys, initially empty.
     self.running: Set[TaskInstanceKey] = set()
     # Mapping from task instance key to a buffered event value, initially empty.
     self.event_buffer: Dict[TaskInstanceKey, EventBufferValueType] = {}
     # Per-key counter starting at zero for every key.
     self.attempts: Counter[TaskInstanceKey] = Counter()
예제 #8
0
    def customSortString_counter(self, order: str, s: str) -> str:
        """Reorder the characters of ``s`` to follow ``order``.

        Characters that appear in ``order`` come first, grouped in that
        order; the remaining characters follow, grouped in order of first
        appearance in ``s``.
        """
        remaining = Counter(s)
        pieces = []
        for ch in order:
            # pop() removes the character so a repeat in ``order`` adds nothing.
            pieces.append(ch * remaining.pop(ch, 0))
        pieces.extend(ch * times for ch, times in remaining.items())
        return "".join(pieces)
예제 #9
0
def modify_ner_data(data: str):
    """Rewrite the NER corpus ``<data>.corpus`` in place, dropping unmatched spans.

    Trees are loaded from ``./data/onto/parsing_char/<data>.corpus`` and the
    sentences plus gold tags from ``./data/onto/ner_char/<data>.corpus``.
    That same ``ner_char`` file is then reopened for writing, so its previous
    content is overwritten. For every span returned by ``_bio_tag_to_spans``
    the tree's ``ner_match`` result decides what happens:

    * ``0`` -- counted as not matched; the tags in the span range are reset
      to ``'O'``.
    * ``1`` -- counted as a match.
    * ``2`` -- counted in the "conti" tallies.
    * anything else -- prints ``error`` and exits the process with -1.

    Each sentence is written as ``char<TAB>tag`` lines followed by a blank
    line. Finally the three totals, the not-matched ratio and the three
    per-label counters are printed.

    NOTE(review): ``span[0]`` is presumably the entity label and ``span[1]``
    a ``(start, end)`` pair -- confirm against ``_bio_tag_to_spans``.
    """
    root_dir = './data/onto'
    dataset = data + '.corpus'
    match_counter, conti_counter, not_match_counter = Counter(), Counter(), Counter()
    match, conti_match, not_match = 0, 0, 0
    # Both inputs are fully loaded before the NER file is reopened for writing.
    trees = load_trees(os.path.join(root_dir, 'parsing_char', dataset))
    ner_snts, ner_golds = load_data_from_file(os.path.join(root_dir, 'ner_char', dataset))
    assert len(trees) == len(ner_snts)
    # Opening with 'w' truncates the file that was just read above.
    with open(os.path.join(root_dir, 'ner_char', dataset), 'w', encoding='utf-8') as ner_writer:
        for snt, ner_gold, tree in zip(ner_snts, ner_golds, trees):
            snt, ner_gold = snt.split(), ner_gold.split()
            # The tree must have exactly one leaf per token of the sentence.
            assert len(list(tree.leaves())) == len(snt)
            spans = _bio_tag_to_spans(ner_gold)
            for span in spans:
                match_type = tree.ner_match(span[1][0], span[1][1], False, False)
                if match_type == 0:
                    not_match += 1

                    # ======================================
                    # Erase the tags of a span that the tree does not match.
                    for i in range(span[1][0], span[1][1]):
                        ner_gold[i] = 'O'
                    # ======================================

                    not_match_counter.update([span[0]])
                elif match_type == 1:
                    match_counter.update([span[0]])
                    match += 1
                elif match_type == 2:
                    conti_counter.update([span[0]])
                    conti_match += 1
                else:
                    # Unknown match type: abort; the output file is left partially written.
                    print('error')
                    exit(-1)

            # ======================================
            assert len(snt) == len(ner_gold)
            for char, ner in zip(snt, ner_gold):
                ner_writer.write(char+'\t'+ner+'\n')
            # A blank line separates consecutive sentences.
            ner_writer.write('\n')
            # ======================================

    # NOTE(review): raises ZeroDivisionError when no span was seen at all.
    print(match, conti_match, not_match, not_match/(not_match+match+conti_match))
    print(match_counter)
    print(conti_counter)
    print(not_match_counter)
예제 #10
0
 def minSetSize_counter_and_sort(self, arr: List[int]) -> int:
     """Return the fewest distinct values whose removal drops at least half of ``arr``.

     Values are taken greedily from the most frequent to the least frequent
     until the removed elements reach ``len(arr) // 2``.
     """
     frequencies = sorted(Counter(arr).values(), reverse=True)
     target = len(arr) // 2
     covered = 0
     for picked, freq in enumerate(frequencies, start=1):
         covered += freq
         if covered >= target:
             return picked
     # Only reached when ``arr`` is empty (no frequencies to pick).
     return len(frequencies)
예제 #11
0
def most_prolific_automaker(year: str) -> str:
    """Return the automaker with the most entries in ``DATA`` for ``year``.

    Ties go to the automaker encountered first. A ``ValueError`` is raised
    when no entry matches ``year``.
    """
    models_per_automaker = Counter(
        entry.get("automaker") for entry in DATA if entry.get("year") == year
    )
    ranked = models_per_automaker.most_common()
    # Star-unpacking keeps the ValueError on an empty ranking.
    top_entry, *_rest = ranked
    winner, _count = top_entry
    return winner
예제 #12
0
파일: 791.py 프로젝트: datpham19/leetcode
 def customSortString1(self, S: str, T: str) -> str:
     """Reorder the characters of ``T`` so they follow the order given by ``S``.

     Characters of ``T`` that are absent from ``S`` are appended afterwards,
     grouped in order of first appearance in ``T``.
     """
     remaining = Counter(T)
     pieces = []
     for ch in S:
         pieces.append(ch * remaining[ch])
         # Zero the count so the character is not emitted again below.
         remaining[ch] = 0
     pieces.extend(ch * times for ch, times in remaining.items())
     return ''.join(pieces)
예제 #13
0
def main():
    """Print the number of distinct permutations of the input, then list them.

    The input comes from ``str_r()``. The count is ``len(st)!`` divided by
    the factorial of every repeated symbol's multiplicity; the permutations
    themselves are emitted sorted, one per line, through ``outStr``.
    """
    st = str_r()
    total = factorial(len(st))
    for repeats in Counter(st).values():
        if repeats > 1:
            total //= factorial(repeats)
    outStr("{}\n".format(total))
    # A set removes the duplicates produced by repeated symbols.
    distinct = sorted({"".join(p) for p in permutations(st)})
    outStr("\n".join(distinct))
    def minimumHammingDistance(self, source: List[int], target: List[int],
                               allowedSwaps: List[List[int]]) -> int:
        """Return the minimum Hamming distance reachable with the allowed swaps.

        Indices joined by ``allowedSwaps`` are merged with ``UnionFind``.
        Within each merged group the source values can be matched freely
        against the target values, so the group contributes the number of
        source values left over after multiset subtraction.
        """
        components = UnionFind(len(source))
        for swap in allowedSwaps:
            components.union(swap[0], swap[1])

        source_by_root = defaultdict(list)
        target_by_root = defaultdict(list)
        for index, (src_val, tgt_val) in enumerate(zip(source, target)):
            root = components.find(index)
            source_by_root[root].append(src_val)
            target_by_root[root].append(tgt_val)

        distance = 0
        for root, src_vals in source_by_root.items():
            # Counter subtraction keeps only the surplus source values.
            unmatched = Counter(src_vals) - Counter(target_by_root[root])
            distance += sum(unmatched.values())
        return distance
예제 #15
0
def haskell_loc(directory: Path) -> Counter[str]:
    """Sum the ``count_haskell_loc`` results of every ``src/**/*.hs`` file.

    Files for which ``should_exclude_file`` returns true are skipped.
    """
    totals: Counter[str] = Counter()
    for source_file in directory.glob("src/**/*.hs"):
        if not should_exclude_file(source_file):
            with open(source_file, "r") as handle:
                totals += count_haskell_loc(handle.readlines())
    return totals
예제 #16
0
    def minWindow(self, s: str, t: str) -> str:
        """Return the shortest substring of ``s`` containing every character of ``t``.

        Multiplicities count: each character must occur in the window at
        least as often as in ``t``. When several windows share the minimum
        length the leftmost one is returned. Returns ``''`` when no window
        exists or when ``t`` is empty.
        """
        # An empty ``t`` is satisfied by every window, including the empty
        # one, so the shrink loop below would never stop and would index
        # past the end of ``s``.
        if not t:
            return ''

        t_count = Counter(t)
        current_count = Counter()

        best = None  # (start, end) of the shortest window found so far
        left = 0
        for right, char in enumerate(s, 1):
            current_count[char] += 1

            # ``&`` keeps the per-character minimum, so equality with
            # ``t_count`` means the window s[left:right] covers ``t``.
            while current_count & t_count == t_count:
                if best is None or right - left < best[1] - best[0]:
                    best = (left, right)
                current_count[s[left]] -= 1
                left += 1

        return s[best[0]:best[1]] if best is not None else ''
예제 #17
0
파일: 80-1.py 프로젝트: Cenibee/PYALG
    def leastInterval(self, tasks: List[str], n: int) -> int:
        """Return the minimum number of intervals needed to run all ``tasks``.

        Two executions of the same task must be at least ``n`` intervals
        apart. Tasks are scheduled in rounds of ``n + 1`` slots, each round
        taking the currently most frequent distinct tasks; unused slots of
        every round except the last are counted as idle time.
        """
        counter = Counter(tasks)
        result = 0

        while True:
            sub_count = 0
            for task, _ in counter.most_common(n + 1):
                sub_count += 1
                result += 1

                # Decrement by key. ``counter.subtract(task)`` would iterate
                # the characters of the task name, so a name that is not
                # exactly one character long would never be decremented.
                counter[task] -= 1
                if counter[task] == 0:
                    del counter[task]

            if not counter:
                break

            # Pad the round to n + 1 slots with idle intervals.
            result += n - sub_count + 1
        return result
예제 #18
0
def __create_probability_table(symbol_list):
    """Map every distinct symbol to its relative frequency as a ``Decimal``.

    ``symbol_list`` must contain at least one symbol; this is checked with
    an ``assert``.
    """
    occurrences = Counter(symbol_list)
    total = sum(occurrences.values())
    assert total != 0

    table = {}
    for symbol, freq in occurrences.items():
        table[symbol] = Decimal(freq) / total
    return table
def genome_stat(top: str, genomes: list, fna_file: str, output=False) -> None:
    """Collect contig statistics for each genome's FASTA file.

    For every name in ``genomes`` the file ``top/<genome>/<fna_file>`` is
    read. Lines starting with ``>`` are contig headers; all other lines are
    sequence data. A genome is kept only when it has at least one header and
    no duplicated header; otherwise ``<genome>: failed!`` is printed.

    :param top: directory that contains one sub-directory per genome.
    :param genomes: genome sub-directory names.
    :param fna_file: FASTA file name inside every genome directory.
    :param output: when true, print one summary line per kept genome
        (sorted by descending average contig length) and write the same
        summary, followed by the ``stat_by_len`` result, to
        ``genome_stat.txt``.
    """
    genome_stat_lis = []

    for genome in genomes:
        path = os.path.join(top, genome, fna_file)
        headers = []
        header_len_lis = []
        big_str_len = 0.0

        with open(path) as f:
            header_len = 0
            for line in f:
                if line.startswith('>'):
                    # A new header closes the previous contig.
                    header_len_lis.append(header_len)
                    header_len = 0
                    headers.append(line.split(' ')[0])
                else:
                    seq_len = len(line.replace('\n', ''))
                    big_str_len += seq_len
                    header_len += seq_len

        if not headers:
            # No contig at all: nothing to average (previously an IndexError).
            print(genome + ': failed!')
            continue

        # The last contig is closed by end-of-file rather than by a header,
        # so its length has to be appended here or it would be lost.
        header_len_lis.append(header_len)
        # The first entry is whatever preceded the first header, not a contig.
        del header_len_lis[0]

        # test contig dup
        if any(count > 1 for count in Counter(headers).values()):
            print(genome + ': failed!')
            continue

        avg_len = float(big_str_len) / float(len(headers))
        genome_stat_lis.append((genome, len(headers), avg_len, header_len_lis))

    # One stable sort after the loop gives the same order as re-sorting on
    # every iteration.
    genome_stat_lis.sort(key=lambda x: x[2], reverse=True)

    if output:
        with open('genome_stat.txt', 'w') as out:
            for genome, contigs, avg_len, header_len_lis in genome_stat_lis:
                len_lis = [1000, 2000, 5000, 10000, 20000, 50000]
                len_dict = stat_by_len(len_lis, header_len_lis)
                summary = ("\tcontigs: " + str(contigs) + "\tavg_len: " +
                           str(avg_len))

                print(genome, end='')
                print(summary)
                out.write(genome)
                out.write(summary)
                out.write(str(len_dict) + '\n')
예제 #20
0
파일: ref.py 프로젝트: timqsh/advent2020
def next_state_(state: State) -> State:
    """Return the next generation of active points.

    Every active point adds one to the tally of each of its neighbours (all
    offsets in {-1, 0, 1} per dimension except the zero offset). A point is
    active in the result when its tally is 3, or when its tally is 2 and it
    is already in ``state``.
    """
    neighbor_counts: Counter[Tuple[int, ...]] = Counter()
    for point in state:
        dimensions = len(point)
        for delta in product([-1, 0, 1], repeat=dimensions):
            if any(delta):  # skip the all-zero offset, i.e. the point itself
                neighbor = tuple(sum(pair) for pair in zip(point, delta))
                neighbor_counts[neighbor] += 1

    survivors = set()
    for cell, tally in neighbor_counts.items():
        if tally == 3 or (tally == 2 and cell in state):
            survivors.add(cell)
    return survivors
예제 #21
0
def gather_stats_good(
    n: int, samples: int = 1000, summary: Optional[Counter[int]] = None
) -> Counter[int]:
    """Tally the totals of ``samples`` throws of ``n`` six-sided dice.

    The totals are added to ``summary`` (a fresh ``Counter`` when omitted),
    which is also the return value.
    """
    if summary is None:
        summary = Counter()
    for _ in range(samples):
        throw_total = 0
        for _die in range(n):
            throw_total += randint(1, 6)
        summary[throw_total] += 1
    return summary
예제 #22
0
파일: __init__.py 프로젝트: petrows/checkmk
def _commandline_discovery_on_host(
    host_name: HostName,
    ipaddress: Optional[HostAddress],
    parsed_sections_broker: ParsedSectionsBroker,
    run_plugin_names: Container[CheckPluginName],
    only_new: bool,
    *,
    load_labels: bool,
    only_host_labels: bool,
    on_error: OnError,
) -> None:
    """Run host-label and service discovery for one host and report the results.

    Steps, in order:

    1. Analyse host labels via ``analyse_node_labels`` (called with
       ``save_labels=True``) and report how many new labels were found.
    2. Return early when ``only_host_labels`` is set.
    3. Analyse services via ``analyse_discovered_services`` and save
       ``service_result.present`` with ``autochecks.save_autochecks_file``.
    4. Print the number of new services per check plugin, then the total.
    5. Emit a console warning for every parsing-error detail.

    ``only_new`` is passed on to the service analysis and also selects the
    wording ("no new" vs. "no") used when nothing new was found.
    """

    section.section_step("Analyse discovered host labels")

    host_labels = analyse_node_labels(
        host_name=host_name,
        ipaddress=ipaddress,
        parsed_sections_broker=parsed_sections_broker,
        load_labels=load_labels,
        save_labels=True,
        on_error=on_error,
    )

    # ``count`` is an int when labels were found, otherwise a wording string.
    count = len(host_labels.new) if host_labels.new else (
        "no new" if only_new else "no")
    section.section_success(f"Found {count} host labels")

    if only_host_labels:
        return

    section.section_step("Analyse discovered services")

    service_result = analyse_discovered_services(
        host_name=host_name,
        ipaddress=ipaddress,
        parsed_sections_broker=parsed_sections_broker,
        run_plugin_names=run_plugin_names,
        only_new=only_new,
        on_error=on_error,
    )

    # TODO (mo): for the labels the corresponding code is in _host_labels.
    # We should put the persisting in one place.
    autochecks.save_autochecks_file(host_name, service_result.present)

    # One verbose line per plugin, sorted by plugin name.
    new_per_plugin = Counter(s.check_plugin_name for s in service_result.new)
    for name, count in sorted(new_per_plugin.items()):
        console.verbose("%s%3d%s %s\n" %
                        (tty.green + tty.bold, count, tty.normal, name))

    # ``count`` is reused here for the overall total (int or wording string).
    count = len(service_result.new) if service_result.new else (
        "no new" if only_new else "no")
    section.section_success(f"Found {count} services")

    for detail in check_parsing_errors(
            parsed_sections_broker.parsing_errors()).details:
        console.warning(detail)
예제 #23
0
    def topKFrequent(self, nums: List[int], k: int) -> List[int]:
        """Return the ``k`` most frequent values of ``nums``, most frequent first.

        Raises ``IndexError`` when ``k`` exceeds the number of distinct
        values.
        """
        ranked = Counter(nums).most_common()
        return [ranked[position][0] for position in range(k)]
예제 #24
0
 def frequencySort(self, s: str) -> str:
     """Return the characters of ``s`` grouped and ordered by descending frequency.

     Characters with equal frequency keep their order of first appearance.
     """
     # Aggregate with Counter, repeat every character by its count, and
     # join the pieces from most to least frequent.
     return ''.join(ch * cnt for ch, cnt in Counter(s).most_common())
예제 #25
0
def calculate_template_score(template):
    """Return the score of an element template.

    The score is the occurrence count of the most frequent element minus
    the occurrence count of the least frequent one. An empty template
    raises ``ValueError``.
    """
    frequencies = Counter(template).values()
    return max(frequencies) - min(frequencies)
예제 #26
0
 def numberOfArithmeticSlices(self, A):
     """Count the arithmetic subsequences of ``A`` with at least three elements.

     Dynamic programming, O(n^2) time and space: for every index a counter
     maps a common difference to the number of subsequences of length >= 2
     that end there with that difference. The n*(n-1)/2 plain pairs are
     subtracted at the end.
     """
     n = len(A)
     ending_at = []
     total = 0
     for i, current in enumerate(A):
         here = Counter()
         for j in range(i):
             diff = current - A[j]
             # Extend every run ending at j, plus the new pair (j, i).
             here[diff] += ending_at[j][diff] + 1
         ending_at.append(here)
         total += sum(here.values())
     return total - (n - 1) * n // 2
예제 #27
0
 def __init__(self, bot):
     """Set up configuration defaults, caches and the background loop task.

     Registers the global config defaults, creates the in-memory cache and
     session counters, records the session start time, and schedules
     ``self.bg_loop()`` on the bot's event loop.
     """
     self.bot = bot
     self.config = Config.get_conf(
         self, 1398467138476, force_registration=True)
     self.config.register_global(
         globaldata=Counter({}),
         guilddata={},
         automated=Counter({}),
     )
     self.cache = {
         "guild": {},
         "session": Counter(),
         "automated": Counter(),
     }
     self.session = Counter()
     self.session_time = datetime.datetime.utcnow()
     self.bg_loop_task = self.bot.loop.create_task(self.bg_loop())
예제 #28
0
파일: holo.py 프로젝트: minhptx/spade
    def fit(self, values):
        """Build the frequency tables used by the feature functions.

        Four counters are stored on the instance: raw trigrams of every
        value, the ``str2regex`` form of those trigrams, the raw values, and
        the ``str2regex`` form of the values. ``self.func2counter`` maps each
        feature function to the counter it reads.
        """
        ngrams = [
            "".join(gram) for val in values for gram in xngrams(val, 3)
        ]
        self.trigram_counter = Counter(ngrams)
        self.sym_trigram_counter = Counter(
            str2regex(gram, False) for gram in ngrams)

        self.val_counter = Counter(values)
        self.sym_val_counter = Counter(
            str2regex(val, False) for val in values)

        self.func2counter = {
            val_trigrams: self.trigram_counter,
            sym_trigrams: self.sym_trigram_counter,
            value_freq: self.val_counter,
            sym_value_freq: self.sym_val_counter,
        }
예제 #29
0
파일: run.py 프로젝트: lkarjun/usageReport
def counting(apps: List[str], runningapps: List[str]) -> Counter:
    """Sample ``runningapps`` six times, one second apart, and count the names.

    The list is appended to the tally on every pass; after the sixth pass
    (``n`` reaches 360 in steps of 60) the tally is returned as a
    ``Counter``, so every name's count is six times its count in
    ``runningapps``. The call blocks for about five seconds.

    :param apps: unused by this function; kept for interface compatibility.
    :param runningapps: names to tally on every pass.
    :return: ``Counter`` of the accumulated names.
    """
    running_apps = []
    n = 0
    while True:
        running_apps.extend(runningapps)
        n += 60
        if n == 360:
            return Counter(running_apps)
        sleep(1)
예제 #30
0
def main():
    """Demonstrate basic ``Counter`` operations on two class rosters.

    Prints, in order: a single count, the roster size before and after
    merging the second roster, the three most common names after the merge
    and after subtracting it again, and the intersection of both counters.
    """
    first_roster = ["Bob", "Jams", "Jams"]
    second_roster = ["Bill", "Jams", "Joe"]
    first_counts, second_counts = Counter(first_roster), Counter(second_roster)

    print(first_counts["Jams"])

    size_before = sum(first_counts.values())
    print(size_before, "in clas 1")

    first_counts.update(second_roster)
    size_after = sum(first_counts.values())
    print(size_after, "in clas 1")

    print(first_counts.most_common(3))

    first_counts.subtract(second_roster)
    print(first_counts.most_common(3))

    print(first_counts & second_counts)