def _assert_valid(self, visits):
        """Check if writes dont overlap other writes and reads"""

        # check that writes open and close consequently
        write_blocks = mo.split_at(visits, lambda x: x[1] == 'r')
        for write_block in write_blocks:
            for v1, v2 in mo.chunked(write_block, 2):
                self.assertEqual(v1[0], v2[0])

        # check that reads open and close in groups between writes
        read_blocks = mo.split_at(visits, lambda x: x[1] == 'w')
        for read_block in read_blocks:
            for v1, v2 in mo.chunked(sorted(read_block), 2):
                self.assertEqual(v1[0], v2[0])
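A minimal sketch of visit data the check above accepts (hypothetical ids; each write appears as a back-to-back pair, and reads pair up between writes):

visits = [('A', 'w'), ('A', 'w'),
          ('B', 'r'), ('C', 'r'), ('B', 'r'), ('C', 'r'),
          ('D', 'w'), ('D', 'w')]
# mo.split_at(visits, lambda x: x[1] == 'r') isolates the write pairs;
# mo.split_at(visits, lambda x: x[1] == 'w') isolates the read groups.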
Example #2
def shphrase(s):
    terms = []
    splits = list(SH_JOINS.keys())
    for idx, _t in enumerate(split_at(s, lambda x: x in splits, keep_separator=True)):
        t = "".join(_t)
        if idx % 2 == 0:
            terms.append("("+shterm(t)+")")
        else:
            terms.append(t)

    out = ""
    t1 = terms[0]
    i = 1
    if i == len(terms):
        return t1    
    else:
        while i < len(terms):
            op_s = terms[i]
            if op_s in SH_JOINS:
                op = SH_JOINS[op_s]
                t2 = terms[i+1]
                
                for k, v in SH_BACKREFS.items():
                    t2 = t2.replace(k, f"({t1})")
                
                if callable(op):
                    out += op(t1, t2)
                else:
                    out += f"({t1}.{op[0]}({t2}))"
                i += 2
    return out
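A sketch of the alternating term/operator pattern that keep_separator=True produces, assuming '|' and '&' are join tokens in SH_JOINS:

from more_itertools import split_at

parts = ["".join(p)
         for p in split_at("a|b&c", lambda ch: ch in "|&", keep_separator=True)]
# parts == ['a', '|', 'b', '&', 'c']: even indexes are terms, odd indexes are operators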
Example #3
def __parse_disposition_events(soup):
    events_title = soup.find("div", class_=SECTION_TITLE_CLASS, string="Events & Orders of the Court")
    events = list(events_title.parent.next_siblings)
    split_events = list(split_at(events, CaseParser.__is_other_events_and_hearings))
    assert len(split_events) == 2
    disposition_events, other_events = split_events
    return disposition_events
Example #4
    def cluster(self, ident: float = 0.95, out_path: str = 'dataset/raw/intervals.clusters.txt',
                n_threads: int = 0, max_mem_mb: int = 4000, min_seq_len: int = 50,
                input_path: t.Optional[str] = None) -> t.List[t.List[NamedInterval]]:

        def get_interval(cluster_rec: str, mapping: t.Mapping[t.Tuple[str, int, int], NamedInterval]) \
                -> NamedInterval:
            rec = cluster_rec.split()[2].rstrip('...').lstrip('>')
            name = rec.split('|')[0]
            start, stop = map(int, rec.split('|')[1].split('-'))
            return mapping[(name, start, stop)]

        if self.rolled is None:
            raise RequiresAttributeError('rolled')

        if input_path is None:
            input_path = self.dump_intervals()
        cmd = f'cd-hit -i {input_path} -o {out_path} -c {ident} ' \
              f'-T {n_threads} -M {max_mem_mb} -l {min_seq_len} -d 0'
        logging.info(f'Will run clustering command {cmd}')
        # run the command once with output captured, rather than re-running it on failure
        try:
            sp.run(cmd, shell=True, check=True, capture_output=True, text=True)
        except sp.CalledProcessError as e:
            raise RuntimeError(f'Command {cmd} failed with an error {e}, '
                               f'stdout {e.stdout} and stderr {e.stderr}')
        mapping_ = {(x.name, x.start, x.stop): x for x in chain.from_iterable(self.rolled)}
        with open(f'{out_path}.clstr') as f:
            clusters = list(map(
                lambda c: [get_interval(r, mapping_) for r in c],
                filter(bool, split_at(f, lambda x: x.startswith('>')))))
        logging.info(f'Clustered intervals into {len(clusters)} clusters')
        self.clusters = clusters
        return clusters
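A sketch of the split pattern used on the .clstr file, with toy cd-hit-style records (hypothetical names):

from more_itertools import split_at

lines = ['>Cluster 0', '0\t12aa, >p1|1-12... *',
         '>Cluster 1', '0\t9aa, >p2|3-11... *']
groups = list(filter(bool, split_at(lines, lambda x: x.startswith('>'))))
# groups == [['0\t12aa, >p1|1-12... *'], ['0\t9aa, >p2|3-11... *']]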
Example #5
def get_element_block(xml_string: str,
                      first_name: str,
                      second_name: str = None,
                      include_initial: bool = True,
                      include_final: bool = True) -> str:
    """
    warning: use great caution if attempting to apply this function,
    or anything like it, to tags that that may appear more than once in the
    label. this _general type of_ approach to XML parsing works reliably
    only in the special case where tag names (or sequences of tag names,
    etc.) are unique (or their number of occurrences are otherwise precisely known)
    """
    if second_name is None:
        element_names = [first_name]
    else:
        element_names = [first_name, second_name]
    split = tuple(
        split_at(xml_string.splitlines(),
                 are_in(element_names, or_),
                 keep_separator=True))
    chunk = split[2]
    if include_initial:
        chunk = split[1] + chunk
    if include_final:
        chunk = chunk + split[3]
    return "\n".join(chunk)
Example #6
def __init__(self, input_, recursive=False):
    self.players = []
    self.decks = []
    self.winner = None
    self.recursive = recursive

    if recursive:
        self.previous_positions = set()

    for player in split_at(input_, lambda x: not x.strip()):
        deck = []
        for line in player:
            if line.startswith("Player"):
                self.players.append(line.split()[1].strip(":\n"))
            else:
                deck.append(int(line))
        self.decks.append(deck)
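A sketch of the blank-line split that separates the player blocks (toy input):

from more_itertools import split_at

lines = ["Player 1:", "9", "2", "", "Player 2:", "5", "8"]
print(list(split_at(lines, lambda x: not x.strip())))
# [['Player 1:', '9', '2'], ['Player 2:', '5', '8']]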
Example #7
    def _get_pulldown(flags: List[dict]) -> Tuple[int, Optional[str]]:
        """
        Get most commonly used Pulldown cycle and syntax string.
        Returns tuple (pulldown, cycle), or (0, None) if Pulldown is not used.
        """
        # TODO: Find a safe way to get cycle, i.e. not resort to most common digit.
        #       Previously I would do this code on all progressive rff indexes, but when it entered and
        #       exited interlaced sections the right index vs left index were very far apart, messing up
        #       the accuracy of the cycles. I cannot find out why my test source (Family Guy S01E01 USA
        #       NTSC) is still having random different numbers in each (now progressive only) sections.
        sections = [
            section
            for split in split_at([dict(x, i=n) for n, x in enumerate(flags)],
                                  lambda flag: not flag["progressive_frame"])
            for section in
            [[flag["i"] for flag in split if flag["rff"] and flag["tff"]]]
            if section and len(section) > 1
        ]
        if not sections:
            return 0, None

        cycle = Counter([
            Counter([(right - left)
                     for left, right in zip(indexes[::2], indexes[1::2])
                     ]).most_common(1)[0][0] for indexes in sections
        ]).most_common(1)[0][0] + 1

        pulldown = ["2"] * math.floor(cycle / 2)
        if cycle % 2:
            pulldown.pop()
            pulldown.append("3")

        return cycle, ":".join(pulldown)
Example #8
def __init__(self, ID, l):
    if l == 'a' or l == 'b':
        self.val = l
    self.ID = ID
    self.branches = list(split_at(l, lambda x: x == '|'))
Example #9
def __enter__(self):
    self.file = open(self.input_path)
    self.file.__enter__()
    lines = self.file.readlines()
    blocks = list(
        split_at([s.rstrip('\n') for s in lines], lambda l: l == ""))
    return blocks if self.cons is None else map(self.cons, blocks)
Example #10
def convert_data():
    _rules, _my_ticket, _tickets = list(
        mit.split_at(input_data, pred=lambda x: x == ""))

    _rules = get_rules(_rules)
    _my_ticket = list(map(int, _my_ticket[1].split(",")))
    _tickets = get_tickets(_tickets[1:])

    return _rules, _my_ticket, _tickets
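A sketch with toy ticket-style input, assuming input_data is a list of stripped lines:

import more_itertools as mit

input_data = ["class: 1-3", "", "your ticket:", "7,1,14", "", "nearby tickets:", "7,3,47"]
_rules, _my_ticket, _tickets = list(mit.split_at(input_data, pred=lambda x: x == ""))
# _rules == ['class: 1-3']
# _my_ticket == ['your ticket:', '7,1,14']
# _tickets == ['nearby tickets:', '7,3,47']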
Example #11
def get_words_from_hagen_dictionary() -> Iterable[Word]:
    with open('hagen-morph.txt', encoding='windows-1251') as file:
        rows = (line.split('|') for line in file)
        articles = mit.split_at(rows, lambda row: len(row) < 4)
        stripped_articles = map(strip_article, articles)
        stripped_article_rows = (row for article in stripped_articles
                                 for row in article)
        words = map(row_to_word, stripped_article_rows)
        yield from words
Example #12
def process_input(input_str):
    passports = split_at(input_str, lambda x: x == '')

    def process_passports(passport):
        fields = flatten(map(lambda x: x.split(' '), passport))
        split_fields = map(lambda x: x.split(':'), fields)
        filtered = filter(lambda x: x[0] != 'cid', split_fields)
        return dict(filtered)

    return map(process_passports, passports)
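A sketch of the pipeline on toy passport lines, assuming flatten and split_at here are the more_itertools helpers:

records = list(process_input(["a:1 b:2", "cid:9", "", "c:3"]))
# records == [{'a': '1', 'b': '2'}, {'c': '3'}] -- the 'cid' field is filtered out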
Example #13
def read_input_files(input_file: str) -> tuple[list[str], list[str]]:
    """
    Extracts a grammar list.
    """
    with open(input_file) as input_fobj:
        rules, messages = more_itertools.split_at(
            input_fobj, lambda line: not line.strip())
        rules = [r.strip() for r in rules]
        messages = [m.strip() for m in messages]
    return rules, messages
Example #14
def read_input_files(input_file: str) -> list[list[str]]:
    """
    Extracts a list of surveys from the input file.
    Each survey is returned as a list of individual response strings.
    The presence of a letter in the string indicates the yes-answer.
    """
    with open(input_file) as input_fobj:
        surveys = [[word.strip() for word in chunk]
                   for chunk in more_itertools.split_at(
                       input_fobj, lambda line: not line.strip())]
    return surveys
Example #15
def read_input_files(input_file: str) -> dict[int, Tile]:
    """
    Extracts a dictionary mapping of input tile numbers to actual tiles.
    """
    with open(input_file) as input_fobj:
        tiles = {
            parse_raw_head(head.strip()): Tile.from_raw_body(body)
            for head, *body in more_itertools.split_at(
                input_fobj, lambda line: not line.strip())
        }
    return tiles
Example #16
def read_input_files(input_file: str) -> tuple[DeckInfo, DeckInfo]:
    """
    Extracts a pair of starting decks where each deck is a list of cards.
    """
    with open(input_file) as input_fobj:
        fst, snd = more_itertools.split_at(input_fobj,
                                           pred=lambda line: not line.strip())
        fst = DeckInfo.from_raw(fst)
        snd = DeckInfo.from_raw(snd)
        assert fst.player_name == 'Player 1' and snd.player_name == 'Player 2'
    return fst, snd
Example #17
def no_of_horizontal_strokes(heights):
    """let heights be an array of positive integers describing heights of uniform-width rectangles placed one-by-one.
    imagine we are painting the rectangles horizontally with single strokes, the height of the brush is 1.
    the function computes the minimal number of strokes needed to paint the rectangles."""

    strokes = min(heights)
    heights = [x - strokes for x in heights]
    for subarray in itertools.filterfalse(lambda part: part == [],
                                          split_at(heights, lambda n: n == 0)):
        strokes += no_of_horizontal_strokes(subarray)

    return strokes
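A worked call, assuming a non-empty list of heights:

print(no_of_horizontal_strokes([2, 1, 2]))
# 3: one stroke covers the shared bottom row, then one stroke for each of the two tops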
Example #18
def main():
  
  rules, messages = split_at(fileinput.input(), lambda x: x.strip() == "")

  rules = Rules(rules)

  print(sum(rules.check_rule(message.strip(), "0") for message in messages))
  
  rules.parse_rule("8: 42 | 42 8")
  rules.parse_rule("11: 42 31 | 42 11 31")

  print(sum(rules.check_rule(message.strip(), "0") for message in messages))
Example #19
def test(test_data, write_path):
    corrects = 0
    with open(write_path, 'w', encoding='utf8') as f:
        for i, d in enumerate(test_data):
            dy.renew_cg()
            output = generate(d[0], enc_fwd_lstm, enc_bwd_lstm, dec_lstm)
            to_write = list(split_at(d[0], lambda x: x == '+'))
            to_write[0] = ''.join(to_write[0])
            to_write[1] = ';'.join(to_write[1])
            to_write[2] = ';'.join(to_write[2])
            f.write('\t'.join(to_write + [''.join(d[1]), output]) + '\n')

            if output == ''.join(d[1]):
                corrects += 1
    print(corrects / len(test_data))
    print(write_path, 'written')
    return corrects / len(test_data)
Example #20
    def from_bytes(index_bytes: bytearray):

        decoded = uvarint.cut(1, index_bytes)
        _index_type, index_bytes = decoded.integers[0], decoded.rest

        if _index_type != 0:
            raise Exception("This is not an EliasFano index!")

        _n, _lower_bits, _upper_bits, inferiors_byte_count, superiors_byte_count = uvarint.cut(5, index_bytes).integers

        bytes_iter = iter(uvarint.cut(5, index_bytes).rest)

        if inferiors_byte_count:
            inferiors = ("{0:0%db}" % (_n * _lower_bits)).format(
                int.from_bytes(take(inferiors_byte_count, bytes_iter), 'little', signed=False))

            _inferiors = list(map(lambda inf: int("".join(inf), 2),
                                  windowed(iter(inferiors), _lower_bits,
                                           step=_lower_bits)))
        else:
            _inferiors = []

        if superiors_byte_count:
            # superiors contains exactly '2**(upper_bits)' 0s and exactly 'n' 1s
            superiors = ("{0:0%db}" % (_n + 2 ** _upper_bits)).format(
                int.from_bytes(take(superiors_byte_count, bytes_iter), 'little', signed=False))

            _superiors = list(map(len,
                                  split_at(superiors,
                                           lambda v: v == '0')))[0:-1]

            _superiors_prefixSums = list(accumulate(_superiors))
        else:
            _superiors = []
            _superiors_prefixSums = []

        # TODO: implement appropriate constructor
        ef_index = EliasFano([0])
        ef_index._n = _n
        ef_index._u = 2 ** max(1, _lower_bits + _upper_bits)
        ef_index._lower_bits = _lower_bits
        ef_index._upper_bits = _upper_bits
        ef_index._inferiors = _inferiors
        ef_index._superiors = _superiors
        ef_index._superiors_prefixSums = _superiors_prefixSums

        return ef_index
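The unary decoding behind the superiors split, sketched in isolation on a toy bit string:

from more_itertools import split_at

bits = "11010"
runs = list(map(len, split_at(bits, lambda v: v == '0')))
# runs == [2, 1, 0]: the count of 1s before each 0; the code above drops the
# trailing group with [0:-1]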
Example #21
    def md_to_dict(cls, lines):
        all_unit_data = list(
            more_itertools.split_at(
                lines,
                lambda x: x.strip('\n') == DATA_SEP
            )
        )
        learning_units = []
        module_data = None
        for unit_lines in all_unit_data:
            if unit_lines:
                metadata = _metadata_to_dict(unit_lines)
                display_content = _display_content_to_dict(unit_lines)
                clean_display_content = display_content.strip('\n')
                try:
                    unit_data = metadata.get('meta', {}).get('data', {})
                    order = unit_data.pop('order', 0)
                    unit_type = metadata.get('meta', {}).get('type', None)
                    unit_cls = LessonData if unit_type == 'lesson' else QuizData
                except KeyError:
                    print("[ERROR] Data not found. Please check the MD file")
                    continue  # skip this unit; the names below would be unbound
                unit_data.update(
                    {getattr(unit_cls,'display_content_field'): clean_display_content,}
                )
                if unit_type == 'lesson':
                    learning_unit_data = {
                        "order": order,
                        "type": unit_type,
                        "quiz": None,
                        "lesson": unit_data,
                    }
                    learning_units.append(learning_unit_data)
                elif unit_type == 'quiz':
                    learning_unit_data = {
                        "order": order,
                        "type": unit_type,
                        "quiz": unit_data,
                        "lesson": None,
                    }
                    learning_units.append(learning_unit_data)
                elif unit_type == 'module':
                    module_data = ModuleData.md_to_dict(unit_lines)

        return {'module': module_data, 'learning_units': learning_units}
Example #22
def run() -> None:
    with open_input(__file__) as file:
        lines = file.read().splitlines()

        groups: List[List[str]] = list(split_at(lines,
                                                lambda line: line == ""))

        # part 1
        result = sum(
            pipe(chain.from_iterable(group), set, len)
            for group in groups)  # type: ignore
        print(result)

        # part 2
        result = sum(
            len(reduce(set.intersection, map(set, group)))  # type: ignore
            for group in groups)
        print(result)
Example #23
def main(input, part):
    # Iterator of lines
    lines = map(lambda x: x.strip(), input.readlines())
    # Iterator of lists of strings
    blocks = split_at(lines, lambda x: x == "")
    # Iterator of lists of sets of chars
    sets = map(lambda block: map(set, block), blocks)
    if part == "1":  # Set of any yes (union)
        set_combiner = set.union
    elif part == "2":  # Set of all yes (intersection)
        set_combiner = set.intersection
    # Iterator of sets of chars (according to set_combiner function)
    sets = map(apply(set_combiner), sets)
    # Iterator of ints (counts of sets)
    count_yes_answers = map(len, sets)
    # Summation of all ints (total count)
    count_yes_answers = sum(count_yes_answers)
    print(count_yes_answers)
Example #24
    def _topology_builder(
        self,
        coordinates: List[Tuple[float, float]],
        points_intersections: Set[Tuple[float, float]],
    ):

        is_rebuild = False
        coordinates_updated: List[List[Tuple[float, float]]] = []

        # split coordinates found at intersection to respect the topology
        first_value, *middle_coordinates_values, last_value = coordinates
        for point_intersection in points_intersections:

            point_intersection = tuple(point_intersection)

            if point_intersection in middle_coordinates_values:
                # only the middle values are edited, so the first and last
                # coordinates are never touched

                middle_coordinates_values = self._insert_value(
                    middle_coordinates_values,
                    point_intersection,
                    tuple([point_intersection]),
                )

                middle_coordinates_values = self._insert_value(
                    middle_coordinates_values,
                    point_intersection,
                    self.__ITEM_LIST_SEPARATOR_TO_SPLIT_LINE,
                    "after",
                )
                coordinates = [first_value
                               ] + middle_coordinates_values + [last_value]
                is_rebuild = True

        if is_rebuild:
            coordinates_updated = list(
                split_at(coordinates, lambda x: x == "_"))
        else:
            coordinates_updated = [coordinates]

        return coordinates_updated
Example #25
def read_input_files(
        input_file: str) -> tuple[list[Rule], Ticket, list[Ticket]]:
    """
    Extracts a rules set, my own ticket, and a list of nearby tickets.
    """
    with open(input_file) as input_fobj:
        stripped_input = (line.strip() for line in input_fobj)
        rules, my_ticket, nearby_tickets = more_itertools.split_at(
            stripped_input, pred=lambda line: not line)
        rules = [Rule.from_raw(r) for r in rules]
        num_attrs = len(rules)

        assert len(my_ticket) == 2 and my_ticket[0].strip() == "your ticket:"
        my_ticket = Ticket.from_raw(my_ticket[1])
        assert len(my_ticket.attrs) == num_attrs

        assert len(nearby_tickets) >= 2
        assert nearby_tickets[0].strip() == "nearby tickets:"
        nearby_tickets = [Ticket.from_raw(t) for t in nearby_tickets[1:]]
        assert all(len(t.attrs) == num_attrs for t in nearby_tickets)

    return rules, my_ticket, nearby_tickets
Example #26
def main():

    tiles = [
        Tile(t) for t in split_at(fileinput.input(), lambda x: not x.strip())
    ]

    grid_dim = int(sqrt(len(tiles)))

    grid = Grid(grid_dim)

    solution = grid.solve(tiles)

    if not solution:
        return

    print(reduce(mul, solution.corners(), 1))

    image = solution.assemble(tiles)

    monsters = image.find(MONSTER)

    roughness = "".join(image.grid).count("#") - monsters * MONSTER.count("#")

    print(roughness)
Example #27
def eval_adv(expr: List):
    if all([isinstance(_, int) for _ in expr]):
        return reduce(mul, expr, 1)
    if any([isinstance(_, list) for _ in expr]):
        newels = []
        for el in expr:
            if isinstance(el, list):
                newels.append(eval_adv(el))
            else:
                newels.append(el)
        return eval_adv(newels)

    newvals = []
    for val in split_at(expr, lambda x: x == "*"):
        if "+" in val:
            newval = lfilter(lambda x: x != "+", val)
            newval = lmap(int, newval)
            newval = sum(newval)
            newvals.append([newval])
        else:
            newvals.append(lmap(int, val))

    return eval_adv(list(concat(newvals)))
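A worked call on the tokenised form of 2 * 3 + 4 under this precedence (addition binds tighter than multiplication):

print(eval_adv([2, "*", 3, "+", 4]))
# 14: the '+' group collapses to 7 first, then 2 * 7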
Example #28
def main(input, part):
    # Iterator of lines
    lines = map(lambda x: x.strip(), input.readlines())
    # Iterator of key-value pair strings
    entries = flatten(map(lambda x: x.split(" "), lines))
    # Iterator of key-value pair tuples
    entries = map(lambda x: x.split(":"), entries)
    # Iterator of lists of key-value pairs (split on empty string)
    blocks = split_at(entries, lambda x: x == [""])
    # Iterator of dicts
    dicts = map(dict, blocks)

    # Start applying filters, and print length
    dicts = filter(filter_required_keys, dicts)
    if part == "2":
        dicts = filter(filter_birth_year, dicts)
        dicts = filter(filter_issue_year, dicts)
        dicts = filter(filter_expire_year, dicts)
        dicts = filter(filter_height, dicts)
        dicts = filter(filter_hair_color, dicts)
        dicts = filter(filter_eye_color, dicts)
        dicts = filter(filter_passport_id, dicts)
    print(ilen(dicts))
Example #29
import more_itertools

lines = [
    "erhgedrgh",
    "erhgedrghed",
    "esdrhesdresr",
    "ktguygkyuk",
    "-------------",
    "srdthsrdt",
    "waefawef",
    "ryjrtyfj",
    "-------------",
    "edthedt",
    "awefawe",
]

list(more_itertools.split_at(lines, lambda x: '-------------' in x))
#  [['erhgedrgh', 'erhgedrghed', 'esdrhesdresr', 'ktguygkyuk'], ['srdthsrdt', 'waefawef', 'ryjrtyfj'], ['edthedt', 'awefawe']]
Example #30
def get_next_concept(self, lexed):
    current_block = list(split_at(lexed[self.position+1:], lambda c: c is None))[0]
    return find_highest_precedence_concept(current_block)