Exemplo n.º 1
0
def repair_depfile(depfile: TextIO, include_dirs: List[Path]) -> None:
    """Rewrite relative dependency paths in a Make-style depfile as
    absolute paths resolved against include_dirs.

    The file is rewritten in place, but only if at least one relative
    path was found and resolved.
    """
    changes_made = False
    # Collect output fragments in a list; repeated `out += ...` string
    # concatenation is quadratic in the file size.
    parts: List[str] = []
    for line in depfile.readlines():
        if ":" in line:
            # Keep everything up to and including the last colon (the
            # "target:" prefix) untouched; only the remainder is a path.
            colon_pos = line.rfind(":")
            parts.append(line[: colon_pos + 1])
            line = line[colon_pos + 1 :]

        line = line.strip()

        # A trailing backslash is a Make line continuation; preserve it.
        if line.endswith("\\"):
            end = " \\"
            line = line[:-1].strip()
        else:
            end = ""

        path = Path(line)
        if not path.is_absolute():
            changes_made = True
            path = resolve_include(path, include_dirs)
        parts.append(f"    {path}{end}\n")

    # If any paths were changed, rewrite the entire file
    if changes_made:
        depfile.seek(0)
        depfile.write("".join(parts))
        depfile.truncate()
Exemplo n.º 2
0
def read_poetry_form_descriptions(
        poetry_forms_file: TextIO) -> POETRY_FORMS:
    """Return a dictionary mapping each poetry form name found in
    poetry_forms_file to that form's poetry pattern.

    >>> import io
    >>> form_file = io.StringIO(SAMPLE_POETRY_FORM_FILE)
    >>> result = read_poetry_form_descriptions(form_file)
    >>> result == EXPECTED_POETRY_FORMS
    True
    """
    lines = poetry_forms_file.readlines()
    forms = {}
    form_name = ""
    pos = 0
    while pos < len(lines):
        stripped = lines[pos].strip()
        if stripped.isalpha():
            # A purely alphabetic line introduces a new form.
            form_name = stripped
            pos += 1
        elif lines[pos] == "\n":
            # Blank separator line between forms.
            pos += 1
        else:
            # Pattern lines: the helper parses them and reports where
            # it stopped reading.
            pattern, pos = read_single_poetry_form(lines, pos)
            forms[form_name] = pattern
    return forms
Exemplo n.º 3
0
def read_routes(routes_source: TextIO, airports: AirportDict) -> RouteDict:
    """Return the flight routes from routes_source, including only the ones
    that have an entry in airports. If there are multiple routes between
    routes_source and a destination (on different airlines for example),
    include the destination only once. Routes that include null airport IDs
    should still be included, but routes that have empty IATA should be
    excluded.

    >>> from io import StringIO
    >>> routes_src = StringIO(TEST_ROUTES_SRC)
    >>> actual = read_routes(routes_src, TEST_AIRPORTS_DICT)
    >>> actual == TEST_ROUTES_DICT_FOUR_CITIES
    True
    """
    routes = {}
    src_index = ROUTE_DATA_INDEXES['Source airport']
    dst_index = ROUTE_DATA_INDEXES['Destination airport']
    for line in routes_source.readlines():
        fields = line.strip().split(",")
        src = fields[src_index]
        dst = fields[dst_index]
        # Dict membership is O(1); the original copied the keys into a
        # list and paid an O(n) scan per route.  A set per source
        # de-duplicates destinations served by multiple airlines.
        if src in airports and dst in airports:
            routes.setdefault(src, set()).add(dst)
    return routes
def create_event_list(event_file: TextIO) -> List[Event]:
    """Return a list of Events based on raw list of events in <event_file>.

    Precondition: <event_file> is in the format specified by the assignment
    handout.
    """
    events = []

    for raw in event_file.readlines():
        # Space-separated fields: timestamp, event type, then payload.
        fields = raw.split(" ")
        kind = fields[1]

        if kind == "Close":
            events.append(CloseLine(int(fields[0]), int(fields[2])))
        elif kind == "Arrive":
            # Items arrive as (name, time) pairs starting at index 3.
            items = [Item(fields[i], int(fields[i + 1]))
                     for i in range(3, len(fields), 2)]
            events.append(CustomerArrival(int(fields[0]),
                                          Customer(fields[2], items)))

    return events
Exemplo n.º 5
0
    def format_keys(file: TextIO) -> Dict[str, List[str]]:
        """Return a dictionary key_to_notes where keys are base notes of a key
        and values are all semi-tones of their respective key using the
        data from file. Meant for use with keys.txt.

        Precondition: every line in file has format note: note note note ...
        """

        key_to_notes = {}
        for line in file.readlines():
            # Split at the first colon: left side is the key, right side
            # holds the space-separated notes.
            key, _, rest = line.partition(':')
            # str.split() both strips the trailing newline and splits on
            # whitespace.  The original manual index scan relied on a
            # trailing newline and silently dropped the last character of
            # the final note when the file did not end with one.
            key_to_notes[key] = rest.split()
        return key_to_notes
Exemplo n.º 6
0
def tsv_to_list(stream: TextIO) -> List[List[yattag.doc.Doc]]:
    """Turns a tab-separated table into a list of lists.

    Every cell is HTML-escaped.  When a "@type" column exists, the first
    cell of each row is replaced by a document holding a link to the
    corresponding openstreetmap.org object.
    """
    table = []

    first = True
    type_index = 0
    for line in stream.readlines():
        # Skip blank lines entirely.
        if not line.strip():
            continue
        if first:
            # Header row: remember which column holds the OSM object
            # type ("@type").  NOTE(review): the header row itself still
            # falls through to the cell processing below, so it is also
            # appended to the table — confirm that is intended.
            first = False
            for index, column in enumerate(line.split("\t")):
                if column.strip() == "@type":
                    type_index = index
        cells = [html_escape(cell.strip()) for cell in line.split("\t")]
        # Only linkify when a "@type" column was found (type_index != 0).
        if cells and type_index:
            # We know the first column is an OSM ID.
            try:
                # NOTE(review): assumes html_escape returns a Doc-like
                # object exposing getvalue() — confirm against its
                # definition.
                osm_id = int(cells[0].getvalue())
                osm_type = cells[type_index].getvalue()
                doc = yattag.doc.Doc()
                href = "https://www.openstreetmap.org/{}/{}".format(
                    osm_type, osm_id)
                with doc.tag("a", href=href, target="_blank"):
                    doc.text(str(osm_id))
                cells[0] = doc
            except ValueError:
                # Not an int, ignore.
                pass
        table.append(cells)

    return table
Exemplo n.º 7
0
def read_movies(movie_file: TextIO) -> MovieDict:
    """Return a dictionary mapping movie id to (movie name, movie genres)
    for the movies listed in movie_file.

    >>> movfile = open('movies_tiny.csv')
    >>> movies = read_movies(movfile)
    >>> movfile.close()
    >>> 68735 in movies
    True
    >>> movies[124057]
    ('Kids of the Round Table', [])
    >>> len(movies)
    4
    >>> movies == MOVIE_DICT_SMALL
    True
    """
    result = {}
    # The first line is a header; skip it.
    for record in movie_file.readlines()[1:]:
        fields = record.strip('\n').split(',')
        # Column 0 is the id, column 1 the title, columns 4+ the genres.
        result[int(fields[0])] = (fields[1], fields[4:])
    return result
Exemplo n.º 8
0
def read_routes(routes_source: TextIO, airports: AirportDict) -> RouteDict:
    """Return the flight routes from routes_source, including only the ones
    that have an entry in airports. If there are multiple routes between
    routes_source and a destination (on different airlines for example),
    include the destination only once. Routes that include null airport IDs
    should still be included, but routes that have empty IATA should be
    excluded.

    RouteDict = Dict[str, Set[str]]

    >>> from io import StringIO
    >>> routes_src = StringIO(TEST_ROUTES_SRC)
    >>> actual = read_routes(routes_src, TEST_AIRPORTS_DICT)
    >>> actual == TEST_ROUTES_DICT_FOUR_CITIES
    True
    """
    d = {}
    src_index = ROUTE_DATA_INDEXES['Source airport']
    dst_index = ROUTE_DATA_INDEXES['Destination airport']

    for record in routes_source.readlines():
        source_airport = get_routes_information(record, src_index)
        destination_airport = get_routes_information(record, dst_index)

        # Keep only routes whose two endpoints are known airports.
        # setdefault collapses the original's two duplicated branches
        # (and their duplicated membership tests) into one; the set per
        # source de-duplicates destinations served by several airlines.
        if source_airport in airports and destination_airport in airports:
            d.setdefault(source_airport, set()).add(destination_airport)
    return d
Exemplo n.º 9
0
def read_tweets(file: TextIO) -> Dict[str, List[tuple]]:
    """ Return a dictionary with the key as the lowercase username of each 
    tweeter and the values as a list of tuples with information of each tweet
    that the user has tweeted given file, which contains tweets.
    """
    new_file = file.readlines()
    dictionary = {}
    index = -1
    for tweet in new_file:
        index += 1
        # A short line with no spaces whose second-to-last character is
        # ":" (i.e. "name:\n") is treated as a username header line.
        if len(tweet) > 3 and " " not in tweet and ":" == tweet[-2]:
            tweet = tweet.strip()            
            user = tweet[0:-1]
            dictionary[user.lower()] = []
        # The line right after an end-of-tweet sentinel or a username
        # header is the comma-separated tweet-metadata line.
        # NOTE(review): `user` (and later `tweet_prop`) are referenced
        # before assignment unless the file starts with a username
        # header — presumably guaranteed by the file format; confirm.
        elif index != 0 and new_file[index - 1] == "<<<EOT\n" \
             or (user + ":") in new_file[index - 1]:
            tweet = tweet.strip()            
            tweet_prop = data_list(tweet)
        # Otherwise: accumulate tweet text lines up to the next "<<<EOT"
        # sentinel and commit the tuple built by correct_tweet.
        elif index != 0 and tweet != "<<<EOT\n" and type(tweet_prop) != tuple: 
            end_of_text = new_file.index("<<<EOT\n", index)
            for tweet_text in new_file[index:end_of_text]:
                if tweet_text not in tweet or tweet_text == '\n':
                    tweet = tweet + tweet_text
            tweet_prop = correct_tweet(tweet, tweet_prop)
            dictionary[user.lower()].append(tweet_prop)  
    return dictionary
Exemplo n.º 10
0
def read_curve_file(curve_file: TextIO) -> Dict[Any, Any]:
    """
    Read a curve file with extension .330
    The file format of this file is shown in test_lakeshore_file_parser.py
    in the test module

    The output is a dictionary with keys: "metadata" and "data".
    The metadata dictionary contains the first n lines of the curve file which
    are in the format "item: value". The data dictionary contains the actual
    curve data.
    """
    def _fields(raw: str, parser: type = str) -> List[Any]:
        # Columns are separated by runs of at least two spaces.
        return [parser(part) for part in raw.split("  ") if part != ""]

    line_iter = iter(curve_file.readlines())
    metadata = {}
    # Metadata lines look like "item: value"; the first line without a
    # colon ends the metadata section and is consumed in the process
    # (matching the original takewhile-based parse).
    for raw in line_iter:
        if ":" not in raw:
            break
        key, value = (part.strip() for part in raw.split(":"))
        metadata[key] = value
    # The line after the metadata section is the data header.
    header_items = tuple(part.strip() for part in _fields(next(line_iter)))
    # Everything else (ignoring blank lines) is curve data.
    rows = [_fields(raw, parser=float)
            for raw in line_iter if raw.strip() != ""]
    file_data: Dict[str, Dict[str, Any]] = {
        "metadata": metadata,
        "data": dict(zip(header_items, zip(*rows))),
    }
    return file_data
Exemplo n.º 11
0
def handler(raw_instructions: TextIO) -> int:
    """Return (wait time) * (bus id) for the earliest bus one can take.

    The first input line is the earliest departure timestamp; the second
    is a comma-separated bus-id list where "x" entries are ignored.
    """
    raw_timestamp, raw_buses = map(str.strip, raw_instructions.readlines())
    earliest_timestamp = int(raw_timestamp)
    buses = [int(b) for b in raw_buses.split(",") if b != "x"]

    def first_arrival(bus: int) -> int:
        # First departure of `bus` at or after earliest_timestamp,
        # computed with ceiling division instead of the original
        # multiplier loop that scanned every multiple up to the answer.
        return -(-earliest_timestamp // bus) * bus

    # min with a key also replaces the original's reverse lookup through
    # parallel key/value lists; ties resolve to the first bus, as before.
    best_bus = min(buses, key=first_arrival)
    return (first_arrival(best_bus) - earliest_timestamp) * best_bus
Exemplo n.º 12
0
def read_tweets(tweets_file: TextIO) -> Dict[str, List[tuple]]:
    """Return a dictionary after reading all of the data from the given file and
    formatting it into the dictionary which follows the format outlined in the 
    'read_tweets' function section of the assignment handout.
    """
    tweets_dict = {}
    data_list = tweets_file.readlines()
    # Helpers defined elsewhere: strip_data cleans the raw lines and
    # collect_usernames extracts the username marker lines.
    modified_data_list = strip_data(data_list)
    usernames_list = collect_usernames(modified_data_list)

    for username in usernames_list:
        # Slice out this user's lines: everything between this username
        # marker and the next one (or to end-of-file for the last user).
        # NOTE(review): list.index() returns the FIRST occurrence, so a
        # duplicated username/line would mis-slice — confirm the input
        # format rules that out.
        if username != usernames_list[-1]:
            user_tweets = (modified_data_list[modified_data_list.index(username) + 1:\
                                              modified_data_list.index(usernames_list\
                                               [usernames_list.index(username)+1])])
        else:
            user_tweets = (modified_data_list[modified_data_list.index(username)\
                                              + 1:])
        # The dictionary key drops the username's final character
        # (presumably a trailing ':') and lowercases the rest.
        if user_tweets == []:
            tweets_dict[username[0:len(username) - 1].lower()] = []
        else:
            modified_user_tweets = []
            # Each '<<<EOT' sentinel terminates exactly one tweet.
            num_sentinels = user_tweets.count('<<<EOT')
            for _ in range(num_sentinels):
                # Consume lines up to the sentinel as one tweet.
                tweet_list = []
                while user_tweets[0] != '<<<EOT':
                    tweet_list.append(user_tweets[0])
                    user_tweets.remove(user_tweets[0])
                user_tweets.remove('<<<EOT')
                modified_user_tweets.append(tweet_list)

            tweets_dict[username[0: len(username) - 1].lower()] = \
                                     format_tweet_list(modified_user_tweets)

    return tweets_dict
Exemplo n.º 13
0
 def __init__(self, file: tp.TextIO):
     """Build the tokenizer: compile the master token regex and load the
     source text from `file`.
     """
     # One alternation with a named group per token type, tried in the
     # declaration order of _TOKEN_SPEC.
     self.token_regex = re.compile("|".join(
         f"(?P<{typ.name}>{regx})"
         for typ, regx in self._TOKEN_SPEC.items()))
     # file.read() is equivalent to "".join(file.readlines()) without
     # building the intermediate list of lines.
     self.code = file.read()
     # Current line number and the offset where that line starts.
     self.line = 1
     self.line_start = 0
Exemplo n.º 14
0
def get_from_txt(file: TextIO) -> dict:
    """Yield one real-time-system dict per task set described in file.

    Format: a line holding the number of tasks, followed by that many
    task lines of whitespace-separated "C T D" parameters.

    Bug fix: the original reused a single `rts` dict for every yield, so
    each previously yielded system was mutated by later ones (e.g.
    list(get_from_txt(f)) held N references to the last system).  A
    fresh dict is now created per system.
    """
    param_keys = ["C", "T", "D"]
    rts_counter = 0
    reading_tasks = False

    for line in file.readlines():
        if not reading_tasks:
            # Header line of a new system: the task count.
            number_of_tasks = int(line)
            reading_tasks = True
            rts_counter += 1
            rts = {"id": rts_counter, "tasks": []}
            task_counter = 0
        else:
            number_of_tasks -= 1
            task_counter += 1
            task = {k: int(v) for k, v in zip(param_keys, line.split())}
            task["nro"] = task_counter
            rts["tasks"].append(task)

            if number_of_tasks == 0:
                reading_tasks = False
                yield rts
Exemplo n.º 15
0
def _parse_jsonl(stream: TextIO):
    """
    Parse a json lines file (http://jsonlines.org/).
    :param stream: file stream
    :return: list of dicts
    """
    # Iterate the stream directly: readlines() materializes the whole
    # file as a list first, and json.loads already ignores surrounding
    # whitespace, so the per-line strip() was redundant.
    return [json.loads(line) for line in stream]
def process_queries(queries: typing.TextIO,
                    tournament: Tournament,
                    blank_line: bool = False) -> typing.List[str]:
    """Run each query line in `queries` against `tournament` and collect
    the results.

    Unrecognized queries are reported on stderr and skipped.  When
    blank_line is True, an empty string follows each result (useful when
    the caller prints one entry per line).
    """
    memo = []

    for query_line in queries.readlines():
        stripped_query_line = query_line.strip()
        lower_query_line = stripped_query_line.lower()

        # Generator instead of a list literal: any() can stop at the
        # first match without building a throwaway list.
        if not any(q in lower_query_line for q in VALID_QUERIES):
            print(f"error: Unrecognized query '{stripped_query_line}'",
                  file=sys.stderr)
            continue

        # The first two (lowercased) words name the Tournament method,
        # e.g. "foo bar" -> tournament.foo_bar(...).
        query_tokens = lower_query_line.split(" ")
        query = "_".join(query_tokens[0:2])

        # The rest of the original-cased line is the method argument.
        arg_tokens = stripped_query_line.split(" ")
        arg = " ".join(arg_tokens[2:])

        memo.append(getattr(tournament, query)(arg))

        if blank_line:
            memo.append('')

    return memo
def strip_vars(f: typing.TextIO) -> list:
    """Return f's lines starting from the first "variable" block, with
    standalone `count` identifiers renamed and reading stopped at the
    first line matching an `ignore` prefix.

    NOTE(review): relies on module-level `ignore` (an iterable of line
    prefixes) and COUNT_REPLACEMENT_WORD — semantics inferred from use
    here; confirm against their definitions.
    """
    tmp_lst = []
    i = 0
    # \b word boundaries: replace only the standalone identifier "count".
    count_var_regex = r"\bcount\b"
    count_var_regex_sub = COUNT_REPLACEMENT_WORD
    # Always read from the start of the file.
    f.seek(0)
    try:
        for line in f.readlines():
            # Changes var.count, but not var.count_of_consul, for example
            repl_count_var = re.sub(count_var_regex, count_var_regex_sub, line,
                                    0)
            tmp_lst.append(repl_count_var)
            if line.startswith(tuple(ignore)):
                # This assumes your .tf files have a contiguous block of variables
                tmp_lst.pop()
                break
        # Count how many collected lines precede the first "variable"
        # line; the inner while/break is a one-step increment.
        for line in "".join(tmp_lst).splitlines():
            if line.startswith("variable"):
                break
            while not line.startswith("variable"):
                i += 1
                break
        # Drop everything before the first "variable" line.
        del tmp_lst[:i]
        # Remove trailing newline if it got pulled in
        if "".join(tmp_lst[-1:]).isspace():
            del tmp_lst[-1:]
        # And add a newline to the head for any existing entries
        if not "".join(tmp_lst[0]).isspace():
            tmp_lst.insert(0, "\n")
    except IndexError:
        # tmp_lst ended up empty (no variables found); only report when
        # run with CLI arguments.
        if len(sys.argv) > 1:
            print("INFO: No variables found in " + f.name)
    return tmp_lst
Exemplo n.º 18
0
def load_profiles(profiles_file: TextIO, person_to_friends: Dict[str, List[str]], \
                  person_to_networks: Dict[str, List[str]]) -> None:
    '''Update the person_to_friends dictionary and the person_to_networks
    dictionary to include data from profiles_file.

    Blocks are separated by blank lines.  A block's first line names the
    profile owner as "last,first"; lines containing a comma are friends
    ("last,first"); any other non-blank line is a network name.
    Docstring examples not given since the result depends on input data.
    '''
    file_content = profiles_file.readlines()
    # Split the file into per-person blocks at blank lines.  The
    # original compared strip() against None (never true) and sliced the
    # int line count instead of the line list, raising TypeError on any
    # input.
    blocks = []
    start = 0
    for num in range(len(file_content)):
        if file_content[num].strip() == '':
            blocks.append(file_content[start:num])
            start = num + 1
    blocks.append(file_content[start:])

    for block in blocks:
        # A block needs at least an owner line plus one data line.
        if len(block) > 1:
            owner = block[0].strip().split(',')
            # "last,first" -> "first last"
            key = owner[1] + " " + owner[0]
            for entry in block[1:]:
                parts = entry.strip().split(',')
                if len(parts) > 1:
                    name = parts[1] + " " + parts[0]
                    person_to_friends.setdefault(key, []).append(name)
                    person_to_friends[key].sort()  # keep alphabetical
                else:
                    # Network names are stored stripped (the original
                    # appended the raw line, newline included, and
                    # sorted the wrong dictionary).
                    person_to_networks.setdefault(key, []).append(entry.strip())
                    person_to_networks[key].sort()
Exemplo n.º 19
0
def handler(raw_program: TextIO) -> int:
    """Run the "version 2" decoder-chip program and return the sum of
    all values left in memory.

    "mask" lines set the current 36-bit mask; "mem[a] = v" lines write v
    to every address obtained by overwriting a's bits with the mask's
    1-bits and expanding each floating "X" bit both ways.
    """
    memory: Dict[int, int] = {}
    mask = "X" * 36

    for raw in raw_program.readlines():
        line = raw.strip()

        if line.startswith("mask ="):
            mask = line.split(" = ")[1]
            continue

        target, value_text = line.split(" = ")
        base = bin(int(target.lstrip("mem[").rstrip("]")))[2:].zfill(36)

        # Template address: None marks a floating bit, "1" is forced by
        # the mask, and a "0" mask bit keeps the original address bit.
        template: List[str] = [
            None if m == "X" else (b if m == "0" else "1")
            for m, b in zip(mask, base)
        ]

        # Enumerate every assignment of the floating bits.
        for combo in product("01", repeat=template.count(None)):
            fill = iter(combo)
            address = "".join(
                bit if bit is not None else next(fill) for bit in template)
            memory[int(address, 2)] = int(value_text)

    return sum(memory.values())
Exemplo n.º 20
0
def load_profiles(
    profiles_file: TextIO
) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
    """Return a two-item tuple containing a "person to friends" dictionary
    and a "person_to_clubs" dictionary with the data from
    profiles_file.

    NOTE: Functions (including helper functions) that have a parameter
    of type TextIO do not need docstring examples.

    """
    content = profiles_file.readlines()
    # Sentinel blank line so the final profile is also committed by the
    # blank-line branch below.
    content.append('\n')
    person_friend, person_club, current_friend, current_club = {}, {}, [], []
    profile = (person_friend, person_club)
    for i in range(len(content)):
        if i == 0 or content[i - 1] == '\n':
            # Start of a profile: convert "last, first" to "first last"
            # (rfind(',') + 2 skips the comma and the following space).
            cur = content[i].rstrip('\n')
            cur = ((cur[cur.rfind(',') + 2:]) + " " + (cur[:cur.rfind(',')]))
            # NOTE(review): both dicts initially alias the SAME running
            # lists until the blank-line branch rebinds them — confirm
            # create_list installs per-person copies.
            person_friend[cur], person_club[cur] = current_friend, current_club
        elif content[i] != '\n':
            if ',' in content[i]:
                # Friend line, same "last, first" -> "first last" rewrite.
                n_cur = content[i].rstrip('\n')
                n_cur = (n_cur[n_cur.rfind(',') +
                               2:]) + " " + (n_cur[:n_cur.rfind(',')])
                current_friend.append(n_cur)
                current_friend.sort()
            else:
                # Any other non-blank line is a club name.
                current_club.append(content[i].rstrip('\n'))
                current_club.sort()
        else:
            # Blank line: commit the finished profile via the helper and
            # start fresh lists for the next person.
            create_list(cur, current_friend, current_club, person_friend,
                        person_club)
            current_friend, current_club = [], []
    return profile
Exemplo n.º 21
0
def read_ratings(rating_file: TextIO) -> UserRatingDict:
    """Return a dictionary containing user id to {movie id: ratings} for the
    collection of user movie ratings in rating_file.

    >>> rating_file = open('ratings_tiny.csv')
    >>> ratings = read_ratings(rating_file)
    >>> rating_file.close()
    >>> len(ratings)
    2
    >>> ratings[1]
    {2968: 1.0, 3671: 3.0}
    >>> ratings[2]
    {10: 4.0, 17: 5.0}
    """
    ratings = {}
    # Skip the header row.
    for line in rating_file.readlines()[1:]:
        fields = line.strip('\n').split(',')
        # Renamed from `id`, which shadowed the builtin.
        user_id = int(fields[0])
        movie_id = int(fields[1])
        score = float(fields[2])
        # setdefault replaces the original fetch-modify-store dance.
        ratings.setdefault(user_id, {})[movie_id] = score
    return ratings
Exemplo n.º 22
0
def read_tweets(file: TextIO) -> Dict[str, List[tuple]]:
    """Return a dictionary with the keys being lowercase Twitter usernames and
    the values being lists of (text, date, source, favourites, retweets)
    tuples for each of that user's tweets in the open file `file`.
    """
    new_dict = {}
    usernames, user_num = [], []
    lines = file.readlines()
    # Pass 1: locate the username header lines ("name:") and record
    # their positions.
    for w in range(len(lines)):
        if w == 0 or (lines[w - 1] == '<<<EOT\n' or w - 1 in user_num) and \
           lines[w].strip().endswith(':'):
            username = lines[w].strip().strip(':').lower()
            new_dict[username] = []
            usernames.append(username)
            # Bug fix: the original stored lines.index(lines[w]), which
            # returns the FIRST occurrence of that line's text and so
            # mis-indexes duplicated lines; store the position itself.
            user_num.append(w)
    # Pass 2: for each user, walk their tweets — a comma-separated
    # metadata line followed by text lines up to a '<<<EOT' sentinel.
    for n in range(len(user_num)):
        x = user_num[n] + 1
        while x < len(lines) and (x not in user_num or not
                                  lines[x].strip().endswith(':')):
            tweet_text = ''
            y = x + 1
            while y < len(lines) and lines[y] != '<<<EOT\n':
                tweet_text += lines[y]
                y += 1
            # Split the metadata line once instead of once per field.
            info = lines[x].strip().split(',')
            new_dict[usernames[n]].append((tweet_text.strip(),
                                           int(info[FILE_DATE_INDEX]),
                                           info[FILE_SOURCE_INDEX],
                                           int(info[FILE_FAVOURITE_INDEX]),
                                           int(info[FILE_RETWEET_INDEX])))
            x = y + 1
    return new_dict
Exemplo n.º 23
0
def read_edges(infile: TextIO) -> defaultdict:
    """Return an adjacency mapping from each source vertex to the list
    of its successors.

    Each line of infile holds one directed edge as "v1 v2".

    Note: the original return annotation was `defaultdict(list)` — a
    defaultdict *instance* constructed at definition time, not a type.
    """
    edges: defaultdict = defaultdict(list)
    # Iterate the file directly; readlines() would load it all first.
    for line in infile:
        v1, v2 = map(int, line.split())
        edges[v1].append(v2)
    return edges
Exemplo n.º 24
0
def read_requirements_txt_from(file: TextIO) -> Iterator[Package]:
    """Read a requirements.txt and yield (package_name, version) pairs.

    Hash-only lines, editable installs ("-e"), comment lines and blank
    lines are skipped; environment markers after ";" are ignored.
    Unparseable lines are reported on stderr and skipped.
    """
    for line in file.readlines():
        # Strip once; the original recomputed line.strip() per check.
        stripped = line.strip()
        # Skip blank, hash-only, editable-install and comment lines.
        if not stripped or stripped.startswith(("--hash", "-e", "#")):
            continue

        try:
            # Drop any environment marker ("; python_version < ...").
            requirement = line.split(";")[0]
            package_name, package_version = (
                requirement.strip().split(" ")[0].split("=="))
            yield package_name, package_version
        except (ValueError, KeyError):
            click.secho("Warning! ", err=True, nl=False, fg="yellow")
            click.secho(
                f"Unable extract package and version from '{line.strip()}'. Skipping!",
                err=True,
            )
            continue
Exemplo n.º 25
0
def read_repeatmasker(motif_hash: Dict[str, int], motif_mut_hash: Dict[str,
                                                                       int],
                      filestream: TextIO) -> Iterator[Repeat]:
    """Reads Repeatmasker output and yields annotated Repeat records.

    Args:
        motif_hash (Dict[str, int]): Simple repeats to search for
        motif_mut_hash (Dict[str, int]): Mutated simple repeats to search for
        filestream (TextIO): Stream with Repeatmasker output

    Yields:
        Repeat records that have a contig and a non-zero type.
    """

    len0 = len(_MOTIF0)
    for line in filestream.readlines():
        repeat = _get_repeat(line)

        # Unclassified simple/satellite repeats may still be HSATII when
        # their "(MOTIF)n" pattern matches the known motifs.
        if repeat.typ == 0 and repeat.fam in ("Simple_repeat", "Satellite"):
            motif = re.match(r"^\(([ACGT]+)\)n", repeat.rep)
            if motif and motif.group(1) in motif_hash:
                repeat.typ = _TYPES['HSATII']
            elif motif and len(motif.group(1)) % len0 == 0:
                # Motif length is a multiple of the base motif: count
                # exact and mutated sub-motifs and require that together
                # they tile the whole motif.
                count, count_mut = _check_motifs(motif.group(1),
                                                 motif_mut_hash, motif_hash)
                if count > 0 and (count + count_mut) * len0 == len(
                        motif.group(1)):
                    repeat.typ = _TYPES['HSATII']
        if repeat.ctg and repeat.typ > 0:
            yield repeat
Exemplo n.º 26
0
def read_pronouncing_dictionary(
        pronunciation_file: TextIO) -> PRONOUNCING_DICTIONARY:
    """Read pronunciation_file, which is in the format of the CMU Pronouncing
    Dictionary, and return the pronunciation dictionary.
    
    Precondition: The pronunciation file has the first line commented followed
    by every subsequent line starting with a capitalized word and followed by 
    capitalized phonemes of that word with every phoneme seperated by 
    a space and words seperated by new line characters
    
    >>> import io
    >>> dict_file = io.StringIO(SAMPLE_DICTIONARY_FILE)
    >>> result = read_pronouncing_dictionary(dict_file)
    >>> result == EXPECTED_DICTIONARY
    True
    >>> import io
    >>> dict_file_2 = io.StringIO(SAMPLE_DICTIONARY_FILE_2)
    >>> result = read_pronouncing_dictionary(dict_file_2)
    >>> result == EXPECTED_DICTIONARY_2
    True
    """

    pronouncing_dictionary = {}
    for line in pronunciation_file.readlines():
        # Skip comment lines (";;;") and blank lines; `line not in '\n'`
        # matches the original substring test, which is only true for
        # "" and "\n".
        if ";;;" not in line and line not in '\n':
            # Star-unpacking replaces the original components[0] +
            # components.remove(first_element) dance.
            word, *phonemes = line.split()
            pronouncing_dictionary[word] = phonemes
    return pronouncing_dictionary
Exemplo n.º 27
0
def read_tweets(fp: TextIO) -> Dict[str, List[tuple]]:
    """
    Read all of the data from the given file into a dictionary

    Maps each username (from "name:" header lines) to a list of
    (text, date, source, favourites, retweets) tuples.
    """
    tweets_d = {}
    i = 0
    tweets_lines = fp.readlines()
    user = ""

    while i < len(tweets_lines):
        # A line ending in ":" is a username header.
        # NOTE(review): strip()[-1] raises IndexError on a blank line —
        # presumably the format forbids blank lines here; confirm.
        if tweets_lines[i].strip()[-1] == ":":
            user = tweets_lines[i].strip().replace(":", "")
        else:
            # Comma-separated tweet-metadata line.
            tweet_lst = tweets_lines[i].strip().split(",")
            text = ""
            flag = True
            # Accumulate text lines until the EOT sentinel line.
            while flag:
                i += 1
                if EOT in tweets_lines[i]:
                    flag = False
                else:
                    text += tweets_lines[i]
            if user not in tweets_d:
                tweets_d[user] = []
            tweets_d[user].append(
                (text.strip(), int(tweet_lst[FILE_DATE_INDEX]),
                 tweet_lst[FILE_SOURCE_INDEX],
                 int(tweet_lst[FILE_FAVOURITE_INDEX]),
                 int(tweet_lst[FILE_RETWEET_INDEX])))
        i += 1
    return tweets_d
Exemplo n.º 28
0
def read_poetry_form_descriptions(poetry_forms_file: TextIO) -> POETRY_FORMS:
    """Return a dictionary of poetry form name to poetry pattern for the poetry
    forms in poetry_forms_file.

    >>> form_file = io.StringIO(SAMPLE_POETRY_FORM_FILE)
    >>> result = read_poetry_form_descriptions(form_file)
    >>> result == EXPECTED_POETRY_FORMS
    True
    """
    forms = {}
    key = ''
    counts = []
    rhymes = []
    for line in poetry_forms_file.readlines():
        if line == '\n':
            # Commit the finished form.  Guarding on `key` (and clearing
            # it) fixes the original bug where consecutive or trailing
            # blank lines re-committed the same key with empty lists,
            # overwriting an already-stored pattern.
            if key:
                forms[key] = (counts, rhymes)
                key = ''
            counts = []
            rhymes = []
        elif line[0].isalpha():
            # A name line starts a new form.
            key = line.strip()
        elif line[0].isdigit():
            # Pattern line: "<syllable count> <rhyme letter>".
            # str.split() handles runs of spaces like re.split(' +').
            parts = line.split()
            counts.append(int(parts[0]))
            rhymes.append(parts[1])

    # Commit a final form that was not followed by a blank line.
    if key:
        forms[key] = (counts, rhymes)
    return forms
Exemplo n.º 29
0
def read_ratings(rating_file: TextIO) -> UserRatingDict:
    """Return a dictionary containing user id to {movie id: ratings} for the
    collection of user movie ratings in rating_file.

    >>> rating_file = open('ratings_tiny.csv')
    >>> ratings = read_ratings(rating_file)
    >>> rating_file.close()
    >>> len(ratings)
    2
    >>> ratings[1]
    {2968: 1.0, 3671: 3.0}
    >>> ratings[2]
    {10: 4.0, 17: 5.0}
    """
    user_rating = {}

    # Consume the header row before reading the data rows.
    rating_file.readline()

    for record in rating_file.readlines():
        fields = record.strip('\n').split(',')
        user_id = int(fields[0])
        movie_id = int(fields[1])
        score = float(fields[2])

        if user_id in user_rating:
            user_rating[user_id][movie_id] = score
        else:
            user_rating[user_id] = {movie_id: score}

    return user_rating
Exemplo n.º 30
0
def read_movies(movie_file: TextIO) -> MovieDict:
    """Return a dictionary containing movie id to (movie name, movie genres)
    in the movie_file.

    >>> movfile = open('movies_tiny.csv')
    >>> movies = read_movies(movfile)
    >>> movfile.close()
    >>> 68735 in movies
    True
    >>> movies[124057]
    ('Kids of the Round Table', [])
    >>> len(movies)
    4
    >>> movies == MOVIE_DICT_SMALL
    True
    """
    movie_dict = {}

    # Skip the header line.
    movie_file.readline()

    for record in movie_file.readlines():
        fields = record.strip('\n').split(',')
        # Column 0: id, column 1: title, columns 4+: genres.
        movie_dict[int(fields[0])] = (fields[1], fields[4:])
    return movie_dict
Exemplo n.º 31
0
    def __init__(self, f: TextIO):
        """
        Create a new `PushbackFile` object to wrap a file-like object.

        **Parameters**

        - `f` (file-like object): A file-like object that contains both a
          `write()` method and a `flush()` method.
        """
        # list(f.read()) produces the same per-character buffer as the
        # original listcomp over ''.join(f.readlines()), without the
        # intermediate list of lines and joined string copy.
        self.__buf = list(f.read())
Exemplo n.º 32
0
    def _search(self, f: TextIO, filename: Optional[str] = None) -> bool:
        """Search f paragraph by paragraph, printing each matching
        paragraph, and return whether a match was found.

        NOTE(review): `filename` is accepted but never used — the file
        header always prints "(unknown)"; confirm whether it should be
        interpolated instead.
        """
        paragraph = []
        last_empty = False
        found = False
        eop_line = None

        # Annotation corrected from NoReturn to None: this helper
        # returns normally after printing.
        def print_paragraph(paragraph: Sequence[str]) -> None:
            if self._print_file_header:
                print(f'::::::::::\n(unknown)\n::::::::::\n')
                self._print_file_header = False
            print('\n'.join(paragraph))
            if self.print_eop and (eop_line is not None):
                print(eop_line)
            else:
                print()

        for line in f.readlines():
            if self.eop_regexp.match(line):
                # End of current paragraph, or a redundent (consecutive)
                # end-of-paragraph mark.  If it's truly the first one since
                # the end of the paragraph, search the accumulated lines of
                # the paragraph.

                # Remember the separator (newline removed) so it can be
                # echoed after the paragraph when print_eop is set.
                if line[-1] == '\n':
                    eop_line = line[:-1]
                else:
                    eop_line = line

                if not last_empty:
                    last_empty = True
                    # NOTE(review): `found` is overwritten here for each
                    # paragraph, so a non-matching final paragraph can
                    # clear an earlier match from the return value —
                    # confirm whether `found = ... or found` was meant.
                    found = self._search_paragraph(paragraph)
                    if found:
                        print_paragraph(paragraph)
                    paragraph = []

            else:
                # Save this line in the current paragraph buffer
                if line[-1] == '\n':
                    line = line[:-1]
                paragraph += [line]
                last_empty = False

        # We might have a paragraph left in the buffer. If so, search it.

        if not last_empty:
            if self._search_paragraph(paragraph):
                found = True
                print_paragraph(paragraph)

        return found
def load_profiles(profiles_file: TextIO, person_to_friends: Dict[str, List[str]], \
                  person_to_networks: Dict[str, List[str]]) -> None:
    '''Update the person_to_friends dictionary and the person_to_networks
    dictionary to include data from profiles_file.

    Blocks are separated by blank lines: a block's first line names the
    profile owner, lines containing a comma are friends, and any other
    non-blank line is a network name.
    '''
    # Bug fix: the original wrote `none` (a NameError) instead of None,
    # so the function crashed on its first call.
    user = None
    for line in profiles_file.readlines():
        if not user:
            # First line of a block: the profile owner.
            user = to_user(line)
            create_key(user, person_to_friends)
            create_key(user, person_to_networks)
        elif len(line.strip()) == 0:
            # Blank line ends the current profile block.
            user = None
        elif ',' in line:
            person_to_friends[user].append(to_user(line))
        else:
            person_to_networks[user].append(line.strip())