示例#1
0
def _parse_joined_events(d: FacebookJson) -> Iterator[AcceptedEvent]:
    """Yield one AcceptedEvent per entry of d["event_responses"]["events_joined"].

    The "start_timestamp" and "end_timestamp" of every entry are converted
    with parse_datetime_sec.
    """
    joined = d["event_responses"]["events_joined"]
    for item in joined:
        title = item["name"]
        begins = parse_datetime_sec(item["start_timestamp"])
        finishes = parse_datetime_sec(item["end_timestamp"])
        yield AcceptedEvent(name=title, starts_dt=begins, ends_dt=finishes)
示例#2
0
def _parse_address_book(d: FacebookJson) -> Iterator[Contact]:
    """Yield a Contact for every entry found two mapping levels below ``d``.

    The phone number is read from the first item of each entry's "details"
    list; the created/updated timestamps go through parse_datetime_sec.
    """
    # iterating the outer values() skips the top-level address book name
    for book in d.values():
        for entries in book.values():
            for person in entries:
                full_name = person["name"]
                number = person["details"][0]["contact_point"]
                created_at = parse_datetime_sec(person["created_timestamp"])
                updated_at = parse_datetime_sec(person["updated_timestamp"])
                yield Contact(
                    name=full_name,
                    phone_number=number,
                    created=created_at,
                    updated=updated_at,
                )
示例#3
0
def _parse_authorized_logins(d: FacebookJson) -> Iterator[AdminAction]:
    """Yield an AdminAction for each entry of d["recognized_devices"].

    The description is "Known Device: <name>". When an entry carries an
    "updated_timestamp", its parsed value is attached as
    metadata["updated_at"]; otherwise the metadata dict is empty.
    """
    for device in d["recognized_devices"]:
        # "updated_timestamp" is optional, so only parse it when present
        extra = (
            {"updated_at": parse_datetime_sec(device["updated_timestamp"])}
            if "updated_timestamp" in device
            else {}
        )
        label = "Known Device: {}".format(device["name"])
        created_at = parse_datetime_sec(device["created_timestamp"])
        yield AdminAction(
            description=label,
            dt=created_at,
            ip=device["ip_address"],
            user_agent=device["user_agent"],
            metadata=extra,
        )
示例#4
0
def _parse_messages_in_conversation(
    messages: List[FacebookJson],
) -> Iterator[Res[Message]]:
    """Yield a Message for each parseable entry of ``messages``.

    "Unsubscribe" messages, messages carrying "photos" or "sticker", and
    messages that have no fields beyond sender/timestamp/type are skipped.
    Anything else that cannot be handled is reported by *yielding* (not
    raising) a RuntimeError describing the message.
    """
    bare_keys = set(["sender_name", "timestamp_ms", "type"])
    for msg in messages:
        # "timestamp_ms" is divided by 1000 before being handed to the parser
        sent_at = parse_datetime_sec(msg["timestamp_ms"] / 1000)
        sender = msg["sender_name"]
        kind = msg["type"]
        if kind == "Unsubscribe":
            continue
        if kind not in ["Generic", "Share"]:
            yield RuntimeError("Not sure how to parse message for type: {}".format(msg))
            continue
        # photo/sticker messages are not interesting in this context; they can
        # be analysed separately (see my/photos.py)
        if "photos" in msg or "sticker" in msg:
            continue
        if "content" in msg:
            yield Message(
                dt=sent_at,
                author=sender,
                content=msg["content"],
                metadata=msg.get("share"),
            )
        elif set(msg.keys()) <= bare_keys:
            # no field with content at all for some reason, so ignore it
            continue
        else:
            yield RuntimeError(
                "Not sure how to parse message without 'photos' or 'content': {}".format(
                    msg
                )
            )
示例#5
0
def _parse_file(histfile: Path) -> Results:
    """Yield an Entry for each timestamped command in ``histfile``.

    A line starting with "#" whose remainder is accepted by
    parse_datetime_sec (e.g. '#1620931766') starts a new entry. Every other
    line, including "#" lines that fail to parse, is appended to the current
    command, so multi-line commands are kept together. Text seen before the
    first timestamp line is discarded.

    The file is read as latin-1 and is closed as soon as iteration over its
    lines finishes (or the generator is closed).
    """
    dt: Optional[datetime] = None
    command_buf = ""  # current command
    # use a context manager so the file handle is not leaked
    with histfile.open(encoding="latin-1") as hist:
        for line in hist:
            if line.startswith("#"):
                # parse lines like '#1620931766'
                # possible string datetime
                sdt = line[1:].strip()  # remove newline
                try:
                    newdt = parse_datetime_sec(sdt)
                except Exception as e:
                    # not a timestamp line; fall through and treat it as
                    # part of the command text
                    logger.debug(f"Error while parsing datetime {e}")
                else:
                    # successfully parsed a datetime line:
                    # yield old data, then start collecting the next entry
                    if dt is not None:
                        # rstrip \n gets rid of the last newline for each command
                        yield Entry(dt=dt, command=command_buf.rstrip("\n"))
                    # set new datetime for next entry
                    dt = newdt
                    # overwrite command buffer
                    command_buf = ""
                    continue
            # otherwise, append. this already includes newline
            command_buf += line
    # yield final command
    if dt is not None and command_buf.strip():
        yield Entry(dt=dt, command=command_buf.rstrip("\n"))
示例#6
0
def _parse_file(histfile: Path) -> LinearResults:
    """Yield a LinearResult for each CSV row of ``histfile``.

    The columns are used positionally: datetime (via parse_datetime_sec),
    duration (converted with int), application, window title. Rows on which
    the csv module raises csv.Error are skipped.
    """
    with histfile.open("r", encoding="utf-8", newline="") as fh:
        raw = fh.read()
    # drop every carriage return (broken ^M characters) so only unix-style
    # line breaks remain
    cleaned = StringIO(raw.replace("\r", ""))
    reader = csv.reader(
        cleaned, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    while True:
        try:
            fields = next(reader)
            when = parse_datetime_sec(fields[0])
            length = int(fields[1])
            yield LinearResult(
                dt=when,
                duration=length,
                application=fields[2],
                window_title=fields[3],
            )
        except StopIteration:
            # reader exhausted
            return
        except csv.Error:
            # some lines contain the NUL byte; this seems to come from
            # x-lib/encoding errors producing malformed application/file
            # names, so ignore those rows and move on
            continue
示例#7
0
def _parse_account_activity(d: FacebookJson) -> Iterator[AdminAction]:
    """Yield an AdminAction for each entry of d["account_activity"].

    Each entry supplies the description ("action"), the timestamp (parsed
    with parse_datetime_sec), the IP address and the user agent.
    """
    for entry in d["account_activity"]:
        what = entry["action"]
        when = parse_datetime_sec(entry["timestamp"])
        yield AdminAction(
            description=what,
            dt=when,
            ip=entry["ip_address"],
            user_agent=entry["user_agent"],
        )
示例#8
0
def _parse_group_comments(d: FacebookJson) -> Iterator[Comment]:
    """Yield a Comment for each entry of d["comments"].

    Only the first item of an entry's "data" list is read: its nested
    "comment" mapping provides the comment text and the group (stored as
    metadata). The entry's "title" becomes the action.
    """
    for entry in d["comments"]:
        remark = entry["data"][0]["comment"]
        text = remark["comment"]
        title = entry["title"]
        when = parse_datetime_sec(entry["timestamp"])
        yield Comment(content=text, action=title, dt=when, metadata=remark["group"])
示例#9
0
def _parse_json_dump(p: Path) -> Results:
    """Yield a "chatlog" Event for every item of the JSON document stored at ``p``.

    Each item must provide "dt" (parsed with parse_datetime_sec), "channel"
    and "message" (passed on as the event's context).
    """
    records = json.loads(p.read_text())
    for record in records:
        when = parse_datetime_sec(record["dt"])
        yield Event(
            event_type="chatlog",
            dt=when,
            channel=record["channel"],
            context=record["message"],
        )
示例#10
0
def _parse_group_posts(d: FacebookJson) -> Iterator[Union[Comment, Post]]:
    """Yield a Comment or Post for every entry nested two mapping levels below ``d``.

    Only the first item of an entry's "data" list is inspected: if it has a
    "comment" key a Comment is produced (with the group as metadata),
    otherwise its "post" value is used to build a Post.
    """
    for section in d.values():
        for activity in section.values():
            for item in activity:
                payload = item["data"][0]
                if "comment" not in payload.keys():
                    yield Post(
                        content=payload["post"],
                        action=item["title"],
                        dt=parse_datetime_sec(item["timestamp"]),
                    )
                    continue
                remark = payload["comment"]
                yield Comment(
                    content=remark["comment"],
                    action=item["title"],
                    dt=parse_datetime_sec(item["timestamp"]),
                    metadata=remark["group"],
                )
示例#11
0
def _parse_json_file(p: Path) -> Results:
    """Yield an Event for every item of the JSON document stored at ``p``.

    Each item is a pair ``[dt, [tag, joined]]``; ``joined`` is split on "|"
    to form the event's metadata list.
    """
    for when, (tag, joined) in json.loads(p.read_text()):
        parsed = parse_datetime_sec(when)
        parts = joined.split("|")
        yield Event(dt=parsed, event_tag=tag, metadata=parts)
示例#12
0
def _parse_metadata(histline: str) -> Optional[Tuple[datetime, int, str]]:
    """
    Match ``histline`` against PATTERN and return (datetime, duration, command)
    built from its first three groups, or None when the line does not match.
    """
    found = PATTERN.match(histline)
    if not found:
        return None
    fields = found.groups()
    when = parse_datetime_sec(fields[0])
    duration = int(fields[1])
    return (when, duration, fields[2])
示例#13
0
def _parse_admin_records(d: FacebookJson) -> Iterator[AdminAction]:
    """Yield an AdminAction for each entry of d["admin_records"].

    The description comes from the record's "event"; timestamp, IP address
    and user agent come from the record's nested "session" mapping.
    """
    for record in d["admin_records"]:
        session = record["session"]
        what = record["event"]
        when = parse_datetime_sec(session["created_timestamp"])
        yield AdminAction(
            description=what,
            dt=when,
            ip=session["ip_address"],
            user_agent=session["user_agent"],
        )
示例#14
0
def _parse_photo_ips(d: FacebookJson) -> Iterator[UploadedPhoto]:
    """Yield an UploadedPhoto for each entry of d["photos"] that records an upload IP.

    Entries lacking media_metadata -> photo_metadata -> upload_ip are
    silently skipped.
    """
    for photo in d["photos"]:
        if "media_metadata" not in photo:
            continue
        media = photo["media_metadata"]
        if "photo_metadata" not in media:
            continue
        details = media["photo_metadata"]
        if "upload_ip" not in details:
            continue
        taken = parse_datetime_sec(photo["creation_timestamp"])
        yield UploadedPhoto(dt=taken, ip=details["upload_ip"])
示例#15
0
def _parse_achievement(ach: Dict[str, Any], game_name: str) -> Achievement:
    """Build an Achievement for ``game_name`` from the raw ``ach`` mapping.

    ``achieved_on`` is only populated (from ach["progress"]["data"]) when
    ach["progress"]["unlocked"] is truthy; the icon is optional.
    """
    progress = ach["progress"]
    unlocked = progress["unlocked"]
    # parsing may raise and is not handled here
    # NOTE(review): the original comment says the caller catches it - confirm
    unlocked_at = parse_datetime_sec(progress["data"]) if unlocked else None
    return Achievement(
        title=ach["title"],
        description=ach["description"],
        game_name=game_name,
        achieved=unlocked,
        achieved_on=unlocked_at,
        icon=ach.get("icon"),
    )
示例#16
0
def _parse_reactions(d: FacebookJson) -> Iterator[Action]:
    """Yield an Action (title + parsed timestamp) for each entry of d["reactions"]."""
    for entry in d["reactions"]:
        title = entry["title"]
        when = parse_datetime_sec(entry["timestamp"])
        yield Action(description=title, dt=when)
示例#17
0
def _parse_installed_apps(d: FacebookJson) -> Iterator[Action]:
    """Yield an "<name> was installed" Action for each entry of d["installed_apps"]."""
    for entry in d["installed_apps"]:
        summary = "{} was installed".format(entry["name"])
        when = parse_datetime_sec(entry["added_timestamp"])
        yield Action(description=summary, dt=when)
示例#18
0
def _parse_app_posts(d: FacebookJson) -> Iterator[Action]:
    """Yield an Action (title + parsed timestamp) for each entry of d["app_posts"]."""
    for entry in d["app_posts"]:
        title = entry["title"]
        when = parse_datetime_sec(entry["timestamp"])
        yield Action(description=title, dt=when)
示例#19
0
def _parse_deleted_friends(d: FacebookJson) -> Iterator[Friend]:
    """Yield a Friend with ``added=False`` for each entry of d["deleted_friends"]."""
    for entry in d["deleted_friends"]:
        who = entry["name"]
        when = parse_datetime_sec(entry["timestamp"])
        yield Friend(name=who, dt=when, added=False)
示例#20
0
def _parse_post_attachment_item(
    post: FacebookJson, dat: FacebookJson
) -> Iterator[Res[Action]]:
    """Yield the Action (or a RuntimeError) for a single attachment data item of ``post``."""
    if "media" in dat:
        mdat = dat["media"]
        if "description" in mdat:
            # image where I described something
            yield Action(
                description=mdat["description"],
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=mdat,
            )
        elif "title" in mdat:
            # image when I just posted to an album
            yield Action(
                description="Posted to Album {}".format(mdat["title"]),
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=mdat,
            )
        else:
            yield RuntimeError("No known way to parse image post {}".format(post))
    elif "place" in dat:
        # check-in into place
        if "name" in dat["place"]:
            yield Action(
                description="Visited {}".format(dat["place"]["name"]),
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=dat,
            )
        else:
            yield RuntimeError(
                "No known way to parse location post {}".format(post)
            )
    elif "life_event" in dat:
        # started high school etc.
        ddat = dat["life_event"]
        yield Action(
            description=ddat["title"],
            dt=parse_datetime_sec(post["timestamp"]),
            metadata=ddat,
        )
    elif "title" in post:
        # third party app event (e.g. Listened to Spotify Song).
        # Some of these have no "external_context" but a "text" field that
        # looks like a stringified version of the data (seems like bad data
        # handling on facebook's part); both variants are still events and
        # produce the same Action.
        if "external_context" in dat or "text" in dat:
            yield Action(
                description=post["title"],
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=dat,
            )
        else:
            yield RuntimeError(
                "No known way to parse attachment post with title {}".format(post)
            )
    else:  # unknown data type
        yield RuntimeError(
            "No known way to parse data type with attachment {}".format(post)
        )


def _parse_post_with_attachments(post: FacebookJson) -> Iterator[Res[Action]]:
    """Validate the shape of post["attachments"] and yield a result per attachment."""
    att = post["attachments"]
    # e.g. photo with a description
    # make sure the structure looks like a media post
    if not (len(att) >= 1 and "data" in att[0] and len(att[0]["data"]) >= 1):
        yield RuntimeError("No known way to parse attachment post {}".format(post))
        return
    # make sure each data item has only one item of media; the list is built
    # eagerly (as before) so a missing "data" key on any attachment still raises
    if not all([len(attach["data"]) == 1 for attach in att]):
        # unknown structure
        yield RuntimeError("No known way to parse data from post {}".format(post))
        return
    att_data = [attach["data"][0] for attach in att]
    for dat in att_data:
        yield from _parse_post_attachment_item(post, dat)


def _parse_post_single_data(post: FacebookJson) -> Iterator[Res[Union[Post, Action]]]:
    """Yield the result for a post whose "data" list has exactly one item."""
    dat = post["data"][0]
    # basic post I wrote on my timeline
    if "post" in dat and isinstance(dat["post"], str) and "title" in post:
        yield Post(
            content=dat["post"],
            dt=parse_datetime_sec(post["timestamp"]),
            action=post["title"],
        )
    elif "profile_update" in dat:
        yield Action(
            description="Updated Profile",
            dt=parse_datetime_sec(post["timestamp"]),
            metadata=dat["profile_update"],
        )
    else:
        yield RuntimeError("No known way to parse basic post {}".format(post))


def _parse_posts(d: FacebookJson) -> Iterator[Res[Union[Post, Action]]]:
    """Yield a Post or Action for each post-like entry of ``d``.

    ``d`` is either the iterable of posts itself or a dict whose
    "profile_updates" value is that iterable. Entries are dispatched on
    their shape:

    * has "attachments": one result per attachment data item
      (media, place check-in, life event, third-party app event)
    * has a single-item "data" list: a timeline Post or a profile update
    * has only "timestamp" and "title": an Action with no content

    Entries that match none of the known shapes are reported by *yielding*
    (not raising) a RuntimeError describing the post.
    """
    all_posts = d
    # handle both profile updates and posts
    if isinstance(all_posts, dict) and "profile_updates" in all_posts:
        all_posts = all_posts["profile_updates"]
    for post in all_posts:
        if "attachments" in post:
            yield from _parse_post_with_attachments(post)
        elif "data" in post and len(post["data"]) == 1:
            yield from _parse_post_single_data(post)
        # post without any actual content (e.g. {'timestamp': 1334515711, 'title': 'Sean Breckenridge posted in club'})
        # treat this as an action since I have no content here
        elif set(("timestamp", "title")) == set(post.keys()):
            yield Action(
                description=post["title"], dt=parse_datetime_sec(post["timestamp"])
            )
        else:
            yield RuntimeError("No known way to parse post {}".format(post))
示例#21
0
def _parse_group_activity(d: FacebookJson) -> Iterator[Action]:
    """Yield an Action (title + parsed timestamp) for each entry of d["groups_joined"]."""
    for entry in d["groups_joined"]:
        title = entry["title"]
        when = parse_datetime_sec(entry["timestamp"])
        yield Action(description=title, dt=when)
示例#22
0
def _parse_search_history(d: FacebookJson) -> Iterator[Search]:
    """Yield a Search for each entry of d["searches"].

    Every entry is asserted to carry exactly one "data" item, whose "text"
    is the query.
    """
    for entry in d["searches"]:
        payload = entry["data"]
        assert len(payload) == 1
        text = payload[0]["text"]
        when = parse_datetime_sec(entry["timestamp"])
        yield Search(query=text, dt=when)
示例#23
0
def _parse_page_likes(d: FacebookJson) -> Iterator[Action]:
    """Yield a "Liked Page <name>" Action for each entry of d["page_likes"]."""
    for entry in d["page_likes"]:
        summary = "Liked Page {}".format(entry["name"])
        when = parse_datetime_sec(entry["timestamp"])
        yield Action(description=summary, dt=when)