Example #1
    def categorize_friends(self, friend):
        """
        Edges status logic as follows:
        Old: Friends who have not logged on since self.tSinceLogin
        cross server: Friends who are on different servers
        Both: both old and cross server (these are so rare...)
        normal: The rest
        """
        character_id = friend.get("character_id", -1)

        if "world_id" not in friend:
            fetch_logger().critical(f"{friend}")
            return "Damaged"

        world_id = friend["world_id"]
        last_online = int(friend.get("last_login_time", -1))

        # A friend is cross server if they sit on a different world, and old
        # if their last login predates the inactivity cutoff.
        cross_server = int(world_id) != int(self.server_id)
        old = last_online < self.tSinceLogin

        if cross_server and old:
            return "both"
        if cross_server:
            return "cross server"
        if old:
            return "old"
        return "normal"
Example #2
    def get_friendlist_network(self):

        # Get the starting nodes from the leader-boards.
        # If we already have seed nodes for the day simply retrieve them,
        # otherwise gather some.
        existing_seeds = single_column(self.archive_connection,
                                       "SELECT name FROM seed_nodes")

        if self.table_name in existing_seeds:
            fetch_logger().info(
                "We already have fresh player ids for this server from today; loading them"
            )
            seed = single_column(
                self.archive_connection,
                "SELECT seed_nodes FROM seed_nodes WHERE name = ?",
                (self.table_name, ),
            )[0]
            self.initial_character_ids = seed.split(",")
        else:
            fetch_logger().info(
                "Picking fresh player ids from server leaderboard")
            self.initial_character_ids = self.leader_board_sample(10)

        # Get raw responses from the server for the nodes we got from the
        # leader board earlier.
        initial_raw_friendlists = self.get_friends(self.initial_character_ids)

        friends_to_check = self.unpack_friendlists(initial_raw_friendlists)

        # Breadth-first expansion: keep fetching friend lists for the newly
        # discovered characters until the frontier is empty.
        while len(friends_to_check) > 0:

            next_raw_friendlists = self.get_friends(friends_to_check)

            friends_to_check = self.unpack_friendlists(next_raw_friendlists)
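
The single_column and multi_column helpers are used throughout these snippets but are not shown. A minimal sketch of what they might look like, with names and signatures inferred from how they are called here (the parameter handling is an assumption):

import sqlite3
from typing import Iterable, Optional


def single_column(connection: sqlite3.Connection, query: str,
                  params: Optional[Iterable] = None) -> list:
    # Run a query and return the first column of every row as a flat list.
    cursor = connection.execute(query, tuple(params or ()))
    return [row[0] for row in cursor.fetchall()]


def multi_column(connection: sqlite3.Connection, query: str,
                 params: Optional[Iterable] = None) -> list:
    # Run a query and return the full rows as tuples, one per result.
    cursor = connection.execute(query, tuple(params or ()))
    return cursor.fetchall()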
Example #3
def run_PC():
    """Crawl the PC servers specified on the command line."""
    argument_parser = setup_cli_options()
    parser_options = argument_parser.parse_args()
    for initials in parser_options.target_initials:
        server_crawler = MainDataCrawler(initials, parser_options.restart,
                                         parser_options.name_overwrite)
        fetch_logger().info(f"Now crawling {server_crawler.server_name}")
        server_crawler.run()
Example #4
    def archive_character_data(self, graph):
        """
        Fetch data for the nodes in the graph. Only including those which are online.

        """
        nodes = graph.nodes()

        # Find only those characters who are not "OLD"
        # old_id = single_column(self.database_connection,
        #                        f"SELECT character_id FROM {self.table_name}_character_info WHERE last_login_date < ?",
        #                        (self.tSinceLogin,))
        # remaining_nodes = [n for n in nodes if n not in old_id]
        remaining_nodes = nodes
        # Gets character attributes for each found in the friend lists
        archive_id = set(
            single_column(self.archive_connection,
                          f"SELECT character_id FROM {self.table_name}_node"))
        remaining_nodes = [n for n in remaining_nodes if n not in archive_id]
        re_count = len(remaining_nodes)
        fetch_logger().info(
            f"Number of nodes in graph is: {len(nodes)} Number of unarchived nodes is: {re_count}"
        )
        # Break the list up into batches of CHARACTER_INFO_BATCH_SIZE ids.
        small_lists = chunks(remaining_nodes, CHARACTER_INFO_BATCH_SIZE)

        completed_jobs = 0
        for character_id_batch in small_lists:

            character_ids = ",".join(character_id_batch)
            url = f"http://census.daybreakgames.com/s:{SERVICE_ID}/get/" \
                  f"{self.namespace}/character/?character_id={character_ids}" \
                  f"&c:resolve=outfit,name,stats,times,stat_history"

            fetch_logger().debug(f'fetching {url}')
            decoded = fetch_url(url)

            results = decoded["character_list"]
            for result in results:
                # Unpack the server response and add each to the archive_connection.
                try:
                    self.archive_connection.execute(
                        f"INSERT OR REPLACE into {self.table_name}_node (character_id,raw) VALUES(?,?)",
                        (result["character_id"], json.dumps(result)),
                    )
                except Exception:
                    fetch_logger().info("Failed to archive character record")
                    if "error" in str(decoded):
                        fetch_logger().info("Server appears to be down")
                        exit(1)
                    else:
                        raise
            self.archive_connection.commit()
            completed_jobs += len(character_id_batch)
            fetch_logger().info(
                f"Character data lookup is {(completed_jobs / re_count) * 100.0:.1f} percent complete"
            )
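
The chunks helper used here and again in get_friends is not included in these snippets. A minimal sketch, assuming it splits a list into fixed-size batches and returns them as a list (get_friends calls len() on the result, so a plain list rather than a generator is assumed):

from typing import List


def chunks(items: list, batch_size: int) -> List[list]:
    # Split a list into consecutive batches of at most batch_size elements.
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]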
Example #5
def run_PS4():
    """
    Crawl the PlayStation 4 servers.
    Note that most of these servers have since been merged together.
    """
    argument_parser = setup_cli_options()
    parser_options = argument_parser.parse_args()
    for initials in ["graph", "Cr", "L", "S", "Ce", "P"]:
        server_crawler = MainDataCrawler(initials, parser_options.restart,
                                         parser_options.name_overwrite)
        fetch_logger().info(f"Now crawling {server_crawler.server_name}")
        server_crawler.run()
Example #6
def save_graph_to_graphml(database_name: str, source_table_name: str) -> nx.Graph:
    """
    Build the friendship graph from the database, write it to disk as GraphML
    for inspection in Gephi, and return it.
    """
    setup_logger('DEBUG', source_table_name, 'server_name')
    graphml_attrs = [
        "name",
        "faction",
        "br",
        "outfitTag",
        "outfitId",
        "outfitSize",
        "creation_date",
        "login_count",
        "minutes_played",
        "last_login_date",
    ]
    database_path = os.path.join(DATABASE_PATH, database_name)
    database_connection = sqlite3.connect(database_path)
    archive_path = os.path.join(DATABASE_PATH, 'archive.db')
    archive_connection = sqlite3.connect(archive_path)

    edge_raw = multi_column(
        database_connection,
        f'SELECT * FROM {source_table_name}_eset where Status="normal"')

    graph = nx.Graph()
    for edge in edge_raw:
        # if edge[2] == "normal":
        graph.add_edge(edge[0], edge[1])
        graph[edge[0]][edge[1]]["status"] = edge[2]

    archive_id = set(
        single_column(archive_connection,
                      f"SELECT character_id FROM {source_table_name}_node"))
    nodes_without_data = [n for n in graph.nodes() if n not in archive_id]

    # fetch_logger().info(f"{len(nodes_without_data)} deleted for having no data")
    # graph.remove_nodes_from(nodes_without_data)
    for attr in graphml_attrs:

        attribute_storage_dict = sql_columns_to_dicts(source_table_name, attr,
                                                      database_connection)

        fetch_logger().info(f"{attr}: {len(graph.nodes())}")

        graph = my_set_thing(graph, attr, attribute_storage_dict)

    graphml_path = os.path.join("C:\\", f"{source_table_name}test.graphml")
    nx.write_graphml(graph, graphml_path)

    return graph
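
Neither sql_columns_to_dicts nor my_set_thing appears in these snippets. A rough sketch of what the pair might look like, assuming the former maps character_id to a single column of the unpacked _node table and the latter is a thin wrapper over networkx attribute assignment:

import sqlite3

import networkx as nx


def sql_columns_to_dicts(table_name: str, attribute: str,
                         connection: sqlite3.Connection) -> dict:
    # Map character_id -> attribute value for every unpacked node.
    rows = connection.execute(
        f"SELECT character_id, {attribute} FROM {table_name}_node").fetchall()
    return {str(character_id): value for character_id, value in rows}


def my_set_thing(graph: nx.Graph, attribute: str,
                 attribute_storage_dict: dict) -> nx.Graph:
    # Attach the attribute to every node that has a value in the dictionary.
    nx.set_node_attributes(graph, attribute_storage_dict, name=attribute)
    return graph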
Example #7
    def __init__(self,
                 server_initial: str,
                 restart: bool,
                 name_overwrite: str = None) -> None:
        """
        This limits strain on the database_connection by restricting our attention to only
        those nodes active in last MAX_INACTIVE_DAYS days."""
        self.current_time = time.mktime(time.localtime())
        current_datetime = datetime.datetime.now()
        self.tSinceLogin = self.current_time - MAX_INACTIVE_DAYS * 24 * 3600
        # Using the dictionaries above get the name space server Id and
        # server name of the server we wish to crawl.
        # Remember you can change these class variables if needed.
        self.namespace = namespace_dict[server_initial]
        self.server_id = server_id_dict[server_initial]
        self.server_name = server_name_dict[server_initial]

        self.table_name = f'{self.server_name}_{current_datetime.strftime("%B_%d_%Y")}'

        if name_overwrite is not None:
            self.table_name = f'{self.server_name}_{name_overwrite}'

        setup_logger('DEBUG', self.table_name, self.server_name)

        # The set done contains every node we have already examined,
        # including those rejected as too old or otherwise invalid.
        self.done = set()

        # player_friendlist_dict stores the data we have collected: the keys
        # are node ids and the values are their friend lists.
        self.player_friendlist_dict = {}

        # The archive database saves the raw responses from the API so no API
        # call is ever made twice; this is new in this version.

        archive_path = os.path.join(DATABASE_PATH, "archive.db")
        fetch_logger().info(archive_path)
        self.archive_connection = sqlite3.connect(archive_path)

        # This database stores the unpacked data in the format used later.

        database_path = os.path.join(DATABASE_PATH, f"{self.server_name}.db")
        self.database_connection = sqlite3.connect(database_path)

        if restart:
            self.clear_results()

        build_database_tables(self.table_name, self.archive_connection,
                              self.database_connection)
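
build_database_tables is not included in these snippets. A rough sketch of the schema it would need to create, inferred from the queries in the other examples; the column types and key constraints here are assumptions:

import sqlite3


def build_database_tables(table_name: str,
                          archive_connection: sqlite3.Connection,
                          database_connection: sqlite3.Connection) -> None:
    # Archive tables: raw API responses plus the per-run seed node record.
    archive_connection.execute(
        "CREATE TABLE IF NOT EXISTS seed_nodes (name TEXT, seed_nodes TEXT)")
    archive_connection.execute(
        f"CREATE TABLE IF NOT EXISTS {table_name}_edge "
        "(character_id TEXT PRIMARY KEY, raw TEXT)")
    archive_connection.execute(
        f"CREATE TABLE IF NOT EXISTS {table_name}_node "
        "(character_id TEXT PRIMARY KEY, raw TEXT)")
    archive_connection.execute(
        f"CREATE TABLE IF NOT EXISTS {table_name}problem_character_ids "
        "(character_id TEXT)")
    archive_connection.commit()

    # Database tables: the unpacked edge set, per-character login info,
    # stat history, and the node attribute table written by the unpacker.
    database_connection.execute(
        f"CREATE TABLE IF NOT EXISTS {table_name}_eset "
        "(Source TEXT, Target TEXT, Status TEXT, PRIMARY KEY (Source, Target))")
    database_connection.execute(
        f"CREATE TABLE IF NOT EXISTS {table_name}_character_info "
        "(character_id TEXT PRIMARY KEY, last_login_date TEXT)")
    database_connection.execute(
        f"CREATE TABLE IF NOT EXISTS {table_name}_history "
        "(character_id TEXT PRIMARY KEY, history TEXT)")
    database_connection.execute(
        f"CREATE TABLE IF NOT EXISTS {table_name}_node "
        "(character_id INTEGER PRIMARY KEY, name TEXT, faction TEXT, br TEXT, "
        "outfitTag TEXT, outfitId TEXT, outfitSize TEXT, creation_date TEXT, "
        "login_count TEXT, minutes_played TEXT, last_login_date TEXT, "
        "kills TEXT, deaths TEXT)")
    database_connection.commit()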
Example #8
    def leader_board_sample(self, limit=50):
        """
        I have used more than one method to get the initial list of
        character Ids. The first version simply used the id's of characters
        I was knew of. This new version is a bit less biased It gathers the
        players who were in the top limit places on the current leader-board
        for all areas of the leader-board available.
        Note that all leader-board stats are strongly correlated.
        """

        seed_ids = []
        for leaderboard_type in ["Kills", "Time", "Deaths", "Score"]:
            fetch_logger().info(f"Fetching {leaderboard_type} {limit}")
            url = f"http://census.daybreakgames.com/s:{SERVICE_ID}/get/" \
                  f"{self.namespace}/leaderboard/?name={leaderboard_type}" \
                  f"&period=Weekly&world={self.server_id}&c:limit={limit}"

            fetch_logger().info(url)
            decoded = fetch_url(url)
            try:
                decoded_leaderboard = decoded["leaderboard_list"]
            except Exception as err:
                # fetch_logger().error(decoded)
                fetch_logger().error(url)
                fetch_logger().error(f"Failed with {err}")
                raise err
            for characters in decoded_leaderboard:
                character_id = characters.get("character_id")
                if character_id is not None:
                    seed_ids.append(character_id)
        unique = list(set(seed_ids))
        # Record the starting nodes for debugging. The busy_timeout prevents
        # an issue where sqlite3 was not waiting long enough.
        # It probably isn't needed but....
        self.archive_connection.execute("PRAGMA busy_timeout = 30000")
        self.archive_connection.execute(
            "INSERT INTO seed_nodes (name,seed_nodes) VALUES(?,?)",
            (self.table_name, ",".join(unique)),
        )
        self.archive_connection.commit()
        return unique
Example #9
    def interpret_character_data(self):
        """
        Unpacks the character data gathered previously,
        reading the raw data from the archive_connection and writing it into the database_connection.
        """
        completed_id = {
            str(c)
            for c in single_column(
                self.database_connection,
                f"SELECT character_id FROM {self.table_name}_node")
        }
        results = []
        get_unpacked = f"SELECT character_id, raw FROM {self.table_name}_node"
        for raw in multi_column(self.archive_connection, get_unpacked):
            if str(raw[0]) not in completed_id:
                results.append(json.loads(raw[1]))
        # Unpack and add it to the snapshots.
        for char_info in results:
            # Basic avatar information.
            character_id = int(
                char_info.get("character_id", "0000000000000000000"))
            name = char_info["name"].get("first", "not available")
            faction_id = char_info.get("faction_id", -1)
            faction = {
                "1": "VS",
                "2": "NC",
                "3": "TR"
            }.get(faction_id, "has no faction")
            br = char_info.get("battle_rank", {"value": "-1"})["value"]
            # Time data:
            tInfo = char_info.get("times", {})
            creation_date = tInfo.get("creation", "0")
            login_count = tInfo.get("login_count", "0")
            minutes_played = tInfo.get("minutes_played", "0")
            last_login_date = tInfo.get("last_login", "0")
            # Outfit data:
            o = char_info.get("outfit", {"placeholder": "error2"})
            outfitTag = o.get("alias", -1)
            # outfitName = o.get('name', 'not available')
            outfitId = o.get("outfit_id", -1)
            outfitSize = o.get("member_count", -1)
            # Stat history is formatted differently, it returns a list
            # of stats of dictionaries containing the stat history:
            stats = char_info.get("stats", {
                "placeholder": "error2"
            }).get("stat_history")
            if isinstance(stats, list):
                death_stats = [
                    sd for sd in stats if sd["stat_name"] == "deaths"
                ]
                kill_stats = [sd for sd in stats if sd["stat_name"] == "kills"]

                if len(death_stats) != 1:
                    fetch_logger().error(
                        f"Incorrect number of death stats {death_stats}")
                if len(kill_stats) != 1:
                    fetch_logger().error(
                        f"Incorrect number of kill stats {kill_stats}")
                kills = kill_stats[0].get("all_time", -1) if kill_stats else -1
                deaths = death_stats[0].get(
                    "all_time", -1) if death_stats else -1
            else:
                kills, deaths = -1, -1

            stat_history_schema = f"""
                INSERT or replace INTO {self.table_name}_history (character_id, history) VALUES(?, ?)
            """

            self.database_connection.execute(stat_history_schema,
                                             (character_id, json.dumps(stats)))

            char_data_schema = f"""
                INSERT or replace INTO {self.table_name}_node (
                    character_id, name, faction, br,
                    outfitTag, outfitId, outfitSize,
                    creation_date, login_count, minutes_played, last_login_date,
                    kills, deaths)
                    Values(?,?,?,?,?,?,?,?,?,?,?,?,?)
            """

            self.database_connection.execute(
                char_data_schema,
                (
                    character_id,
                    name,
                    faction,
                    br,
                    outfitTag,
                    outfitId,
                    outfitSize,
                    creation_date,
                    login_count,
                    minutes_played,
                    last_login_date,
                    kills,
                    deaths,
                ),
            )

        self.database_connection.commit()
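
For reference, this is the shape of character record the unpacker above expects. The field names are taken from the lookups in the code; the values are made-up placeholders, not real API output.

# Illustrative only: values are placeholders, not real census data.
example_char_info = {
    "character_id": "5428010618020694001",
    "name": {"first": "ExamplePlayer"},
    "faction_id": "1",
    "battle_rank": {"value": "57"},
    "times": {
        "creation": "1356998400",
        "login_count": "412",
        "minutes_played": "18200",
        "last_login": "1489622400",
    },
    "outfit": {"alias": "XMPL", "outfit_id": "37512398", "member_count": "120"},
    "stats": {
        "stat_history": [
            {"stat_name": "kills", "all_time": "10432"},
            {"stat_name": "deaths", "all_time": "9876"},
        ]
    },
}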
Example #10
    def get_friends(self, characters_to_query):
        """
        Returns a dictionary where the keys are Ids and values are
        their friends lists
        """
        fetch_logger().info("Gathering friend lists")
        start_time = time.time()
        # Load existing values
        character_friendlist_results = {}
        # All nodes we have an archived friend list for already.

        archived_friendlist_query = f"""
        SELECT character_id, raw 
            FROM {self.table_name}_edge
            WHERE character_id=?
        """
        unarchived_character_ids = []

        for character_id in characters_to_query:
            archived_friendlist_results = self.archive_connection.execute(
                archived_friendlist_query, (str(character_id), )).fetchone()
            if archived_friendlist_results is None:
                unarchived_character_ids.append(character_id)
                continue
            self.done.add(character_id)
            decoded = json.loads(archived_friendlist_results[1])
            character_friendlist_results[character_id] = decoded["friend_list"]

        fetch_logger().info(
            f"We already have {len(character_friendlist_results)} of the "
            f"{len(characters_to_query)} desired friend lists stored!")

        fetch_logger().info(
            f"Of those {len(unarchived_character_ids)} need to be fetched from the API"
        )

        batched_remaining_nodes = chunks(unarchived_character_ids,
                                         FRIEND_BATCH_SIZE)
        total_batches = len(batched_remaining_nodes)

        problematic_character_ids = []

        for batch_number, character_id_batch in enumerate(
                batched_remaining_nodes):
            fetch_logger().info(f"[{batch_number + 1} of {total_batches}]")
            results = fetch_friend_lists_for_characters(
                self.namespace, character_id_batch, problematic_character_ids)

            for raw_friendlist_record in results:
                # First dump the raw results of the call into a table
                current_char_id = raw_friendlist_record["character_id"]
                serialized_record = json.dumps(raw_friendlist_record)

                self.archive_connection.execute(
                    f"INSERT OR REPLACE into {self.table_name}_edge (character_id, raw) VALUES(?, ?)",
                    (current_char_id, serialized_record),
                )
            self.archive_connection.commit()

            for f in results:
                character_friendlist_results[
                    f["character_id"]] = f["friend_list"]

            for problem_id in problematic_character_ids:
                problem_report = {'character_id': problem_id}
                serialized_record = json.dumps(problem_report)

                character_friendlist_results[problem_id] = []

                self.archive_connection.execute(
                    f"INSERT OR REPLACE into {self.table_name}_edge (character_id, raw) VALUES(?, ?)",
                    (problem_id, serialized_record),
                )

            self.archive_connection.commit()

        # Record information on the ids that have been problematic
        for problem_character_id in problematic_character_ids:
            self.archive_connection.execute(
                f"INSERT INTO {self.table_name}problem_character_ids (character_id) VALUES(?)",
                (str(problem_character_id), ))
        self.archive_connection.commit()
        fetch_logger().debug(f"get_friends took: {time.time() - start_time}")
        return character_friendlist_results
Example #11
    def unpack_friendlists(self, raw_friendlists: dict) -> list:
        """
        Unpack a bunch of friend lists into the datatables. Returns a list of friends that should be checked

        """
        tstart = time.time()

        insert_edge_query = f"""
        INSERT OR IGNORE INTO {self.table_name}_eset (Source, Target, Status) 
            VALUES (?, ?, ?)
        """
        edge_set_args = []

        character_info_query = f"""
        INSERT OR IGNORE INTO {self.table_name}_character_info (character_id, last_login_date)
            VALUES (?, ?)
        
        """
        character_info_args = []

        character_id_queue = set()

        covered_friends = 0
        uncovered_friends = 0
        out_of_scope_friends = 0

        out_of_scope_context = {}

        for character_id, friend_list in raw_friendlists.items():
            self.done.add(character_id)
            for friend in friend_list:
                # It is at this point that "friend" starts to look like a fake word
                friend_id = friend.get("character_id", -1)

                if friend_id in self.initial_character_ids:
                    # In such a case the id is already in done, but we still
                    # need to record how old the friend is.
                    character_info_args.append(
                        (friend_id, friend.get("last_login_time", 0)))

                status = self.categorize_friends(friend)

                # We only want to query "normal" ie not old and same server friends of friends.
                if status == "normal":
                    # If a friend_id is already inside done we can assume its
                    # edge to the current character already exists and move on.
                    if friend_id in self.done:
                        covered_friends += 1
                    else:
                        character_id_queue.add(friend_id)
                        uncovered_friends += 1
                else:
                    out_of_scope_friends += 1
                    if status not in out_of_scope_context:
                        out_of_scope_context[status] = 1
                    else:
                        out_of_scope_context[status] += 1
                character_info_args.append(
                    (friend_id, friend.get("last_login_time", 0)))
                edge_set_args.append((character_id, friend_id, status))

        cursor = self.database_connection.cursor()
        cursor.executemany(character_info_query, character_info_args)
        cursor.executemany(insert_edge_query, edge_set_args)
        self.database_connection.commit()

        fetch_logger().info(
            f"Unpacked {len(raw_friendlists)} friend lists: {covered_friends} characters we already have in "
            f"the database, {uncovered_friends} characters we need to query and {out_of_scope_friends} "
            f"friends that are out of scope for this crawler {out_of_scope_context}"
        )
        fetch_logger().debug(f"Unpacking took {time.time() - tstart:.1f} seconds")
        return list(character_id_queue)
Example #12
    def run(self):
        fetch_logger().info("Gathering edges")
        self.get_friendlist_network()
        fetch_logger().info("Gathering node attributes")
        self.get_node_attributes()
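
get_node_attributes is not among the snippets shown here. Judging by the other methods, it would rebuild the crawled graph from the edge set and then run the archive and unpack steps; this is a speculative sketch under that assumption (it also assumes networkx is imported as nx in the module).

    def get_node_attributes(self):
        # Speculative: rebuild the graph of crawled edges from the _eset table,
        # then archive and unpack character data with the methods above.
        graph = nx.Graph()
        for source, target, status in multi_column(
                self.database_connection,
                f"SELECT Source, Target, Status FROM {self.table_name}_eset"):
            graph.add_edge(source, target, status=status)

        self.archive_character_data(graph)
        self.interpret_character_data()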
Example #13
def fetch_friend_lists_for_characters(
        namespace, character_list: List[str],
        problematic_character_ids: List[str]) -> List[dict]:
    """
    Return the list of friend list responses from the server. Also return the list of character ids who couldn't be
    loaded due to errors!
    """

    fetch_logger().info(f"fetch_friend_lists_for_characters {character_list}")
    # Attempt to build a url for this set of characters and handle errors encountered along the way.
    unique_characters = list(set(character_list))
    character_ids = ",".join(str(c) for c in unique_characters)

    friend_list_results = []

    url = f"http://census.daybreakgames.com/s:{SERVICE_ID}/get/{namespace}/characters_friend/" \
          f"?character_id={character_ids}&c:resolve=world"

    try:
        decoded = fetch_url(url)
        friend_list_results = decoded["characters_friend_list"]

    except GiveUpException as possible_overload_error:
        # Some characters cause errors when you load their friend list; unclear why.
        if len(character_list) > 1:
            fetch_logger().error(
                f"Unable to load large group of ids: {character_list}")
            fetch_logger().error(str(possible_overload_error))
            for indi_index, individual in enumerate(character_list):
                fetch_logger().info(
                    f"Attempting to run individual {indi_index} ({individual})"
                )

                individual_results = fetch_friend_lists_for_characters(
                    namespace, [individual], problematic_character_ids)
                if len(individual_results) > 0:
                    friend_list_results.extend(individual_results)
                else:
                    fetch_logger().warning(
                        f"Unable to fetch data for player {individual} for whatever reason"
                    )

        elif len(character_list) == 1:
            problematic_character_ids.append(character_list[0])

    except Exception as err:
        fetch_logger().error(
            f"Unable to fetch friendlist for {character_list} {err} giving up and moving on"
        )
    return friend_list_results
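
Neither fetch_url nor GiveUpException appears in these snippets. A minimal sketch of what they might look like, assuming fetch_url wraps requests with a small retry loop and raises GiveUpException once the retries are exhausted; the retry counts and timeouts are placeholders.

import time

import requests


class GiveUpException(Exception):
    """Raised when a census request keeps failing after several retries."""


def fetch_url(url: str, retries: int = 3, backoff_seconds: float = 5.0) -> dict:
    # Fetch the url and decode the JSON body, retrying transient failures.
    last_error = None
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as err:
            last_error = err
            time.sleep(backoff_seconds * (attempt + 1))
    raise GiveUpException(f"Giving up on {url}: {last_error}")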