Пример #1
0
def test_read_table():
    """ Exercises Database.read_table with a plain limit and with a
    combined query / where filter, then checks that count_rows
    reports at least one match for the same query. """
    table = 'event_aggregates'
    search = ('name', 'Rodef 2100')
    database = Database()

    # With only a limit, exactly that many rows are expected back
    limited = database.read_table(table, limit=10)
    assert len(limited) == 10

    # Combine a text query with a lower bound on the start date
    df = database.read_table(
        table,
        query=search,
        where=[('start_datetime', {'>=': "'2018-01-01'"})],
        limit=10
    )
    assert len(df) > 0
    for label in df.index:
        record = df.loc[label]
        # Every returned row has to respect the date bound ...
        assert str(record['start_datetime']) >= '2018-01-01'
        # ... and match at least one of the search terms
        assert '2100' in record['name'] or 'rodef' in record['name'].lower()

    matches = database.count_rows(table, query=search)
    assert matches > 0
Пример #2
0
class Network:
    """ Pulls in data from the events database and
    performs social network analysis.

    build_network constructs the co-attendance graph and stores it on
    the network attribute; evaluate_network then computes the metrics
    registered in the metrics dictionary against that graph. """
    def __init__(self):
        daiquiri.setup(level=logging.INFO)
        self.logger = daiquiri.getLogger(__name__)

        self.database = Database()
        self.events_manager = Events()
        # Set by build_network; stays None until a network is built
        self.network = None
        # Maps each supported metric name to the callable computing it
        self.metrics = {
            'node_connectivity': self.node_connectivity,
            'edge_connectivity': self.edge_connectivity,
            'density': self.density,
            'membership_scaled_density': self.membership_scaled_density
        }

    def get_events(self, start, end):
        """ Pulls events from the event database
        within the specified date range.

        Parameters
        ----------
            start: inclusive lower bound on start_datetime ('>=')
            end: exclusive upper bound on start_datetime ('<')

            Both values are handed to read_table unchanged;
            build_network passes single-quoted 'YYYY-MM-DD' strings.

        Returns
        -------
            the result of read_table restricted to the id and
            start_datetime columns
        """
        events = self.events_manager.database.read_table(
            'event_aggregates',
            columns=['id', 'start_datetime'],
            where=[('start_datetime', {
                '>=': start,
                '<': end
            })],
        )
        return events

    def evaluate_network(self, metrics=None, date=None):
        """ Computes the requested metrics for the network that was
        most recently built with build_network.

        Parameters
        ----------
            metrics: a metric name, a list of metric names, or None
                to compute every metric registered in self.metrics.
                Names that are not registered are skipped.
            date: unused; kept so that existing callers keep working

        Returns
        -------
            a dictionary mapping each computed metric name to its value

        Raises
        ------
            RuntimeError: if no network has been built yet
        """
        if self.network is None:
            raise RuntimeError(
                'No network available; call build_network first.')

        if not metrics:
            metrics = list(self.metrics.keys())
        elif isinstance(metrics, str):
            metrics = [metrics]

        # The connectivity metrics run on the largest connected
        # component only. Finding it requires a pass over the graph,
        # so it is computed at most once and only when needed.
        biggest_subgraph = None
        evaluation = {}
        for metric in metrics:
            if metric not in self.metrics:
                continue
            if metric.endswith('connectivity'):
                if biggest_subgraph is None:
                    biggest_subgraph = self.biggest_subgraph(self.network)
                target = biggest_subgraph
            else:
                target = self.network
            evaluation[metric] = self.metrics[metric](target)
        return evaluation

    def build_network(self, date, lag, max_attendees=None):
        """ Builds the congregational co-attendance network for the
        window that ends at the specified date and starts <lag>
        days before it.

        Parameters
        ----------
            date: the end date of the window in 'YYYY-MM-DD' format
                (events starting on or after this date are excluded)
            lag: int, the number of days to go back
            max_attendees: int, ignores events that have
                more than the max number of attendees

        Returns
        -------
            sets the network attribute to be a networkx graph
        """
        network = nx.Graph()
        # Determine the start and end dates for the pull
        split_date = date.split('-')
        end_datetime = datetime.datetime(
            int(split_date[0]), int(split_date[1]), int(split_date[2]))
        start_datetime = end_datetime - datetime.timedelta(days=lag)
        # The bounds are wrapped in single quotes before being
        # passed on to get_events
        end = "'{}'".format(date)
        start = "'{}'".format(str(start_datetime)[:10])
        # Remembered for membership_scaled_density
        self.date = date

        # Build the network G(V,E) where V is the set of participants
        # and an edge exists between two vertices if they have
        # attended the same events
        events = self.get_events(start, end)
        # NOTE(review): the message labels the date as 'Start' although
        # it is the end of the window; the text is left unchanged.
        msg = '\nStart: {date} \nLag: {lag} days \nEvent Count: {count}'
        msg = msg.format(date=date, lag=lag, count=len(events))
        self.logger.info(msg)
        for event_id in events['id']:
            attendees = self.events_manager.get_attendees(event_id)
            if max_attendees and len(attendees) > max_attendees:
                continue
            # Attendees without a first name are left out; the set
            # drops duplicate names within an event
            names = {
                ' '.join([x['first_name'], x['last_name']]).lower()
                for x in attendees if x['first_name']
            }
            network.add_edges_from(itertools.combinations(names, 2))

        # Assigned after the loop so that the attribute is replaced
        # even when the window contains no events
        self.network = network

    @staticmethod
    def density(network):
        """ Finds the density of the graph, as a percentage. """
        return nx.density(network) * 100

    def membership_scaled_density(self, network):
        """ Finds the density of the network scaled by the number of
        members, as a percentage.

        The number of edges is divided by the number of possible
        pairs among the members whose membership_date is on or before
        the date passed to the last build_network call. Returns 0.0
        when fewer than two such members exist, since no pair is
        possible. """
        date = "'{}'".format(self.date)
        member_count = self.database.count_rows('members',
                                                where=[('membership_date', {
                                                    '<=': date
                                                })])
        actual_connections = len(network.edges)
        potential_connections = (member_count * (member_count - 1)) / 2
        if potential_connections <= 0:
            # Avoids dividing by zero for zero or one member
            return 0.0
        density = actual_connections / potential_connections
        return float(density) * 100

    @staticmethod
    def node_connectivity(network):
        """ Computes the node connectivity for the network using the
        approximation routine from networkx. """
        connectivity = approx.node_connectivity(network)
        return connectivity

    @staticmethod
    def edge_connectivity(network):
        """ Computes the edge connectivity for the network. """
        connectivity = nx.connectivity.edge_connectivity(network)
        return connectivity

    @staticmethod
    def biggest_subgraph(network):
        """ Finds the largest connected component of the network and
        returns it as an independent copy.

        When several components share the largest size, the first one
        yielded by networkx is used. Raises ValueError if the network
        has no nodes. """
        # Only the winning component is copied
        largest = max(nx.connected_components(network), key=len)
        return network.subgraph(largest).copy()