def create_matrix(states):
    """Build an item-centric preference matrix from a states DataFrame.

    Args:
        states: pandas DataFrame with a "story_id" column plus one column
            for every name returned by ``parameters.state_names()``.

    Returns:
        dict mapping each story_id to an inner dict of
        ``{state name: value of that state's column in the row}``.
        When a story_id appears in several rows, the last row wins.
    """
    # The list of state names does not depend on the row, so fetch it once
    # instead of once per row.
    state_names = parameters.state_names()
    item_prefs = {}
    for _, row in states.iterrows():
        item_prefs[row["story_id"]] = {st: row[st] for st in state_names}
    return item_prefs
def create_states_dataframe(input_data):
    """Create a per-story DataFrame of state counts from JSON records.

    Args:
        input_data: iterable of JSON strings. Each record must contain
            ``data.story_id`` and ``data.citiesbycountry``, the latter being
            a sequence of ``[location_code, count]`` pairs.

    Returns:
        pandas DataFrame with a 'story_id' column followed by one column per
        name in ``parameters.state_names()``. Each cell holds the summed
        count for that state in that story, or 0 when the state is absent.

    Raises:
        json.JSONDecodeError: if a record is not valid JSON.
        KeyError: if a record lacks the expected keys.
    """
    # Fetch the state list once; it is reused for every row and the header.
    state_names = parameters.state_names()

    data = []
    for raw in input_data:
        record = json.loads(raw)['data']

        # Sum the counts per state for this story.
        state_counts = {}
        for entry in record['citiesbycountry']:
            # Characters 3-4 of the location code are taken as the state key
            # (presumably codes look like "US-CA-..."; confirm with the feed).
            state = entry[0][3:5]
            state_counts[state] = state_counts.get(state, 0) + entry[1]

        # Lay the counts out in canonical state order, zero-filling gaps.
        # States not listed in state_names are dropped.
        state_vector = [state_counts.get(name, 0) for name in state_names]
        data.append([record['story_id']] + state_vector)

    return pandas.DataFrame(data, columns=['story_id'] + state_names)
# Retrieve linkinfo sql_query = "SELECT * FROM " + linkinfo_tablename cursor.execute(sql_query) rows = cursor.fetchall() data = tuple_to_list(rows) linkinfo = pandas.DataFrame(data, columns = ['old_ix', 'story_id', 'rate', 'thumbnail_url', 'description', 'title', 'url', 'clicks']) # Retrieve state data sql_query = "SELECT * FROM " + states_tablename cursor.execute(sql_query) rows = cursor.fetchall() data = tuple_to_list(rows) states = pandas.DataFrame(data, columns = ['old_ix', 'story_id'] + parameters.state_names()) # Retrieve country data sql_query = "SELECT * FROM " + countries_tablename cursor.execute(sql_query) rows = cursor.fetchall() data = tuple_to_list(rows) countries = pandas.DataFrame(data, columns = ['old_ix', 'story_id', 'count', 'us']) info = {} for ix, row in linkinfo.iterrows(): story_id = row['story_id'] info[story_id] ={} info[story_id]['title'] = row['title'] info[story_id]['description'] = row['description']