Example #1
def get_countries_indicators():
    ind = Database.find_one("project_data", {"index": "indicators"})
    cn = Database.find_one("project_data", {"index": "countries"})
    # convert the ObjectId to a string, so the documents are JSON-serializable
    ind['_id'] = str(ind['_id'])
    cn['_id'] = str(cn['_id'])

    return {"indicators": ind, "countries": cn}
def get_country_df(country_name, dates_updated, last_default_year):
    """
    Gets the country df if it has already been transposed and gaps-filled, or calls the transpose function
    to conduct mergins, interpolation and pre-prediction

    :param country_name: the country name of the df to be delivered
    :param dates_updated: Boolean. If true, then the current function is called for chart updating
    , instead of chart initializing
    :param last_default_year: the last year to be mapped the df
    :return: the country df
    """
    if Database.find('dataframes', {'index': country_name}).count() > 0:
        country_df = get_df(country_name, dates_updated)
    else:
        indicators = Database.find_one('project_data', {'index': 'indicators'})['data']
        dates = [f'{x}-01-01' for x in range(2005, last_default_year+1)]

        # get the provided country's df with its indicators as columns and interpolate the gaps in the middle & start
        country_df = transpose_df(indicators=indicators, dates=dates, country_name=country_name)\
            .interpolate(method='linear', axis=0, limit_direction='backward')

        for col in country_df:
            nans = country_df[col].isna().sum()
            if nans >= 1:
                country_df = pre_predict(country_df, nans, col)

    if 'date' not in country_df.columns:
        country_df['date'] = country_df.index

    return country_df
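get_country_df above and get_df (Example #7) both call pre_predict to fill the NaN values left in the trailing rows after interpolation. pre_predict itself is not included in these examples; a rough sketch of what such a trailing-gap filler could look like, using a simple linear trend fit (hypothetical implementation, not the project's code):

import numpy as np


def pre_predict_sketch(df, nans, col):
    """Hypothetical stand-in for pre_predict: fill the last `nans` rows of `col`
    by extrapolating a linear trend fitted on the known values."""
    known = df[col].dropna().astype(float)
    x = np.arange(len(known))
    slope, intercept = np.polyfit(x, known.values, 1)    # fit y = slope * x + intercept
    future_x = np.arange(len(known), len(known) + nans)
    df.loc[df.index[-nans:], col] = slope * future_x + intercept
    return df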
Example #3
def europe_map():
    countries = Database.find_one('project_data',
                                  {'index': 'countries'})['data']
    indicators = Database.find_one('project_data',
                                   {'index': 'indicators'})['data']
    return render_template('index.html',
                           countries=countries,
                           indicators=indicators)
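europe_map renders index.html through Flask's render_template, so it is presumably registered as a view on a Flask application object; a minimal sketch of the assumed wiring (hypothetical URL rule and app object):

from flask import Flask

app = Flask(__name__)


@app.route('/')     # hypothetical URL rule; the body is the europe_map function shown above
def europe_map():
    ...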
Example #4
def save_df_to_mongo(df_name, df):
    """
    Saves a specific df to the mongodb, in the collection dataframes

    :param df_name: the table/df name
    :param df: the df to be saved
    :return: None
    """
    Database.df_to_mogodb(collection='dataframes', df_name=df_name, df=df)
def save_partial_df_to_mongo(country_name, country_df_partial):
    """
    Saves the mapped df to the mongodb, in the 'partial_dfs' collection.

    :param country_name: the name of the country/df
    :param country_df_partial: the mapped df
    :return: None
    """
    # save to mongoDB
    Database.df_to_mogodb(collection='partial_dfs', df_name=country_name, df=country_df_partial, scaling=True)
def get_df_from_mongodb(name, collection):
    """
    Gets a df from the given mongodb collection and re-indexes it on the default dates (2005-2018).

    :param name: the df name
    :param collection: the mongodb collection to read from
    :return: the df
    """
    dates = [f'{x}-01-01' for x in range(2005, 2019)]
    df = Database.from_mongodb_df(collection, {'index': name}).reset_index(drop=True)\
                    .set_index('date', drop=True).reindex(dates)
    print(df)
    # df['date'] = df.index

    return df
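Database.df_to_mogodb and Database.from_mongodb_df, used throughout these examples, are also project helpers whose implementation is not shown. A plausible sketch of such a DataFrame round-trip through MongoDB, storing each df as a single document keyed by an 'index' field (assumed layout, not the project's actual code):

import pandas as pd
from pymongo import MongoClient

db = MongoClient()['project_db']     # hypothetical database name


def df_to_mongodb_sketch(collection, df_name, df):
    # store the df as one document: {'index': <name>, 'records': [row dicts]}
    doc = {'index': df_name, 'records': df.to_dict(orient='records')}
    db[collection].replace_one({'index': df_name}, doc, upsert=True)


def from_mongodb_df_sketch(collection, query):
    # rebuild the DataFrame from the matching document
    doc = db[collection].find_one(query)
    return pd.DataFrame(doc['records'])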
Example #7
def get_df(name, dates_updated):
    """
    Gets the dataframe from the mongodb and conducts a pre-prediction if needed (if there are any nan values in the
    last rows of the df), after interpolating the rows in the middle and at the beginning.

    :param name: the df name
    :param dates_updated: Boolean. If true, the function is called for a chart update and not the initial load. This
    determines whether to use the existing dates in the table or to map the df to the years 2005-2018.
    :return: the df
    """

    if not dates_updated:
        dates = [f'{x}-01-01' for x in range(2005, 2019)]
        df = Database.from_mongodb_df('dataframes', {'index': name}).reset_index(drop=True)\
            .set_index('date', drop=False).reindex(dates)
        df['date'] = df.index
    else:
        df = Database.from_mongodb_df('dataframes', {'index': name})
    flag = False
    temp_col = None

    if 'date' in df.columns:
        flag = True
        temp_col = df['date']
        df = df.loc[:][[x for x in df.columns if x != 'date']]

    # if there are any nans
    nans = len(df) - df.count()
    if nans.any():
        # fill the gaps in the start & in the middle
        df = df.interpolate(method='linear',
                            axis=0,
                            limit_direction='backward')
        # drop the columns whose values are all NaN
        df = df.dropna(how='all', axis=1)
        # fills the gaps in the last rows of df, until year 2018
        for col in df.columns:
            if col != 'date':
                predictable_nans = df[col].isna().sum()
                if predictable_nans >= 1:
                    df = pre_predict(df, predictable_nans, col)
    if flag:
        df_date = pd.DataFrame(temp_col)
        df = pd.concat([df_date, df], axis=1)

    return df
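Note that interpolate(method='linear', limit_direction='backward') fills the gaps at the start and in the middle of a column but leaves trailing NaNs untouched, which is why pre_predict is still needed for the last rows. A small illustration:

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 2.0, np.nan, 4.0, np.nan, np.nan])
filled = s.interpolate(method='linear', limit_direction='backward')
# filled is roughly [2.0, 2.0, 3.0, 4.0, NaN, NaN]:
# the leading gap is backfilled, the interior gap is interpolated,
# and the trailing gaps are left for pre_predict to handle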
Example #8
def get_indicators(header):
    """
    Gets the indicators names, from mongodb, for a specific PESTEL header.

    :param header: PESTEL header
    :return: a list with the indicators of the specified PESTEL header
    """
    pestel_categories = Database.find_one("project_data",
                                          {"index": "pestel_categories"})

    return pestel_categories['data'][header]
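The pestel_categories document is assumed to map each PESTEL header to a list of indicator names, along the lines of (hypothetical indicator names):

pestel_categories = {
    'index': 'pestel_categories',
    'data': {
        'Economic': ['GDP growth', 'Inflation'],
        'Social': ['Population', 'Unemployment rate'],
        # ... remaining PESTEL headers (Political, Technological, Environmental, Legal)
    }
}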
def transpose_df(indicators, dates, country_name):
    """
    Gets all the indicator dfs, keeps from each the column of the provided country_name, merges them into a new df,
    and keeps only those columns which don't have more than 60% nan values.

    :param indicators: the list of indicators, retrieved from mongodb
    :param dates: the dates list, with the dates/rows to keep
    :param country_name: the name of the country/df which will be constructed
    :return: the country df
    """
    country_name = country_name.replace('_', ' ')
    # check if the country_name provided exists in indicator_df columns and get its index
    idx = 0
    for i in range(len(indicators)):
        if country_name in Database.from_mongodb_df('dataframes', {'index': indicators[i]}):
            idx = i

    master_df = pd.DataFrame(Database.from_mongodb_df('dataframes', {'index': indicators[idx]})
                             # .sort_values(by=['date'], ascending=False)
                             .reset_index(drop=True).set_index('date', drop=True).reindex(dates)[country_name]
                             .rename(indicators[idx])).astype(float)
    for indicator in indicators:
        if indicator != indicators[idx]:
            # get from mongodb, set index the dates & keep only the ones between (2005,2018) or until the year predicted
            indicator_df = Database.from_mongodb_df('dataframes', {'index': indicator}).reset_index(drop=True)\
                .set_index('date', drop=True).reindex(dates)

            if country_name in indicator_df.columns:
                indicator_df = indicator_df[country_name]
                # rename the series with the indicator's name, so that it becomes its column name
                indicator_df = pd.DataFrame(indicator_df.rename(indicator)).astype(float)

                # merge columns to a single dataframe
                master_df = master_df.merge(indicator_df, on='date')
    # drop the columns with more than 60% nan values
    master_df = master_df.loc[:, master_df.isin([' ', np.nan, 0]).mean() < .6]

    return master_df
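The column filter on the last line works because isin() yields a boolean frame, and mean() over the rows gives, per column, the fraction of values that are blank, NaN or zero. A small illustration:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'mostly_missing': [np.nan, 0.0, np.nan, 1.0],   # 3 of 4 values empty-like
    'mostly_present': [1.0, 2.0, np.nan, 4.0],      # 1 of 4 values empty-like
})
empty_ratio = df.isin([' ', np.nan, 0]).mean()      # mostly_missing: 0.75, mostly_present: 0.25
df = df.loc[:, empty_ratio < .6]                    # keeps only 'mostly_present'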
Example #10
def get_df_type(self):
    countries = Database.find_one('project_data',
                                  {'index': 'countries'})['data']
    if self.name in countries:
        df = get_country_df(self.name,
                            self.dates_updated,
                            last_default_year=2018)
    else:
        df = get_df(self.name, self.dates_updated)
    # drop the columns that have more than 60% nan values
    df = df.loc[:, df.isin([' ', np.nan, 0]).mean() < .6].rename(
        columns={
            'Slovakia': 'Slovak_Republic',
            'Czechia': 'Czech_Republic'
        })
    return df
Example #11
def table_to_google(header):
    """
    Creates a dictionary, to be used by the Google Visualization Table API, in order to construct
    the PESTEL indicators tables for a specific header/category.

    :param header: the PESTEL header/category
    :return: the JSON-encoded dictionary, to be used by the Google API
    """
    pestel_categories = Database.find_one("project_data",
                                          {"index": "pestel_categories"})

    colns = [{
        "id": header,
        "label": header,
        "type": "string"  # if df[col].dtype == 'O' else "number"
    }]

    rows = []
    for row in pestel_categories['data'][header]:
        row = [{"v": f"<a class='indicator-link'>{row}</a>"}]
        rows.append({"c": row})
    to_google = json.dumps({"cols": colns, "rows": rows})

    return to_google
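For a header with two indicators, the returned JSON string has the column/row shape expected by the Google Visualization Table API (hypothetical header and indicator names):

table_to_google('Economic')
# would return a JSON string shaped like:
# {"cols": [{"id": "Economic", "label": "Economic", "type": "string"}],
#  "rows": [{"c": [{"v": "<a class='indicator-link'>GDP growth</a>"}]},
#           {"c": [{"v": "<a class='indicator-link'>Inflation</a>"}]}]}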
Example #12
def get_line_data(df_name, columns, collection_name, reverse=False):
    """
    Gets the data needed for the scatter plot from mongodb and converts it to json format.

    :param df_name: the table/df name
    :param columns: the columns to display. If any columns have been added to the table (multi-line scatter),
     they are displayed as well.
    :param collection_name: the mongodb collection name to get the df data from
    :param reverse: Boolean. If true, reverse the df in descending order
    :return: the json data with the values of the df columns, a list with generated colors to be used for the
    different lines of the scatter, and a list with the column names to be displayed
    """
    data = []
    color_list = []
    cols = [x for x in columns if x != 'date']
    for col, color in zip(cols, colors):
        data.append(
            json.dumps(
                Database.from_mongodb_json(collection_name,
                                           df_name,
                                           col,
                                           reverse=reverse)))
        color_list.append(color)

    return data, color_list, columns
Example #13
regions_list = [Northern, Eastern, Western, Southern]

countries_list = []
for i in regions_list:
    for j in i:
        countries_list.append(j)

countries_list.sort()

# lambda functions to parse strings to datetime
# (assumes `from datetime import datetime`; pd.datetime is deprecated/removed in newer pandas versions)
dateparse_dby = lambda dates: datetime.strptime(dates, "%d %b %Y")
dateparse_ymd = lambda dates: datetime.strptime(dates, "%Y-%m-%d")
dateparse_y = lambda dates: datetime.strptime(dates, "%Y")
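
The three parser lambdas simply wrap strptime for the date formats presumably found in the source files, for example:

dateparse_dby("15 Mar 2017")   # -> datetime(2017, 3, 15, 0, 0)
dateparse_ymd("2017-03-15")    # -> datetime(2017, 3, 15, 0, 0)
dateparse_y("2017")            # -> datetime(2017, 1, 1, 0, 0)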

my_dict = {'index': 'countries', 'data': countries_list}
Database.insert("project_data", my_dict)

my_dict = {'index': 'indicator_aliases', 'data': indicators}
Database.insert("project_data", my_dict)

my_dict = {'index': 'pestel_categories', 'data': pestel_categories}
Database.insert("project_data", my_dict)

my_dict = {'index': 'eurostat_country_codes', 'data': eurostat_country_codes}
Database.insert("project_data", my_dict)

my_dict = {'index': 'country_codes', 'data': country_codes}
Database.insert("project_data", my_dict)

my_dict = {'index': 'indicators', 'data': list(indicators.values())}
Database.insert("project_data", my_dict)
Example #14
def references_page():
    countries = Database.find_one('project_data', {'index': 'countries'})['data']
    indicators = Database.find_one('project_data', {'index': 'indicators'})['data']
    return render_template("references.html", countries=countries, indicators=indicators)
Example #15
def get_country_codes():
    country_codes = Database.find_one("project_data",
                                      {"index": "country_codes"})
    country_codes['_id'] = str(country_codes['_id'])
    return country_codes
Example #16
def update_df_chart(self,
                    cols,
                    selected_table,
                    last_year,
                    predict_flag=False,
                    update_type=None,
                    col_sel=None):
    """
    Updates the scatter and the table, by adding or removing columns and rows, or conducting prediction.

    :param self: the df object which calls the function, like being inside the class
    :param cols: the existing columns of the table/scatter
    :param selected_table: the df/table name
    :param last_year: the last element of the 'date' column inside the df, referring to the last date
    :param predict_flag: Boolean. If true, then the function was called by the getPredicted js function
    :param update_type: String referring to the kind of update to be implemented, provided by the ajax call.
    It can take the following values: ('addCol', 'remCol', 'remYear', 'addYear', 'update', 'switch'), for adding
    a column/line, removing a column/line, removing a row/date, adding a row/date, and pressing the
    update or switch buttons in js.
    :param col_sel: the name of the column selected to be added (used when update_type is 'addCol')
    :return: data to be used in scatter and table updating
    """
    remaining_cols = None
    # initialize the columns list with the date
    columns = ['date']
    # split the comma-separated column string into individual columns
    cols = cols.split(',')
    for x in cols:
        columns.append(x)
    # get the df from the mongoDB
    countries = Database.find_one('project_data',
                                  {'index': 'countries'})['data']
    # if the df is a country and the columns are indicators, merging and scaling are required
    df = self.df
    df_total_col_length = len(df.columns)
    df_index_length = len(df['date'])
    raw_dates = [
        x for x in df['date'].sort_values(ascending=False)
        if int(x.split('-')[0]) <= int(last_year)
    ]
    if update_type == 'addCol':
        remaining_columns = [
            x for x in df.columns if x.replace(' ', '_') not in columns
        ]

        if len(remaining_columns) > 0:
            # columns.append(random.choice(remaining_columns))
            columns.append(col_sel)
            remaining_cols = [
                x for x in remaining_columns if x.replace(' ', '_') != col_sel
            ]
        # get the current dates in YYYY-mm-dd format
        raw_dates = [
            x for x in df['date'].sort_values(ascending=False)
            if int(x.split('-')[0]) <= int(last_year)
        ]
    elif update_type == 'remCol':
        remaining_cols = [
            x for x in df.columns if x.replace(' ', '_') not in columns
        ]
        remaining_cols.append(columns[-1])
        columns = columns[:-1]
        # get the current dates in YYYY-mm-dd format
        raw_dates = [
            x for x in df['date'].sort_values(ascending=False)
            if int(x.split('-')[0]) <= int(last_year)
        ]
    elif update_type == 'addYear':
        if predict_flag == 'True':
            # TODO: if a forecast has already been conducted, just get the next year and avoid re-predicting
            df = predict(df)
            Database.df_to_mogodb('dataframes',
                                  self.name,
                                  df,
                                  scaling=False,
                                  alias=False)
        raw_dates = [
            x for x in df['date'].sort_values(ascending=False)
            if int(x.split('-')[0]) <= int(last_year) + 1
        ]
    elif update_type == 'remYear':
        raw_dates = [
            x for x in df['date'].sort_values(ascending=False)
            if int(x.split('-')[0]) < int(last_year)
        ]
    elif update_type == 'switch':
        raw_dates = [
            x for x in df['date'].sort_values(ascending=False)
            if int(x.split('-')[0]) <= int(last_year)
        ]
    elif update_type == 'update':
        raw_dates = [
            x for x in df['date'].sort_values(ascending=False)
            if int(x.split('-')[0]) <= int(last_year)
        ]
    # convert the dates to YYYY format, in which they will be plotted
    dates = [x.split('-')[0] for x in raw_dates]
    # replace whitespaces in indicators
    df.columns = [x.replace(' ', '_') for x in df.columns]
    columns = [x.replace(' ', '_') for x in columns]
    # map the df in the desired columns list
    df = df.loc[:][columns]
    # map the df in the rows of the new dates
    df = df.reset_index(drop=True)
    df = df[df['date'].isin(raw_dates)].sort_values(by=['date'],
                                                    ascending=False)
    # scale the df on the selected columns only, not on all of its columns
    if selected_table in countries:
        if update_type in ('addCol', 'remCol', 'remYear', 'addYear'):
            df, min_max_scaler = scale(df)
    # exclude the date column from the df, so that to_google contains only data values
    columns = [x.replace('_', ' ') for x in df.columns if x != 'date']
    to_google = table_to_google(df, columns, dates)
    # convert the lists to json
    columns = json.dumps(columns)
    dates = json.dumps(dates)
    #  convert df columns to json
    my_dict = {}
    for col in df.columns:
        my_dict[col] = json.loads(
            df.iloc[:-1, :][col].to_json(orient='records'))
    return my_dict, dates, columns, to_google, df_total_col_length, df_index_length, last_year, remaining_cols
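The raw_dates comprehension repeated in every branch just keeps the dates whose year does not exceed last_year, newest first. A small illustration with a toy date column:

import pandas as pd

df = pd.DataFrame({'date': ['2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01']})
last_year = 2018
raw_dates = [
    x for x in df['date'].sort_values(ascending=False)
    if int(x.split('-')[0]) <= int(last_year)
]
# raw_dates == ['2018-01-01', '2017-01-01', '2016-01-01']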
Example #17
def pestel_pie():
    countries = Database.find_one('project_data', {'index': 'countries'})['data']
    indicators = Database.find_one('project_data', {'index': 'indicators'})['data']
    return render_template("pie_chart.html", countries=countries, indicators=indicators)