Example #1
@st.cache(allow_output_mutation=True)
def load_data():
    url = 'https://es.wikipedia.org/wiki/Merval'
    html = pd.read_html(url, header=0)
    df = html[1]
    #Let's select 'Pesos' as the currency
    df[".BA"] = ".BA"
    df["Símbolo"] = df["Símbolo"] + df[".BA"]
    del df['.BA']
    #Let's improve the classification
    df = df.replace(to_replace='Petróleo y Gas', value='Energético')
    df = df.replace(to_replace='Energético', value='Energy')
    df = df.replace(to_replace='Industrial (Aluminio)', value='Industrial')
    df = df.replace(to_replace='Industrial (Siderúrgica)', value='Industrial')
    df = df.replace(to_replace='Inmobiliario-Agropecuario',
                    value='Servicios industriales y otros')
    df = df.replace(to_replace='Fabricación de productos',
                    value='Servicios industriales y otros')
    df = df.replace(to_replace='Servicios industriales y otros',
                    value='Services')
    df = df.replace(to_replace='Finanzas', value='Financiero')
    df = df.replace(to_replace='Financiero', value='Finance')
    df = df.replace(to_replace='Telecomunicaciones', value='Telecommunication')
    df = df.replace(to_replace='Bancario', value='Bank')
    df = df.replace(to_replace='Construcción (Cementos)', value='Construction')
    return df
Example #2
def upload_function_1():
    data1 = st.cache(pd.read_csv)('Effimax - Sunidhi - History.csv',
                                  parse_dates=True,
                                  index_col='Timestamp',
                                  dayfirst=True)
    data1 = data1.loc[data1['EFF_Boiler_ON'] == 1]
    data2 = st.cache(pd.read_csv)('Effimax - Sunidhi - History.csv')
    data2 = data2.loc[data2['EFF_Boiler_ON'] == 1]

    return data1, data2
Example #3
def get_models():
    fast_load = st.cache(torch.load, ignore_hash=True)
    checkpoint_gen = fast_load(paths[1])
    checkpoint_dis = fast_load(paths[0])
    _args = dict([('clipping_threshold_d', 0), ('obs_len', 10),
                  ('batch_norm', False), ('timing', 0),
                  ('checkpoint_name', 'gan_test'),
                  ('num_samples_check', 5000), ('mlp_dim', 64),
                  ('use_gpu', 1), ('encoder_h_dim_d', 16),
                  ('num_epochs', 900), ('restore_from_checkpoint', 1),
                  ('g_learning_rate', 0.0005), ('pred_len', 20),
                  ('neighborhood_size', 2.0), ('delim', 'tab'),
                  ('d_learning_rate', 0.0002), ('d_steps', 2),
                  ('pool_every_timestep', False),
                  ('checkpoint_start_from', None), ('embedding_dim', 16),
                  ('d_type', 'local'), ('grid_size', 8), ('dropout', 0.0),
                  ('batch_size', 4), ('l2_loss_weight', 1.0),
                  ('encoder_h_dim_g', 16), ('print_every', 10),
                  ('best_k', 10), ('num_layers', 1), ('skip', 1),
                  ('bottleneck_dim', 32), ('noise_type', 'gaussian'),
                  ('clipping_threshold_g', 1.5), ('decoder_h_dim_g', 32),
                  ('gpu_num', '0'), ('loader_num_workers', 4),
                  ('pooling_type', 'pool_net'), ('noise_dim', (20, )),
                  ('g_steps', 1), ('checkpoint_every', 50),
                  ('noise_mix_type', 'global'), ('num_iterations', 80000)])
    _args = AttrDict(_args)
    generator = get_generator(_args, checkpoint_gen)
    discriminator = get_discriminator(_args, checkpoint_dis)
    data_path = get_dset_path(args.dataset_name, args.dset_type)
    _, loader = data_loader(_args, data_path)
    return _args, generator, discriminator, data_path, loader
Example #4
def ttl_cache(
    fn=None,
    ttl=TTL_DURATION,
    force_streamlit=False,
    force_joblib=False,
    key="ui.info",
    **kwargs,
):
    """
    Default time-to-live cache logic.
    """
    if fn is None:
        return lambda f: ttl_cache(f, ttl, force_streamlit, force_joblib)

    if force_streamlit:
        return st.cache(ttl=ttl, **kwargs)(fn)
    elif force_joblib:
        return cache.ttl_cache(key, timeout=ttl, **kwargs)(fn)

    backend = os.environ.get("PYDEMIC_UI_CACHE_BACKEND", "joblib").lower()
    if backend == "joblib":
        return ttl_cache(fn, ttl, force_joblib=True)
    elif backend == "streamlit":
        return ttl_cache(fn, ttl, force_streamlit=True)
    else:
        raise ValueError(f"invalid cache backend: {backend!r}")
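A minimal usage sketch (the decorated function names and bodies below are hypothetical, not from the source): because ttl_cache returns a decorator when called without a function, it can be applied either bare or with explicit arguments.

@ttl_cache
def load_summary():
    # cached with the default TTL and whichever backend the environment selects
    return compute_summary()  # hypothetical expensive call


@ttl_cache(ttl=3600, force_streamlit=True)
def load_report():
    # cached through st.cache with a one-hour TTL
    return compute_report()  # hypothetical expensive call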
Example #5
def explore(config, disable_cache=False):
    if not disable_cache:
        get_state = st.cache(persist=False, allow_output_mutation=True)(_get_state)
    else:
        get_state = _get_state
    dset = get_state(config)
    dset.expand = True
    st.title("Dataset Explorer: {}".format(type(dset).__name__))

    input_method = st.sidebar.selectbox(
        "Index selection method", ["Slider", "Number input", "Sample"]
    )
    if input_method == "Slider":
        idx = st.sidebar.slider("Index", 0, len(dset), 0)
    elif input_method == "Number input":
        idx = st.sidebar.number_input("Index", 0, len(dset), 0)
    elif input_method == "Sample":
        idx = 0
        if st.sidebar.button("Sample"):
            idx = np.random.choice(len(dset))
        st.sidebar.text("Index: {}".format(idx))

    show_example(dset, idx)

    st.header("config")
    cfg_string = pp2mkdtable(config, jupyter_style=True)
    cfg = st.markdown(cfg_string)
Example #6
def show_basic_caching() -> None:
    st.header("Improve performance by caching")
    # TODO: load big file
    #       max size
    #       cache complex object -> custom hash function
    #
    st.write(
        """Caching is the core mechanic of streamlit to allow for an acceptable UX:  
    Simply decorate a function with `st.cache`""")

    use_cache = st.checkbox("Use caching?")

    st.write("Used function for loading the data:")
    load_data = _with_cache() if use_cache else _without_caching()
    st.write("Load and inspect the downloaded dataset")

    # simple caching
    with st.echo():
        base_url = "https://raw.githubusercontent.com/vega/vega-datasets/master/data/"

        src = st.selectbox("Vega Dataset:",
                           ["gapminder.json", "jobs.json", "flights-20k.json"])
        df = load_data(base_url + src)
        # n = st.slider("Show first n entries:", min_value=0, max_value=len(df), value=10, step=1000)
        st.dataframe(df.head(100))

    st.subheader("Not just dataframes can be cached")
    with st.echo():
        create_chart = st.cache(
            _create_chart,
            allow_output_mutation=True) if use_cache else _create_chart
        chart = create_chart(df)
        st.altair_chart(chart, use_container_width=True)

    st.info("**Hint**: Data is cached across sessions!")
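The loader factories `_with_cache` and `_without_caching` used above are not shown in this snippet; a hedged sketch of what they might look like (their bodies are assumptions, only the call pattern `load_data(url)` comes from the source):

def _without_caching():
    def load(url):
        # plain loader: downloads and parses the dataset on every rerun
        return pd.read_json(url)
    return load


def _with_cache():
    # same loader, but wrapped with st.cache so reruns with the same URL
    # are served from the cache instead of re-downloading
    return st.cache(_without_caching())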
Example #7
def main():
    from init import init
    import streamlit as st
    global tokenizer, model
    tokenizer, model = st.cache(allow_output_mutation=True)(init)()
    with torch.no_grad():
        text = st.text_area("Input keywords:")
        if st.checkbox("hint mode?"):
            text = text.strip()
            text = text.replace(":", ":")
            text = text.replace(";", ";")
            text = text.replace(",", ",")
            text = text.replace(".", "。")
            text = text.replace("?", "?")
            if text[-1] not in set("?。,:;") and randint(0, 1) < 1:
                text = text + choice("?。,:;")
            template = "[MASK]" * 2 + choice(list("!"))
            # st.code(set("".join(stopwords("zh"))))
            text, score = fill_mask(text + template,
                                    allowed_words=tokenizer.tokenize(text))
            st.code((text, score))
        else:
            banned_self = st.checkbox("Banned self?")
            mode = [5, 7, 5]
            keywords = [x for x in text.split() if x.strip() != ""]
            st.code([
                keywords,
                make_sentence(mode, keywords, ban_self=banned_self)
            ])
Example #8
def investigate_dataset():
    """Shows different information about dataset"""
    if not os.path.exists(DATASET_NAME):
        st.markdown(
            f'''Original dataset is available on [kaggle]({REFERENCE}).
            You have to download it and put {DATASET_NAME} in the current dir''')
        return

    read_and_cache_csv = st.cache(pd.read_csv)
    df = read_and_cache_csv(DATASET_NAME)

    st.write('Sample of dataframe:')
    st.write(df.head(100))

    st.header('Rentals on map')
    draw_plotly_figures(draw_map_plotly(pd.DataFrame.copy(df)))

    st.header("Rentals by vehicle's characteristics")
    draw_vehicles_characteristics(df)

    corr = df.corr()

    st.plotly_chart(
        px.imshow(
            corr.values,
            labels=dict(color="Correlation"),
            x=corr.index.values,
            y=corr.columns.values,
            title='Correlation matrix of numeric features',
            color_continuous_scale='turbo',
        ))
Example #9
def _run_SEIR_BAYES_model(N, E0, I0, R0, R0__loc, R0__scale, gamma_loc,
                          gamma_scale, alpha_loc, alpha_scale, t_max, runs):
    S, E, I, R, t_space = st.cache(run_SEIR_BAYES_model)(
        N, E0, I0, R0, R0__loc, R0__scale, gamma_loc, gamma_scale, alpha_loc,
        alpha_scale, t_max, runs)
    fig = seir_bayes_plot(N, E0, I0, R0, R0__loc, R0__scale, gamma_loc,
                          gamma_scale, alpha_loc, alpha_scale, t_max, runs, S,
                          E, I, R, t_space)
    return fig
Example #10
def wrap_with_st_cache_if_avaiable(f):
    """Wrap function with ST cache method if streamlit is importable"""
    try:
        import streamlit as st
        logger.info("Using streamlit cache for load")
        return st.cache(f, allow_output_mutation=True, show_spinner=False)
    except:
        logger.exception("Could not import streamlit and apply caching")
        print("You need streamlit to run use this method")
        return f
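A minimal usage sketch (the loader function below is hypothetical): the wrapper is applied to an expensive loader so Streamlit caching is used when available, and the plain function is returned otherwise.

import json

def load_config(path):
    # hypothetical expensive loader to be cached when streamlit is importable
    with open(path) as fh:
        return json.load(fh)

load_config = wrap_with_st_cache_if_avaiable(load_config)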
Example #11
def upload_function():
    data1 = st.cache(pd.read_csv)(uploaded_file,
                                  parse_dates=True,
                                  index_col='Timestamp')
    data2 = st.cache(pd.read_csv)(uploaded_file, parse_dates=True)
    try:
        datax = data1
        datax['Datetime'] = pd.to_datetime(data2.Timestamp,
                                           format='%Y-%m-%d %H:%M')
        data1 = data1

    except:
        data1 = st.cache(pd.read_csv)(uploaded_file,
                                      parse_dates=True,
                                      index_col='Timestamp',
                                      dayfirst=True)
        #data6['Datetime'] = pd.to_datetime(data2.Timestamp ,format='%d-%m-%Y %H:%M')

    return data1, data2
Example #12
def get_data(file):
    """
    Read data from csv and parse data
    """
    read_cache_csv = st.cache(pd.read_csv, allow_output_mutation=True)
    df = read_cache_csv(file)

    def date_string_option(format_date):
        if format_date == "%d/%m/%Y":
            return 'day/month/year'
        if format_date == "%m/%d/%Y":
            return 'month/day/year'
        if format_date == "%Y %m %d %H:%M:%S":
            return 'year/month/day hour:minutes:seconds'

    if st.sidebar.checkbox('Read options'):
        user_input_dt = False
        mydateparser = lambda x: ()
        encode = st.sidebar.radio("Choose encoding method",
                                  ("utf-8", 'ISO-8859-1', 'us-ascii'))
        delimiter = st.sidebar.radio("Choose delimiter",
                                     (',', ';', ".", ' ', "|"))
        decimal = st.sidebar.radio("Choose decimal format", (".", ','))
        df = read_cache_csv(file,
                            encoding=encode,
                            sep=delimiter,
                            decimal=decimal)

        dt_checkbox = st.sidebar.checkbox("Data time format")
        if dt_checkbox:
            # user_input_dt = [st.sidebar.selectbox("Choose the datetime column", df.columns)]
            user_input_dt = [
                int(
                    st.sidebar.number_input("Insert datetime column position",
                                            format='%i',
                                            value=0))
            ]
            date_format = st.sidebar.selectbox('Choose date format', \
                ("%d/%m/%Y", "%m/%d/%Y", "%Y %m %d %H:%M:%S"), format_func = date_string_option)
            mydateparser = lambda x: pd.to_datetime(x, format=date_format)
            try:
                df = read_cache_csv(file, encoding=encode, sep = delimiter, decimal =decimal, \
                            parse_dates=user_input_dt, date_parser=mydateparser)
            except ValueError:
                st.error("Choose another column for the date time format")
                df = read_cache_csv(file,
                                    encoding=encode,
                                    sep=delimiter,
                                    decimal=decimal)

    format_header = (lambda x: strip_accents(str(x)).strip().lower().replace(
        ' ', '_').replace('(', '').replace(')', '').replace('/', ''))
    df.rename(format_header, axis='columns', inplace=True)

    return df
Example #13
def main_chart():
    
    st.title("Exploring the Pattern of Chicago Crimes")
    
    #General data selection and preprocessing
    year = st.sidebar.slider('Year', 2001, 2020, value=2020)
    st.sidebar.write("Year Value Will Be Fixed Here (2020)")
    year = 2020
    num_of_samples = st.sidebar.slider('Total Case Number', 2000, 100000, value=10000, step=2000)
    data_cache = st.cache(read_data)
    results = data_cache(year)
    selected_data = random_select(results, num_of_samples)
    selected_data = add_extra_columns(selected_data)
    
    #Detailed Selection
    location_list = list(selected_data.groupby('Location Description').agg('count').sort_values('Case Number', ascending = False).index)
    location_list = location_list[:15]+['OTHER']
    
    crime_list = list(selected_data.groupby('Primary Type').agg('count').sort_values('Case Number', ascending = False).index)
    crime_list = crime_list[:10]+['OTHER']
    
    crimetype = st.sidebar.multiselect('Crime Type', crime_list, default = crime_list[:-5])
    location = st.sidebar.multiselect('Location', location_list, default = location_list[:-1])
    month = st.sidebar.selectbox('Month', ['All Month'] + list(range(1, 13)))
    if crimetype != []:
        if 'OTHER' not in crimetype:
            selected_data = selected_data[selected_data.loc[:,'Primary Type'].apply(lambda x:x in crimetype)]
        else:
            target_list = crime_list.copy()
            for crime in crimetype:
                target_list.remove(crime)
            selected_data = selected_data[selected_data.loc[:,'Primary Type'].apply(lambda x:x not in target_list)]
            
    if location != []:
        if 'OTHER' not in location:
            selected_data = selected_data[selected_data.loc[:,'Location Description'].apply(lambda x:x in location)]
        else:
            target_list = location_list.copy()
            for loc in location:
                target_list.remove(loc)
            selected_data = selected_data[selected_data.loc[:,'Location Description'].apply(lambda x:x not in target_list)]
    if month != 'All Month':
        selected_data = selected_data[selected_data.loc[:,'Month']==month]
    selected_data = selected_data.reset_index().iloc[:,1:]
    st.subheader('Raw Data')
    st.write(selected_data)
    visualization_type = st.multiselect('Select the way you want to explore the data', ['Explore In Charts', 'Visualize In A Map', 'Machine Learning'], default = ['Explore In Charts'])
    if 'Visualize In A Map' in visualization_type:
        visualize_map(selected_data, crime_list)
    if 'Explore In Charts' in visualization_type:
        visualize_chart(selected_data)
    if 'Machine Learning' in visualization_type:
        visualize_ml(selected_data)
Example #14
def get_df(df=None, encode='utf-8', delimiter=','):
    read_cache_csv = st.cache(pd.read_csv, allow_output_mutation=True)

    if st.checkbox('Read options'):
        encode = st.selectbox('Encoding method:',
                              ('utf-8', 'ISO-8859-1', 'us-ascii'))
        delimiter = st.selectbox('Delimiter:', (',', ';', '.', ' ', '|'))

    file = st.file_uploader('Upload your file (.csv):', type='csv')
    if file is not None:
        df = read_cache_csv(file, encoding=encode, sep=delimiter)

    return df
Example #15
def run():
    st.sidebar.info(
        'You can either enter the news item online in the textbox or upload a txt file'
    )
    st.set_option('deprecation.showfileUploaderEncoding', False)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Txt file"))
    image = Image.open('data/20170715_162310.jpg')
    st.sidebar.image(image)

    st.title("Predicting fake news")
    st.header('This app is created to predict if a news item is real or fake')

    if add_selectbox == "Online":
        text1 = st.text_area('Enter news text')
        output = ""
        if st.button("Predict"):
            output = predict(text1)
            output = str(output[0])  # since its a list, get the 1st item
            st.success(f"The news item is {output}")
            st.balloons()
    elif add_selectbox == "Txt file":
        output = ""
        file_buffer = st.file_uploader("Upload text file for news item",
                                       type=["txt"])
        if st.button("Predict"):
            text_news = file_buffer.read()

            # in streamlit versions newer than 0.67, we need to explicitly convert bytes to text
            st_version = st.__version__  # eg 0.67.0
            versions = st_version.split('.')
            if int(versions[1]) > 67:
                text_news = text_news.decode('utf-8')

            print(text_news)
            output = predict(text_news)
            output = str(output[0])
            st.success(f"The news item is {output}")
            st.balloons()

    val_path = "data/val.csv"
    df = st.cache(pd.read_csv)(val_path)
    is_check = st.checkbox("Display validation data")
    if is_check:
        my_bar = st.progress(0)
        for percent_complete in range(100):
            time.sleep(0.1)
            my_bar.progress(percent_complete + 1)
        st.write(df)

    st.image(image)
Example #16
def demo2():
    st.title('Demo 2')
    st.header('Selecting table rows')

    # https://gist.github.com/treuille/e8f07ebcd92265a68ecec585f7594918
    with st.echo():
        # Load some example data.
        DATA_URL = \
            "http://s3-us-west-2.amazonaws.com/streamlit-demo-data/uber-raw-data-sep14.csv.gz"
        data = st.cache(pd.read_csv)(DATA_URL, nrows=1000)

        # Select some rows using st.multiselect. This will break down when you have >1000 rows.
        st.write('### Full Dataset', data)
        selected_indices = st.multiselect('Select rows:', data.index)
        selected_rows = data.iloc[selected_indices]
        st.write('### Selected Rows', selected_rows)
Example #17
def fancy_cache(func=None, ttl=600, unique_to_session=False, **cache_kwargs):
    """A fancier cache decorator which allows items to expire after a certain time
    as well as promises the cache values are unique to each session.
    Parameters
    ----------
    func : Callable
        If not None, the function to be cached.
    ttl : Optional[int]
        If not None, specifies the maximum number of seconds that this item will
        remain in the cache.
    unique_to_session : boolean
        If True, cached values are unique to the current session. Otherwise, the
        default behavior applies, which makes the cache global across sessions.
    **cache_kwargs
        Any other arguments that you might pass to @st.cache.
    """
    # Support passing the params via function decorator, e.g.
    # @fancy_cache(ttl=10)
    if func is None:
        return lambda f: fancy_cache(func=f,
                                     ttl=ttl,
                                     unique_to_session=unique_to_session,
                                     **cache_kwargs)

    # This behaves like func but takes two extra dummy arguments.
    dummy_func = st.cache(func=lambda ttl_token, session_token, *func_args, **
                          func_kwargs: func(*func_args, **func_kwargs),
                          **cache_kwargs)

    # This will behave like func but with fancy caching.
    @functools.wraps(func)
    def fancy_cached_func(*func_args, **func_kwargs):
        # Create a token which changes every ttl seconds.
        ttl_token = None
        if ttl is not None:
            ttl_token = int(time.time() / ttl)

        # Create a token which is unique to each session.
        session_token = None
        if unique_to_session:
            ctx = get_report_ctx()
            session_token = ctx.session_id

        # Call the dummy func
        return dummy_func(ttl_token, session_token, *func_args, **func_kwargs)

    return fancy_cached_func
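A minimal usage sketch, following the decorator form mentioned in the comments above (the decorated function and expensive_query are hypothetical):

@fancy_cache(ttl=600, unique_to_session=True)
def load_user_data(user_id):
    # recomputed at most every 10 minutes, and cached separately per session
    return expensive_query(user_id)  # hypothetical expensive call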
Example #18
    def decorator(func):

        cached_func = st.cache(hash_funcs={
            PDF: id,
            Segment: id,
            Nphthong: id,
            FeatureProcessor: id
        },
                               **kwargs)(func)

        @wraps(cached_func)
        def wrapped(*args, **kwargs):
            status_text = st.subheader(before_msg)
            ret = cached_func(*args, **kwargs)
            status_text.subheader(before_msg + '\t' + after_msg)
            return ret

        return wrapped
Example #19
def explore(config, disable_cache=False):
    if not disable_cache:
        get_state = st.cache(persist=False,
                             allow_output_mutation=True)(_get_state)
    else:
        get_state = _get_state
    dset = get_state(config)
    dset.expand = True
    st.title("Dataset Explorer: {}".format(type(dset).__name__))

    idx = st.sidebar.slider("index", 0, len(dset), 0)
    if st.sidebar.button("sample"):
        idx = np.random.choice(len(dset))

    show_example(dset, idx)

    st.header("config")
    cfg_string = pp2mkdtable(config, jupyter_style=True)
    cfg = st.markdown(cfg_string)
Example #20
def read_data(year, mode='offline'):
    name_map = {"id":'ID',
        "case_number":'Case Number',
        "date":'Date',
        "block":'Block',
        "iucr":'IUCR',
        "primary_type":'Primary Type',
        "description":'Description',
        "location_description":'Location Description',
        "arrest":'Arrest',
        "domestic":'Domestic',
        "beat":'Beat',
        "district":'District',
        "ward":'Ward',
        "community_area":'Community Area',
        "fbi_code":'FBI Code',
        "year":'Year',
        "updated_on":'Updated On',
        "x_coordinate":'X Coordinate',
        "y_coordinate":'Y Coordinate',
        "latitude":'Latitude',
        "longitude":'Longitude',
        "location":'Location'
    }
    if mode == 'offline':
        try:
            csv_cache = st.cache(pd.read_csv)
            results = csv_cache('https://raw.githubusercontent.com/CMU-IDS-2020/a3-05839-a3-fch-ljy/master/subset.csv')
            return results[results.loc[:,'Year']==year]
        except: # For testing
            st.write('Incomplete data read; only for testing')
            client = Socrata("data.cityofchicago.org", None)
            results = client.get("ijzp-q8t2", where="year={:d}".format(year), limit=100000)
            results = pd.DataFrame.from_records(results)
            results.columns = [name_map[name] for name in list(results.columns)]
            return results[results.loc[:,'Year']==str(year)]
    else:
        client = Socrata("data.cityofchicago.org", None)
        results = client.get_all("ijzp-q8t2", where="year={:d}".format(year))
        results = pd.DataFrame.from_records(results)
        results.columns = [name_map[name] for name in list(results.columns)]
        return results[results.loc[:,'Year']==str(year)]
Example #21
def pps_plot(df, columns, num_cols):
    pps_cache = st.cache(pps.matrix)
    corr = pps_cache(df)
    corr_fig = make_subplots(rows=1,
                             cols=2,
                             subplot_titles=('PPS', 'Standard Correlation'))
    corr_fig.add_trace(go.Heatmap(z=corr,
                                  x=columns,
                                  y=columns,
                                  coloraxis='coloraxis'),
                       row=1,
                       col=1)
    corr_fig.add_trace(go.Heatmap(z=df[num_cols].corr(),
                                  x=num_cols,
                                  y=None,
                                  coloraxis='coloraxis'),
                       row=1,
                       col=2)
    corr_fig.update_layout(coloraxis={'colorscale': 'jet'})
    return corr_fig
Example #22
def add_qa():

	st.title("Question Answering")
	st.write("Question Answering is a state-of-the-art research topic that has been arising with the evolution of Deep Learning algorithms. You write a query regarding a long input text, the algorithm goes through the text and identifies the region of the text which is the most likely to contain the answer. The graph below displays 'attention', the process by which neural networks learn to focus on certain parts of the long text. The darker the cell, the most important the information was to identify the answer.")
	
	predictor = st.cache(
	       pretrained.bidirectional_attention_flow_seo_2017,
	       ignore_hash=True  # the Predictor is not hashable
	)()

	article_choice = st.sidebar.selectbox("Article to query", ["Netflix", "Italy"])

	if article_choice == "Netflix":
		passage = st.text_area("Article", """Netflix, Inc. is an American media-services provider and production company headquartered in Los Gatos, California, founded in 1997 by Reed Hastings and Marc Randolph in Scotts Valley, California. The company's primary business is its subscription-based streaming service which offers online streaming of a library of films and television programs, including those produced in-house. As of April 2019, Netflix had over 148 million paid subscriptions worldwide, including 60 million in the United States, and over 154 million subscriptions total including free trials. It is available worldwide except in mainland China (due to local restrictions), Syria, North Korea, and Crimea (due to US sanctions). The company also has offices in the Netherlands, Brazil, India, Japan, and South Korea. Netflix is a member of the Motion Picture Association (MPA).
			Netflix's initial business model included DVD sales and rental by mail, but Hastings abandoned the sales about a year after the company's founding to focus on the initial DVD rental business. Netflix expanded its business in 2010 with the introduction of streaming media while retaining the DVD and Blu-ray rental business. The company expanded internationally in 2010 with streaming available in Canada, followed by Latin America and the Caribbean. Netflix entered the content-production industry in 2012, debuting its first series Lilyhammer.
			Since 2012, Netflix has taken more of an active role as producer and distributor for both film and television series, and to that end, it offers a variety of "Netflix Original" content through its online library. By January 2016, Netflix services operated in more than 190 countries. Netflix released an estimated 126 original series and films in 2016, more than any other network or cable channel. Their efforts to produce new content, secure the rights for additional content, and diversify through 190 countries have resulted in the company racking up billions in debt: $21.9 billion as of September 2017, up from $16.8 billion from the previous year. $6.5 billion of this is long-term debt, while the remaining is in long-term obligations. In October 2018, Netflix announced it would raise another $2 billion in debt to help fund new content.
			""")
		question = st.text_input("Question", "Where are the headquarters of Netflix?")
		
	elif article_choice == "Italy":
		passage = st.text_area("Passage", "Italy, officially the Italian Republic is a European country consisting of a peninsula delimited by the Alps and surrounded by several islands. Italy is located in south-central Europe, and it is also considered a part of western Europe. The country covers a total area of 301,340 km2 (116,350 sq mi) and shares land borders with France, Switzerland, Austria, Slovenia, and the enclaved microstates of Vatican City and San Marino. Italy has a territorial exclave in Switzerland (Campione) and a maritime exclave in the Tunisian Sea (Lampedusa). With around 60 million inhabitants, Italy is the fourth-most populous member state of the European Union. Due to its central geographic location in Southern Europe and the Mediterranean, Italy has historically been home to myriad peoples and cultures. In addition to the various ancient peoples dispersed throughout modern-day Italy, the most predominant being the Indo-European Italic peoples who gave the peninsula its name, beginning from the classical era, Phoenicians and Carthaginians founded colonies mostly in insular Italy, Greeks established settlements in the so-called Magna Graecia of Southern Italy, while Etruscans and Celts inhabited central and northern Italy respectively. An Italic tribe known as the Latins formed the Roman Kingdom in the 8th century BC, which eventually became a republic with a government of the Senate and the People. The Roman Republic initially conquered and assimilated its neighbours on the peninsula, eventually expanding and conquering parts of Europe, North Africa and Asia. By the first century BC, the Roman Empire emerged as the dominant power in the Mediterranean Basin and became a leading cultural, political and religious centre, inaugurating the Pax Romana, a period of more than 200 years during which Italy's law, technology, economy, art, and literature developed. Italy remained the homeland of the Romans and the metropole of the empire, whose legacy can also be observed in the global distribution of culture, governments, Christianity and the Latin script.")
		question = st.text_input("Question", "How large is Italy?")
	
	result = predictor.predict(question, passage)

	# From the result, we want "best_span", "question_tokens", and "passage_tokens"
	start, end = result["best_span"]
	
	question_tokens = result["question_tokens"]
	passage_tokens = result["passage_tokens"]
	mds = [f"**{token}**" if start <= i <= end else token if start - 10 <= i <= end + 10 else "" for i, token in enumerate(passage_tokens)]
	st.markdown(" ".join(mds))

	attention = result["passage_question_attention"]

	plt.figure(figsize=(12,12))
	sns.heatmap(attention, cmap="YlGnBu")
	plt.autoscale(enable=True, axis='x')
	plt.xticks(np.arange(len(question_tokens)), labels=question_tokens)
	st.pyplot()
Example #23
with col4:
    result4 = st.number_input('F4')
    st.text(f'{result4}')

col5, col6, col7, col8 = st.beta_columns(4)

with col5:
    result5 = st.number_input('F5')
    st.text(f'{result5}')

with col6:
    result6 = st.number_input('F6')
    st.text(f'{result6}')

with col7:
    result7 = st.number_input('F7')
    st.text(f'{result7}')

with col8:
    result8 = st.number_input('F8')
    st.text(f'{result8}')

st.cache() # cache to store the model (note: st.cache() alone is a no-op; see the sketch after this example)
model = load(open('testmodel.pkl', 'rb'))

input = np.array([[float(result1), float(result2), float(result3), float(result4), float(result5), float(result6), float(result7),
                   float(result8)]])

st.text(model.predict(input)[0])
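Since st.cache() called on its own does not cache anything, here is a hedged sketch of how the pickled model could actually be cached (the wrapper function is an assumption; the file name comes from the snippet):

@st.cache(allow_output_mutation=True)
def load_model(path='testmodel.pkl'):
    # unpickle once; later reruns reuse the cached model object
    with open(path, 'rb') as fh:
        return load(fh)

model = load_model()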
Example #24
import streamlit as st
import pandas as pd
import statsmodels.api as sm
import datetime
import matplotlib.pyplot as plt
from constants import states


def format_func(option):
    return states[option]


st.cache()  # note: st.cache() on its own is a no-op; it must wrap or decorate a function


def deaths_display():
    st.write("""
            # Covid19 Prediction App
            This page predicts the **Deaths** from the virus in India!
            """)
    df = pd.read_csv(
        'https://api.covid19india.org/csv/latest/state_wise_daily.csv')
    df['Date'] = df['Date'].replace('Sept', 'Sep', regex=True)
    df['Date'] = pd.to_datetime(df['Date'], format="%d-%b-%y")
    df = df.set_index('Status')
    df.drop(['Confirmed', 'Recovered'], inplace=True)
    df = df.reset_index()
    og_df = df
    og_df = og_df.set_index('Date')
    state = st.sidebar.selectbox('State',
                                 options=list(states.keys()),
Example #25
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import seaborn as sns
import warnings
st.cache(persist=True)  # note: st.cache(persist=True) on its own is a no-op; it must wrap a function
st.set_option('deprecation.showPyplotGlobalUse',
              False)  #Do not show the deprecation warning

st.title('Dashboard for Data Visualization')
st.markdown('The dashboard will visualize the features of the review dataset')

filepath = 'wet_dog_food_preprocessed.csv'
dataset = pd.read_csv(filepath, index_col=0, encoding="ISO-8859-1")

st.sidebar.title("Data Visualization for Review data")
#st.sidebar.checkbox("Show Analysis by graph", True, key=1)
options = [
    'Top 10 brands based on reviews', 'Top 10 products based on reviews',
    'Heatmap of distribution of reviews over time',
    'Review count for a rating', 'Sentence count in reviews',
    'Word count in sentences', 'Top 20 authors based on number of reviews',
    'Top 10 brands based on ratings', 'Average review length for a rating',
    'Top 10 products based on ratings',
    'Distribution of total word count in reviews',
    'Top 10 authors based on length of reviews'
]
Example #26
def app():
    # Set header and subheader of the app
    st.header('Google Analytics')
    st.subheader(
        "Summary stats of consumer behaviour from Google Analytics data.")

    # ADWORDS BY NATALI
    # Times
    st.subheader("ADWORDS")
    st.write("Times of the day when Adwords ads are most effective")
    adwordsbyhour = st.cache(pd.read_csv)("AdwordsByHour.csv")
    adwordsbyhour_data = adwordsbyhour.copy()

    is_check = st.checkbox("Display Data")
    if is_check:
        st.write(adwordsbyhour_data)
    adwordsbyhour_data['Hora'] = pd.to_datetime(
        adwordsbyhour_data['Hora'], format='%H:%M:%S')
    adwordsbyhour_data['Hora'] = adwordsbyhour_data['Hora'].dt.hour
    adwordsbyhour_data = adwordsbyhour_data.sort_values(
        by='Hora', ascending=True)
    graf = adwordsbyhour_data.set_index('Hora')
    st.line_chart(graf)

    # searches
    st.subheader("SEARCHES")
    st.write("Most searched words")
    adwords1 = st.cache(pd.read_csv)("Adwords1.csv")
    adwords1_data = adwords1.copy()

    today = datetime.date.today()
    tomorrow = today + datetime.timedelta(days=1)
    start_date = st.date_input('Start date', datetime.date(2019, 1, 1))
    start_date = np.datetime64(start_date)
    end_date = st.date_input('End date', datetime.date(2019, 1, 31))
    end_date = np.datetime64(end_date)
    adwords1_data['Fecha'] = pd.to_datetime(
        adwords1_data['Fecha'], format='%Y-%m-%d')
    mask = (adwords1_data['Fecha'] > start_date) & (
        adwords1_data['Fecha'] <= end_date)
    adwords1_data = adwords1_data.loc[mask]

    is_check1 = st.checkbox("Display Searches")
    if is_check1:
        st.write(adwords1_data)

    st.bar_chart(adwords1_data.ConsultaBusqueda)

    # placements
    st.subheader("PLACEMENTS")
    st.write("Most effective placement URLs")
    adwords3 = st.cache(pd.read_csv)("Adwords3.csv")
    adwords3_data = adwords3.copy()

    today = datetime.date.today()
    tomorrow = today + datetime.timedelta(days=1)
    start_date = st.date_input('Start date', datetime.date(2020, 1, 1))
    start_date = np.datetime64(start_date)
    end_date = st.date_input('End date', datetime.date(2020, 1, 31))
    end_date = np.datetime64(end_date)
    adwords3_data['Fecha'] = pd.to_datetime(
        adwords3_data['Fecha'], format='%Y-%m-%d')
    mask = (adwords3_data['Fecha'] > start_date) & (
        adwords3_data['Fecha'] <= end_date)
    adwords3_data = adwords3_data.loc[mask]
    adwords3_data = adwords3_data[~adwords3_data['URL del emplazamiento'].astype(
        str).str.startswith('mobileapp::')]

    is_check3 = st.checkbox("Display Table")
    if is_check3:
        st.write(adwords3_data)

    url = [adwords3_data[i]
           for i in adwords3_data.columns if 'URL del emplazamiento' in i]
    df_url = pd.concat(url).reset_index()
    df_url = df_url.rename(columns={0: 'Urls'})
    rurl = df_url.groupby(
        by=['index', 'URL del emplazamiento']).count().reset_index()
    rulr = rurl.groupby('URL del emplazamiento').agg(
        'count').sort_values('index', ascending=False).head(10)

    col3, col4 = st.beta_columns(2)
    with col3:
        st.write(rulr)
    with col4:
        st.bar_chart(rulr)

    # days
    st.subheader("WEEKDAYS")
    st.write("Ads effectiveness by day of the week.")
    adwords2 = st.cache(pd.read_csv)("Adwords2.csv")
    adwords2_data = adwords2.copy()

    today = datetime.date.today()
    tomorrow = today + datetime.timedelta(days=1)
    start_date1 = st.date_input('Start Date', datetime.date(2020, 1, 1))
    start_date1 = np.datetime64(start_date1)
    end_date1 = st.date_input('End Date', datetime.date(2020, 1, 31))
    end_date1 = np.datetime64(end_date1)
    adwords2_data['Fecha'] = pd.to_datetime(
        adwords2_data['Fecha'], format='%Y-%m-%d')
    mask = (adwords2_data['Fecha'] > start_date1) & (
        adwords2_data['Fecha'] <= end_date1)
    adwords2_data = adwords2_data.loc[mask]

    st.bar_chart(adwords2_data.day)

    # Wilson's charts
    system_rebote = pd.read_csv('./system_rebote.csv', sep=',')
    system_clean_f = pd.read_csv('./system_clean_f.csv', sep=',')
    metrics = ['1 Day', '3 Days', '1 Week',
               '1 Month', '3 Months', '6 Months', '1 Year']
    cols = st.selectbox('Time range to watch', metrics)
    # let's ask the user which column should be used as Index
    if cols in metrics:
        range_time_to_show = cols
        if(cols == '1 Day'):
            number_of_days = 2
        elif(cols == '3 Days'):
            number_of_days = 6
        elif(cols == '1 Week'):
            number_of_days = 14
        elif(cols == '1 Month'):
            number_of_days = 60
        elif(cols == '3 Months'):
            number_of_days = 180
        elif(cols == '6 Months'):
            number_of_days = 360
        else:
            number_of_days = 720
    # p rebote
    p_rebote_lastest_w = system_rebote.groupby('Date')['Porcentaje de rebote'].mean(
    ).reset_index().sort_values('Date', ascending=False).head(number_of_days)

    p_robote_today = ((p_rebote_lastest_w['Porcentaje de rebote'].iloc[0:int(number_of_days/2)].mean(
    )/p_rebote_lastest_w['Porcentaje de rebote'].iloc[int(number_of_days/2):int(number_of_days)].mean())*100)-100
    st.markdown('**Bounce Rate:**')

    # st.write("<style>red{color:red} orange{color:orange}....</style>)
    # color = st.color_picker('Pick A Color', '#00f900')
    st.write(round(p_robote_today, 2), '%')
    fig = px.bar(p_rebote_lastest_w.head(int(number_of_days/2)), x='Date',
                 y='Porcentaje de rebote', color_discrete_sequence=px.colors.qualitative.Safe)
    st.plotly_chart(fig, use_container_width=True)
    # sessions
    p_sessions_lastest_w = system_clean_f.groupby('Date')['Sesiones'].sum(
    ).reset_index().sort_values('Date', ascending=False).head(number_of_days)
    p_sessions_today = ((p_sessions_lastest_w['Sesiones'].iloc[0:int(number_of_days/2)].mean(
    )/p_sessions_lastest_w['Sesiones'].iloc[int(number_of_days/2):int(number_of_days)].mean())*100)-100
    st.markdown('**Sessions:**')
    st.write(round(p_sessions_today, 2), '%')
    fig2 = px.bar(p_sessions_lastest_w.head(int(number_of_days/2)), x='Date',
                  y='Sesiones', color_discrete_sequence=px.colors.qualitative.Antique)
    st.plotly_chart(fig2, use_container_width=True)
    # average pageviews
    p_pageviews_lastest_w = system_clean_f.groupby('Date')['Número de páginas vistas'].mean(
    ).reset_index().sort_values('Date', ascending=False).head(number_of_days)
    p_pageviews_today = ((p_pageviews_lastest_w['Número de páginas vistas'].iloc[0:int(number_of_days/2)].mean(
    )/p_pageviews_lastest_w['Número de páginas vistas'].iloc[int(number_of_days/2):int(number_of_days)].mean())*100)-100
    st.markdown('**Average Pageviews:**')
    st.write(round(p_pageviews_today, 2), '%')
    fig3 = px.bar(p_pageviews_lastest_w.head(int(number_of_days/2)), x='Date',
                  y='Número de páginas vistas', color_discrete_sequence=px.colors.qualitative.Dark2)
    st.plotly_chart(fig3, use_container_width=True)
Example #27
#!/usr/bin/env python
# coding: utf-8

# In[51]:

import pandas as pd
import streamlit as st
import numpy as np

visualize = st.cache(pd.read_csv)('Recommendation_1.csv')
visualize2 = visualize.drop(['Unnamed: 0'], axis=1)

is_check = st.checkbox('Display Data')
if is_check:
    st.write(visualize2)

variables = st.sidebar.multiselect('Enter the variables', visualize2.columns)
st.write('You selected these variables', variables)

#selectedUserID= st.selectbox("Choose your UserID: ", np.arange(0, 999, 1))
selectedItemID = st.selectbox("Choose your ItemID:", np.arange(0, 50, 1))
selectedItem = st.multiselect("Select an Item", [
    "Kurkure", "Uncle chips", "Milano Dark Chocolate",
    "Bournville Dark Chocolate", "Chips Ahoy Chunky", "Miranda", "Pepsi",
    "Fanta", "Cococola", "Hersheys Kisses", "Hersheys Nuggets", "Tootsie Pops",
    "Smarties", "Limca", "Mountain Dew", "Haldirams Aloo Bhujia",
    "Haldirams Fried Peanuts", "Rajaram Peanut Candy", "Bharath Chikki",
    "Lilys Brownie Fudge", "Lilys Tart Cookies", "Cadburys Diarymilk",
    "Bournvita", "Boost", "Horlicks", "Maltova", "Milo", "Maple Syrup",
    "Haldirams Khatta Meeta", "Haldirams Bhel Puri", "Keebler Chips Deluxe",
    "Nature Valley Protien Bar", "Quaker Chewy", "Kind Health Grains",
Example #28
        def Prediction(dftest, testfile):
            ## Correlation Drop
            try:
                dftest.drop(corCol, axis=1, inplace=True)
            except:
                pass
            ## Missing Drop
            try:
                dftest.drop(drop_list, axis=1, inplace=True)
            except:
                pass

            def impute_nan_median(df, variable):
                if df[variable].isnull().sum() > 0:
                    df[variable + "NAN"] = np.where(df[variable].isnull(), 1,
                                                    0)
                    df[variable] = df[variable].fillna(
                        df_test[variable].median())

            def impute_nan_cat_mode(df, variable):
                if df[variable].isnull().sum() > 0:
                    df[variable + "NAN"] = np.where(df[variable].isnull(), 1,
                                                    0)
                    frequent = df_test[variable].mode()[0]
                    df[variable].fillna(frequent, inplace=True)

            for i in dftest.columns:
                if (np.dtype(dftest[i]) == "object"):
                    impute_nan_cat_mode(dftest, i)
                else:
                    impute_nan_median(dftest, i)

            ## Outliers

            def outliers_gaussion(df, variable):
                upper_boundary = df_test[variable].mean(
                ) + 3 * df[variable].std()
                lower_boundary = df_test[variable].mean(
                ) - 3 * df[variable].std()
                df[variable] = np.where(df[variable] > upper_boundary,
                                        upper_boundary, df[variable])
                df[variable] = np.where(df[variable] < lower_boundary,
                                        lower_boundary, df[variable])
                return df[variable].describe()

            def outliers_skewed(df, variable):
                IQR = df_test[variable].quantile(
                    0.75) - df_test[variable].quantile(0.25)
                lower_bridge = df_test[variable].quantile(0.25) - (IQR * 1.5)
                upper_bridge = df_test[variable].quantile(0.75) + (IQR * 1.5)
                df[variable] = np.where(df[variable] > upper_bridge,
                                        upper_bridge, df[variable])
                df[variable] = np.where(df[variable] < lower_bridge,
                                        lower_bridge, df[variable])
                return df[variable].describe()

            try:
                if Outliers_handle == "Handle Outliers":
                    for i in numeric_cols:
                        outliers_gaussion(dftest, i)
            except:
                pass

            ## One hot encoding

            def allonehotencoding_test(df):
                for i in encode_list:
                    try:
                        for categories in df[i]:
                            df[i] = np.where(df[i] == categories, 1, 0)
                    except:
                        df[i] = np.where(False, 1, 0)

                return df

            allonehotencoding_test(dftest)
            for i in col_after_endoded_all:
                if i not in dftest.columns:
                    dftest[i] = np.where(False, 1, 0)

            dftest = dftest.loc[:, col_after_endoded_all]
            dftest = dftest.drop(dftest.select_dtypes("object").columns,
                                 axis=1)

            ## feature importance

            if Feature_importance == "Reduce Features":
                dftest = dftest[X_Selected]
            ## Standardization
            if standard_apply == "Apply Standardization":
                try:
                    dftest[g_cols] = scaler_obj.fit_transform(dftest[g_cols])
                except:
                    pass

                for i in s_cols:
                    if scale_s == "Logarithmic Transformation":
                        dftest[i] = np.log(dftest[i].replace(0, 0.01))

                    elif scale_s == "Box Cox Transformation":
                        try:
                            dftest[i], parameters = stat.boxcox(
                                dftest[i].replace(0, 0.01))
                        except:
                            pass
                    else:
                        dftest[i] = dftest[i]**(1 / 1.2)
            ##PCA
            if use_pca == "Use principal component analysis":
                dftest = pca1.transform(dftest)
                dftest = pd.DataFrame(dftest)
            ## Prediction
            try:
                dftest = dftest.rename(
                    columns=lambda x: re.sub('[^A-Za-z0-9_]+', '', x))
            except:
                pass
            try:

                result = clf.predict(dftest)
                results_table = result.iloc[:, 0]
                st.write(results_table)

            except:

                result = clf.predict(dftest)
                output = pd.DataFrame()
                outputkaggle = pd.DataFrame()
                output[target_column] = np.array(result)
                for i in testfile:
                    output[i] = testfile[i]
                st.write(output)
                col_id = list(testfile.columns)[0]
                outputkaggle[col_id] = testfile.iloc[:, 0]
                outputkaggle[target_column] = np.array(result)

                def get_table_download_link(df):

                    csv = df.to_csv(index=False)
                    b64 = base64.b64encode(csv.encode()).decode(
                    )  # some strings <-> bytes conversions necessary here
                    return f'<a href="data:file/csv;base64,{b64}" download="Output.csv">Download Output csv file</a>'

                st.markdown(get_table_download_link(output),
                            unsafe_allow_html=True)
                st.cache()  # note: st.cache() on its own is a no-op
                try:

                    def get_table_download_link_kaggle(df):

                        csv = df.to_csv(index=False)
                        b64 = base64.b64encode(csv.encode()).decode(
                        )  # some strings <-> bytes conversions necessary here
                        return f'<a href="data:file/csv;base64,{b64}" download="Submission.csv">Download Submission csv file for Kaggle </a>'

                    st.markdown(get_table_download_link_kaggle(outputkaggle),
                                unsafe_allow_html=True)
                    st.write("https://github.com/MustafaBozkurt84")
                    st.write(
                        "https://streamlit-machine-learning-app.herokuapp.com/"
                    )
                except:
                    pass
Example #29
#!/usr/bin/env python
# coding: utf-8

# In[51]:


import pandas as pd
import streamlit as st
import numpy as np
import plotly.express as px

visualize = st.cache(pd.read_csv)('Recommendation_1.csv')
visualize2= visualize.drop(['Unnamed: 0'],axis=1)

data = st.cache(pd.read_csv)('dataSubset5000_take2.csv')
df = st.cache(pd.read_csv)('Item_list.csv')
#df= pd.read_csv(r'C:\Users\prafu\Item_list.csv')
df=df.reset_index()
df.rename(columns={'index': 'itemID'}, inplace=True)

data_all=data.merge(df, on='itemID', how='left')


def show_about():
        ''' Home / About page '''
        st.title('Recommendation systems using SVM')
        my_slot1 = st.empty()
        # Appends an empty slot to the app. We'll use this later.

        my_slot2 = st.empty()
        # Appends another empty slot.
Example #30
import pandas as pd
import streamlit as st

from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import Category20
from bokeh.transform import factor_cmap
from PIL import Image

st.title("Data Visualization using Streamlit")

st.text('The data used for this projects has been downloaded from Kaggle.')
st.text('The records on this file are related to Ted events published between')
st.text('June 2006 and May 2012.')

image = Image.open('Tedlogo.png')
st.image(image, width=560, use_column_width=False, format='PNG')

#Loads data file
ted_data = st.cache(pd.read_csv)('talks.csv')

#Changing the 'publish_date' column data type to 'datetime' as this will allow better data manipulation

ted_data['publish_date'] = pd.to_datetime(ted_data['publish_date'])
ted_data['Year'] = ted_data['publish_date'].dt.year
ted_data['Month'] = ted_data['publish_date'].dt.month

#Dropping rows with empty related tags
ted_data.drop(ted_data[ted_data['related_tags'] == '[]'].index, inplace=True)

#Cleaning the related_tags columns to identify to get the first topic in the list. This topic
#will be used as the main reference topic for the record.

main1 = []
for i in ted_data['related_tags']: