示例#1
0
文件: myapp.py 项目: mehmetsuci/DL
def main():
    """Render the EDA tool picker and the page for the selected tool.

    A select box offers Pandas-Profiling, SweetViz, D-tale and About. Each of
    the three tool pages takes a CSV upload, previews the parsed frame and
    builds the matching report when the "Generate Report" button is pressed.
    The About page shows a short description and a link.
    """
    menu = ['Pandas-Profiling', 'SweetViz', 'D-tale', 'About']
    option = st.selectbox("Select Tool for Viz", menu)

    if option == 'Pandas-Profiling':
        st.header("Pandas-Profiling")
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            load_csv = pd.read_csv(data_file)
            st.write(load_csv.head())
            st.success("Successfully uploaded!")
            if st.button('Generate Report'):
                report = ProfileReport(load_csv,
                                       title="CSV Profiling Report",
                                       explorative=True)
                st.write('---')
                st.header('**Pandas Profiling Report**')
                st_profile_report(report)

    elif option == 'SweetViz':
        st.header("SweetViz")
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            load_csv = pd.read_csv(data_file)
            # Confirm the upload only once a file exists and has been parsed;
            # previously the banner was shown before anything was uploaded.
            st.success("Successfully uploaded!")
            st.write(load_csv)
            st.write('---')
            st.header('**SweetViz Profiling Report**')
            if st.button('Generate Report'):
                report = sv.analyze(load_csv)
                report.show_html()
                display_sweetviz("SWEETVIZ_REPORT.html")

    elif option == 'D-tale':
        st.header('D-tale')
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            load_csv = pd.read_csv(data_file)
            # Same ordering fix as the SweetViz page.
            st.success("Successfully uploaded!")
            st.write(load_csv)
            st.write('---')
            st.header('**D-Tale Profiling Report**')
            if st.button('Generate Report'):
                dtale.show(load_csv)
                # NOTE(review): the host name and the trailing data id "1" are
                # hard-coded, so this only matches one machine and the first
                # D-Tale instance -- consider deriving the URL from the object
                # returned by dtale.show().
                components.iframe('http://dell-virlgti:40000/dtale/main/1',
                                  width=1500,
                                  height=800,
                                  scrolling=True)

    # Compare the selected option, not the menu list: `menu == 'About'` is
    # always False, which made this page unreachable.
    elif option == 'About':
        st.subheader(
            "Simple tool for better and quick visualization and EDA!!")
        st.write()
        st.write(
            "check out this [link](https://share.streamlit.io/mesmith027/streamlit_webapps/main/MC_pi/streamlit_app.py)"
        )
def display_data():
    """Open the loader's data in D-Tale and keep it up until Enter is pressed.

    Fetches the data from ``data_loader.get_data()``, starts a D-Tale instance
    on ``localhost`` with the browser opened, waits for a line on stdin, then
    kills the instance.
    """
    instance = dtale.show(data_loader.get_data(),
                          host="localhost",
                          open_browser=True)
    print("Hit enter to exit.")
    input()  # block here so the D-Tale instance stays alive while in use
    print("Exiting...")
    instance.kill()
示例#3
0
 def _dtale_show(self, *args, **kwargs) -> dtale.views.DtaleData:
     """Forward every argument to ``dtale.show`` and return its result.

     After the first call made while ``self._first_show`` is truthy, pauses
     for 0.3 seconds and clears the flag; later calls return immediately.
     """
     instance = dtale.show(*args, **kwargs)
     if not self._first_show:
         return instance
     # The first show calls, when fired in rapid succession, can trigger race
     # conditions inside dtale, so give the first one a moment to settle.
     time.sleep(0.3)
     self._first_show = False
     return instance
 def displaydata(self):
     """Show ``self.triggerData`` in D-Tale and dump it to ``triggers.txt``.

     Builds a DataFrame from the ``self.triggerData`` mapping, transposes it,
     opens it in a D-Tale browser tab and prints the instance URL. The string
     form of ``self.triggerData`` is then written to ``triggers.txt`` in the
     current working directory, replacing any existing file.
     """
     df = pd.DataFrame.from_dict(self.triggerData).T
     d = dtale.show(df, ignore_duplicate=True)
     d.open_browser()
     print(d._url)
     # A context manager closes (and flushes) the file; the handle used to be
     # left open, so the write was only guaranteed at interpreter exit.
     with open('triggers.txt', 'w') as trigger_file:
         trigger_file.write(str(self.triggerData))
    def backTestPatterns(self):
        """Back-test each pattern's signals and show average next-day returns.

        For every symbol in ``self.symboldata`` and every pattern under it,
        walks the ``(date, value)`` signal pairs. Zero signals and signals
        dated today are skipped. For the rest, the percentage change between
        the close on the signal date and the close one calendar day later is
        read from ``self.tickerDf[symbol].close`` and added to the positive
        (``value > 0``) or negative (``value < 0``) bucket.

        Per pattern the result holds:
          * ``'POS'`` / ``'NEG'`` -- per-symbol mean returns, summed over the
            symbols and divided by ``len(self.symbols)``;
          * ``'DATA'`` -- the number of signals that contributed a return.

        The table is opened in D-Tale and the raw dict is printed.
        """
        patternReturns = {}
        # Loop invariants, computed once instead of once per signal row.
        today = str(datetime.datetime.today()).split(' ')[0]
        one_day = datetime.timedelta(days=1)

        for s in self.symboldata:
            print(s)
            for pattern in self.symboldata[s]:
                # Create the accumulator the first time a pattern is seen.
                # Tying initialisation to `s == self.symbols[0]` raised
                # KeyError (or wiped earlier totals) whenever that symbol was
                # not the first one iterated or lacked the pattern.
                totals = patternReturns.setdefault(
                    pattern, {'POS': 0, 'NEG': 0, 'DATA': 0})
                Postriggers = 0
                Posreturns = 0
                Negstriggers = 0
                Negreturns = 0

                # `.items()` replaces `Series.iteritems()`, which pandas 2.0
                # removed.
                for date, value in self.symboldata[s][pattern].items():
                    try:
                        if value == 0:
                            continue
                        if str(date).split(' ')[0] == today:
                            continue
                        if value > 0 or value < 0:
                            closes = self.tickerDf[s].close
                            triggerPrice = closes[str(date)]
                            nextDayPrice = closes[str(date + one_day).split(' ')[0]]
                            change = (nextDayPrice / triggerPrice - 1) * 100
                            # Count the trigger only after its return was
                            # computed, so a failed price lookup no longer
                            # inflates the divisor of the average.
                            if value > 0:
                                Posreturns += change
                                Postriggers += 1
                            else:
                                Negreturns += change
                                Negstriggers += 1
                    except Exception:
                        # Best effort: rows whose prices cannot be looked up
                        # (e.g. no close on the following calendar day) are
                        # skipped. `Exception` rather than a bare `except`
                        # lets KeyboardInterrupt/SystemExit through.
                        continue

                if Postriggers:
                    totals['POS'] += Posreturns / Postriggers
                    totals['DATA'] += Postriggers
                if Negstriggers:
                    totals['NEG'] += Negreturns / Negstriggers
                    totals['DATA'] += Negstriggers

        symbol_count = len(self.symbols)
        for pattern in patternReturns:
            patternReturns[pattern]['POS'] = patternReturns[pattern]['POS'] / symbol_count
            patternReturns[pattern]['NEG'] = patternReturns[pattern]['NEG'] / symbol_count
        df = pd.DataFrame.from_dict(patternReturns).T
        d = dtale.show(df, ignore_duplicate=True)
        d.open_browser()
        print(patternReturns)
示例#6
0
# Keep only the rows whose registration_year falls in the 1900-2016 range.
autos = autos[autos["registration_year"].between(1900, 2016)]

# In[14]:

# Inspect the summary statistics after removing the rows identified above.
autos.describe()

# In[15]:

# Inspect the "registration_year" column in more detail: the normalized value
# counts give each year's share of the rows, which shows the most common year.

autos["registration_year"].value_counts(normalize=True)

# In[16]:

# Open the filtered frame in D-Tale.
dtale.show(autos)

# **Write a summary of the above steps, as in the removal of outliers and the calculation of value counts for the registration year.**

# **The next section of analysis could essentially focus on the estimation of mean price and mileage across brands**
#
# - We will group (Aggregate the data) across the most common brands, create dictionaries for each of the variables and save the values
#
#

# In[17]:

# Share of rows per brand.
count_of_brands = autos["brand"].value_counts(normalize=True)

# Restricting the above analysis to the top 5% of the brands would be a reasonable assumption here.
import pandas as pd
import dtale

# Load the workbook (replace the placeholder path with a real file) ...
data = pd.read_excel("YOUR_FILE_PATH.xlsx") 
# ... start a D-Tale instance for it and open that instance in the browser.
d= dtale.show(data)
d.open_browser()
示例#8
0
# SweetViz: analyze each of the three frames and write one HTML report per
# frame. NOTE(review): the output paths are absolute and user-specific.
sweet_report1 = sv.analyze(df_hospital)
sweet_report1.show_html('/Users/hantswilliams/Dropbox/Biovirtua/Python_Projects/ahi/AHI_STATS_507/Week13_Summary/output/sweet_report_df_hospital.html')

sweet_report2 = sv.analyze(df_outpatient)
sweet_report2.show_html('/Users/hantswilliams/Dropbox/Biovirtua/Python_Projects/ahi/AHI_STATS_507/Week13_Summary/output/sweet_report_df_outpatient.html')

sweet_report3 = sv.analyze(df_inpatient)
sweet_report3.show_html('/Users/hantswilliams/Dropbox/Biovirtua/Python_Projects/ahi/AHI_STATS_507/Week13_Summary/output/sweet_report_df_inpatient.html')



# D-Tale: only the hospital frame is opened here.
import dtale 

d = dtale.show(df_hospital, ignore_duplicate=True)
d.open_browser()
########################################################################################################################
########################################################################################################################
########################################################################################################################







########################################################################################################################
########################################################################################################################
########################################################################################################################
### Automatic Data Cleaning 
示例#9
0
wearables_data_url = 'https://raw.githubusercontent.com/programmablewealth/aavegotchi-stats/master/src/data/wearables/wearables.json'
# Bound the request and fail on an HTTP error status instead of hanging
# forever or trying to JSON-decode an error page.
wearables_response = requests.get(wearables_data_url, timeout=30)
wearables_response.raise_for_status()
wearables_data = wearables_response.json()
# Map every wearable id (as keyed in the JSON) to the value stored under its
# "0" field -- presumably the wearable's name, going by this variable's name.
wearables_name = {i: wearables_data[str(i)]["0"] for i in wearables_data}


def _wearable_label(wearable_id):
    """Return the "0" field of a wearable entry, or the string 'NaN' for id 0."""
    return 'NaN' if wearable_id == 0 else wearables_data[str(wearable_id)]["0"]


# For each equipment slot column, add a "<slot> Item" column holding the label
# of the wearable id found in that slot (one loop instead of eight copies).
for slot in ('Body', 'Face', 'Eyes', 'Head', 'Left Hand', 'Right Hand',
             'Pet', 'Background'):
    gotchi_sales[slot + ' Item'] = gotchi_sales[slot].apply(_wearable_label)

#%%

#%%
# Persist the enriched sales table to gotchi.csv in the working directory.
gotchi_sales.to_csv('gotchi.csv')
# %%
# Open the table in D-Tale; the bare `d` at the end of the cell is there so an
# interactive cell runner displays the instance.
import dtale
d = dtale.show(gotchi_sales)
d
# %%
示例#10
0
import pandas as pd

# Ticker symbol passed to the option helpers below.
symbol="SPY"
# Index into the ticker's `options` sequence, selecting which expiration to load.
period= 1
# GET SYMBOL CALLS
def getOptionCalls(symbol, period):
    """Return element 0 (the calls) of one option chain for *symbol*.

    *period* is an index into the ticker's ``options`` sequence and selects
    which expiration is passed to ``option_chain``.

    NOTE(review): if a ``NameError`` is raised while doing this (for example
    when ``yf`` has not been imported), the ``NameError`` class itself is
    returned instead of being raised. Any other exception propagates.
    """
    try:
        tk = yf.Ticker(symbol)
        expiration = tk.options[period]
        return tk.option_chain(expiration)[0]
    except NameError:
        return NameError

# GET SYMBOL PUTS
def getOptionPuts(symbol, period):
    """Return element 1 (the puts) of one option chain for *symbol*.

    *period* is an index into the ticker's ``options`` sequence and selects
    which expiration is passed to ``option_chain``.

    NOTE(review): if a ``NameError`` is raised while doing this (for example
    when ``yf`` has not been imported), the ``NameError`` class itself is
    returned instead of being raised. Any other exception propagates.
    """
    try:
        tk = yf.Ticker(symbol)
        expiration = tk.options[period]
        return tk.option_chain(expiration)[1]
    except NameError:
        return NameError


# Fetch the puts for the configured symbol/expiration index and browse them in
# D-Tale. NOTE(review): getOptionPuts hands back the NameError class when a
# NameError occurs, in which case `options` is not a table.
options=getOptionPuts(symbol,period)
d= dtale.show(options)
d.open_browser()
# Add a purple 'Dismissed' text label at (0.01, 0.02) in 'paper' reference
# coordinates, left-anchored and without an arrow, then render the figure.
fig.add_annotation(
    dict(xref='paper',
         yref='paper',
         x=0.01,
         y=0.02,
         xanchor='left',
         yanchor='middle',
         font=dict(family='Arial', size=14, color='purple'),
         showarrow=False,
         text='Dismissed'))
fig.show()

# D-Tale EDA
# pip install dtale
import dtale
d = dtale.show(departures)
d.open_browser()

# pandas-profiling
# pip install pandas-profiling
# NOTE(review): the report object is built but not stored or written anywhere;
# presumably this relies on a notebook displaying the last expression.
import pandas_profiling
pandas_profiling.ProfileReport(departures)

# AutoViz
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()

# Field separator handed to the AutoViz call that follows.
sep = ','
dft = AV.AutoViz(filename="",
                 sep=sep,
                 depVar='fyear',
示例#12
0
#https://github.com/man-group/dtale
#pip install dtale
#pip install yfinance

import dtale
import yfinance as yf


def getSymbolHistoricalPrice(tickerSymbol):
    """Return the result of ``history("max")`` for *tickerSymbol*.

    NOTE(review): if a ``NameError`` is raised while doing this (for example
    when ``yf`` has not been imported), the ``NameError`` class itself is
    returned instead of being raised. Any other exception propagates.
    """
    try:
        return yf.Ticker(tickerSymbol).history("max")
    except NameError:
        return NameError


# Download the full price history for SPY and browse it in D-Tale.
ticker = "SPY"
dfQuoteData = getSymbolHistoricalPrice(ticker)
d = dtale.show(dfQuoteData)
d.open_browser()
示例#13
0
# NOTE(review): `df` is used throughout this listing but is only assigned near
# the end (sns.load_dataset); presumably it is defined earlier in the full file.
from pandas_profiling import ProfileReport
# Alternative with a title and full-width HTML:
#profile = ProfileReport(df, title="Tips Dataset", html={'style': {'full_width': True}}, sort="None")
profile = ProfileReport(df) # for a large dataset, pass minimal=True
# Render the report inline in the notebook.
profile.to_notebook_iframe()
# Alternative: write the report to a file instead.
#profile.to_file(output_file="your_report.html")
 
# For Jupyter Notebook: explorative + minimal report written to output.html
from pandas_profiling import ProfileReport
profile = ProfileReport(df, explorative=True, minimal = True)
profile.to_file('output.html')
 
### D-Tale (EDA)
# For Spyder: start an instance and open it in the browser
import dtale
import plotly.express as px
d = dtale.show(df, ignore_duplicate=True)
d.open_browser()

# For Jupyter Notebook: only start the instance
import dtale
dtale.show(df)

 
### Pandas_ui
# NOTE(review): pandas_ui is pointed at a hard-coded local CSV path, not at the
# `df` loaded from seaborn on the line before it.
import seaborn as sns
df = sns.load_dataset("tips")
from pandas_ui import *
pandas_ui('D:\\KNOWLEDGE KORNER\\ANALYTICS\\MISC\\Practice\\Kaggle & Hackathons\\Tips\\tips.csv')
 
#==============================================================================
### Upload CSV Files
示例#14
0
import pandas as pd
titanic = pd.read_csv('titanic.csv')
# `head` is a method: call it so the first rows are printed rather than the
# bound-method repr.
print(titanic.head())

# EDA in two lines of code using pandas_profiling
import pandas_profiling as pp

profile = pp.ProfileReport(titanic, explorative=True)
profile.to_file('output.html')

# EDA using Sweetviz

import sweetviz as sv
sweet_report = sv.analyze(titanic)
sweet_report.show_html('sweet_report.html')

# EDA using AutoViz

from autoviz.AutoViz_Class import AutoViz_Class
# The class is constructed without arguments; the DataFrame goes to the
# AutoViz() method through `dfte`, with an empty filename. Passing the frame
# to the constructor never ran any analysis.
aviz = AutoViz_Class()
aviz.AutoViz(filename="", dfte=titanic)

# EDA using D-Tale
import dtale
dtale.show(titanic, ignore_duplicate=True)
示例#15
0
import dtale
import pandas as pd

# One-row demo frame with columns a, b and c.
df = pd.DataFrame([dict(a=1, b=2, c=3)])

# Assigning a reference to a running D-Tale process
d = dtale.show(df)

# Accessing data associated with D-Tale process
tmp = d.data.copy()
tmp['d'] = 4

# Altering data associated with D-Tale process
# FYI: this will clear any front-end settings you have at the time for this process (filter, sorts, formatting)
d.data = tmp

# Shutting down D-Tale process
# NOTE(review): the statements after this one keep using `d` once it has been
# killed; presumably this listing is a tour of the API rather than a script
# meant to run top to bottom -- confirm before executing it as-is.
d.kill()

# using Python's `webbrowser` package it will try and open your server's default browser to this process
d.open_browser()

# There is also some helpful metadata about the process
d._data_id  # the process's data identifier
d._url  # the url to access the process

d2 = dtale.get_instance(
    d._data_id
)  # returns a new reference to the instance running at that data_id

dtale.instances()  # prints a list of all ids & urls of running D-Tale sessions
示例#16
0
def main():
    """Render the Streamlit app: a sidebar menu that switches between pages.

    Pages, keyed by the sidebar selection:
      * "Pandas Profile"   -- CSV upload, preview and a pandas-profiling report.
      * "D-Tale"           -- CSV upload, preview, a D-Tale instance opened in
                              the browser and, on button press, a SweetViz
                              report.
      * "Visualisation"    -- Plotly charts built from the cleaned dataset.
      * "Nuage de mots"    -- pre-rendered word-cloud images.
      * "Machine Learning" -- optional dataset previews, a missing-values map,
                              scatter/correlation matrices and model scoring.
      * "A propos"         -- the footer HTML.
    Any other selection (i.e. "Accueil") shows the header HTML.
    """
    menu = [
        "Accueil", "Pandas Profile", "D-Tale", "Visualisation",
        "Nuage de mots", "Machine Learning", "A propos"
    ]
    selection = st.sidebar.selectbox("Fonctions", menu)

    if selection == "Pandas Profile":
        components.html(ha.alert_panda_prof(), height=190)
        my_data = st.file_uploader("Charger le fichier CSV", type=['csv'])
        if my_data is not None:
            df = pd.read_csv(my_data)
            st.dataframe(df.head(10))
            eda_profil = ProfileReport(df,
                                       title='Pandas Profiling Report...',
                                       explorative=True)
            st_profile_report(eda_profil)
    elif selection == "D-Tale":
        components.html(ha.alert_dtale(), height=190)
        data_file = st.file_uploader("Charger le fichier CSV", type=['csv'])
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            d = dtale.show(df)
            d.open_browser()
            # NOTE(review): the button on the D-Tale page produces a SweetViz
            # report -- confirm this is intended and not a copy/paste leftover.
            if st.button("Générer le rapport"):
                report = sv.analyze(df)
                report.show_html()
                utils.st_display_sweetviz("SWEETVIZ_REPORT.html")
                components.html(ha.alert_warning(), 1000)

    elif selection == "Visualisation":
        # Header images
        image = Image.open('dataviz.png')
        col2, col1 = st.beta_columns([1, 3])
        col2.image(
            "https://idoc-projets.ias.u-psud.fr/redmine/attachments/download/121/sunburst.gif",
            caption='',
            width=None,
            use_column_width=True)
        col1.image(image, caption='', width=None, use_column_width=True)
        # Category vs. rating / reviews histograms
        cat1, cat2 = st.beta_columns(2)
        datas = utils.lire_dataset(my_db_clean)
        fig = px.histogram(datas,
                           x='Rating',
                           y='Category',
                           title='Somme des notes par catégorie',
                           color='Category')
        cat1.plotly_chart(fig)
        fig = px.histogram(datas,
                           x='Reviews',
                           y='Category',
                           title='Somme des commentaires par catégorie',
                           color='Category')
        cat2.plotly_chart(fig)
        # Sunburst and pie charts
        perc1, perc2 = st.beta_columns(2)
        fig = px.sunburst(datas,
                          path=['Type', 'Category', 'Genres'],
                          title='Types, Catégories et genres')
        perc1.plotly_chart(fig)
        fig = px.pie(datas,
                     names='Type',
                     title='Pourcentage apllication gratuites/Payantes',
                     color_discrete_sequence=px.colors.sequential.RdBu)
        perc2.plotly_chart(fig)
        # Rating distribution: density plot and binned bar chart
        hist1, hist2 = st.beta_columns(2)
        hist_data = [list(datas['Rating'])]
        group_labels = ['Rating']
        fig = ff.create_distplot(hist_data, group_labels)
        hist1.plotly_chart(fig)
        counts, bins = np.histogram(datas.Rating, bins=range(0, 6, 1))
        # Replace the bin edges by the bin centres for the x axis.
        bins = 0.5 * (bins[:-1] + bins[1:])
        fig = px.bar(x=bins,
                     y=counts,
                     labels={
                         'x': 'Rating',
                         'y': 'Count'
                     },
                     title='Distribution des notes')
        hist2.plotly_chart(fig)
    elif selection == "Nuage de mots":
        # General
        img1, img2 = st.beta_columns(2)
        img1.image('datas/wordcloud/general.png')
        img2.image('datas/wordcloud/free_app.png')
        # Free app
        img3, img4 = st.beta_columns(2)
        img3.image('datas/wordcloud/free_app_pos.png')
        img4.image('datas/wordcloud/free_app_neg.png')
        # Paid app
        img5, img6 = st.beta_columns(2)
        img5.image('datas/wordcloud/paid_app.png')
        img6.image('datas/wordcloud/paid_app_pos.png')
        img7, img8 = st.beta_columns(2)
        img7.image('datas/wordcloud/paid_app_neg.png')
    elif selection == "Machine Learning":
        image = Image.open('machine learning.jpg')
        col1, col2 = st.beta_columns([3, 1])
        col2.image(
            "https://static.wixstatic.com/media/bb7b70_d5fde322f7914060b7d997ba9d506a50~mv2.gif",
            caption='',
            width=None,
            use_column_width=True)
        col1.image(image, caption='', width=None, use_column_width=True)
        # The raw dataset is loaded lazily by whichever checkbox needs it
        # first, so each option works on its own.
        datas = None
        if st.checkbox("Afficher le dataset"):
            datas = utils.lire_dataset(my_db)
            st.write(datas.head())
        if st.checkbox("Afficher graph valeurs manquantes"):
            # Previously `datas` was unbound here unless the dataset checkbox
            # above was also ticked, which raised UnboundLocalError.
            if datas is None:
                datas = utils.lire_dataset(my_db)
            col1, col2 = st.beta_columns([2, 1])
            df = datas.isnull()
            fig = px.imshow(df)
            col1.plotly_chart(fig)
            col2.write(datas.isnull().sum())
            col2.write(
                "On peut voir que la colonne **Rating** contient la plupart des valeurs manquantes. A sa suite on a **Current Ver**, **Adroid ver** et **Type**."
            )
        if st.checkbox("Afficher DB ok"):
            datas = utils.lire_dataset(my_db_clean)
            st.write(datas.head())
            mat1, mat2 = st.beta_columns(2)
            fig = px.scatter_matrix(
                datas,
                dimensions=["Rating", "Reviews", "Size", "Installs", "Price"],
                color="Type",
                symbol="Type",
                title="Matrix de dispersion des variables continues")
            fig.update_traces(diagonal_visible=False)
            mat1.plotly_chart(fig)
            fig = px.imshow(
                datas[["Rating", "Reviews", "Size", "Installs",
                       "Price"]].corr(),
                labels=dict(x="", y="", color="Corrélation"),
            )
            mat2.plotly_chart(fig)
        if st.checkbox("Make model"):
            mon_score = utils.make_model(my_db_clean)
            st.success(mon_score)
    elif selection == "A propos":
        components.html(hp.pied_de_page(), height=800)
    else:
        components.html(hp.entete_de_page(), height=1600)