示例#1
0
def main():
    """Walk through the pandas options API, printing each step to stdout.

    Demonstrates ``get_option``, ``set_option``, ``reset_option``,
    ``describe_option`` and ``option_context`` in that order, with a
    separator line after every step. Takes no arguments and returns None.
    """
    separator = "==" * 32

    # get_option(name) takes one option name and returns its current value.
    # display.max_rows is the upper limit on the number of rows displayed.
    print(pd.get_option("display.max_rows"))
    print(separator)

    print(pd.get_option("display.max_columns"))
    print(separator)

    # set_option changes the value of an option.
    pd.set_option("display.max_columns", 80)
    print(pd.get_option("display.max_columns"))
    print(separator)

    # reset_option takes an option name and restores its default value.
    pd.reset_option("display.max_columns")
    print(pd.get_option("display.max_columns"))
    print(separator)

    # describe_option prints the description itself and returns None, so it
    # is called directly; wrapping it in print() adds a stray "None" line.
    pd.describe_option("display.max_rows")
    print(separator)

    # option_context() overrides the value only inside the with block.
    with pd.option_context("display.max_rows", 10):
        print(pd.get_option("display.max_rows"))
    # Printed after the block to show the previous value has been restored.
    print(pd.get_option("display.max_rows"))
    print(separator)
示例#2
0
# Notebook-style walkthrough: each bare expression below is a cell whose
# value is meant to be displayed, so statement order matters.
mm = pd.options.display.max_rows
mm

# A Series with exactly `mm` elements.
pd.Series(np.arange(mm))

# Adding one more element changes how the Series is displayed.

pd.Series(np.arange(mm + 1))

# Options can be changed directly or with the functions:
# get_option, set_option, reset_option
#
# All options can be listed with:

pd.describe_option()

# Options can be filtered with a regex.
# NOTE(review): in '^display*' the `*` applies only to the final "y", so the
# pattern matches any option name starting with "displa".

pd.describe_option('^display*')

# To make options persist, put them in the IPython startup file (Linux, ~/.ipython/profile_default/startup).

dfCSV

# The table rendering can be changed as well.
#
# E.g. all values smaller than 6 can be shown in red.


# +
sinfo()  # Writes dependencies to `sinfo-requirements.txt` by default

# # Pandas display options
#
# There are many ways to get sample data to work with, including
# `sklearn.datasets`, `statsmodels.datasets` (includes all R datasets), and
# `quilt` (a package manager for data sets). More
# [details can be found in this SO answer](https://stackoverflow.com/a/29956221/2166823).
# For small examples, I tend to use `seaborn.load_dataset()` since I will
# import seaborn anyway (note these datasets are just there for the `seaborn`
# documentation and may change without notice).

# In[2]:

iris = sns.load_dataset('iris')
iris.head()

# It is a little bit annoying to type `head()` every time I want to look at a
# data frame. `pandas` has options to control the displayed data frame output
# and even a nice search interface to find them.

# In[3]:

# Describe every option whose name matches the pattern 'row'.
pd.describe_option('row')

# In[4]:

pd.set_option('display.max_rows', 9)

# We can see that this has changed the current value.

# In[5]:

pd.describe_option('max_row')

# And if we type `iris` now, we won't get flooded with 60 rows.

# In[6]:
Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/14DmvT0DAGY_tTUW9gAC6jPDVbJ7wv0L4
"""

import pandas as pd
import operator
import matplotlib.pyplot as plt
import seaborn as sns
# Console width (in characters) pandas may use when printing a frame.
desired_width = 320

# Options are addressed by their full dotted names. Short patterns such as
# 'max_rows' can match more than one registered option (newer pandas also
# registers Styler options with similar names) and then raise OptionError.
pd.set_option('display.width', desired_width)
pd.set_option('display.max_colwidth', 400)
pd.describe_option('display.max_colwidth')
pd.set_option('display.max_rows', 9999)

pd.set_option('display.max_columns', 20)

# Column names are supplied explicitly through `names`.
df1 = pd.read_csv("twitter.csv",
                  names=[
                      'tweetid', 'created_at', 'user_name', 'text',
                      'favourite_count', 'retweet_count', 'location',
                      'hashtags'
                  ])

# Replaces cells holding the literal string 'NaN'; real missing values are
# left untouched.
# NOTE(review): read_csv normally parses the text "NaN" as missing already,
# so this may be a no-op -- confirm whether fillna("Unknown") was intended.
df1 = df1.replace(to_replace='NaN', value="Unknown")

# Columns that still contain missing values, then per-column counts.
print(df1.columns[df1.isna().any()])
print(df1['location'].isna().sum())
print(df1['hashtags'].isna().sum())
示例#5
0
pd.get_option('display.max_rows')
pd.get_option('display.max_columns')
# Set the maximum number of rows/columns to display:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# None means: show all rows/columns

# Reset to the default settings:
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')

train = pd.read_csv('titanic_train.csv')
pd.set_option('display.max_columns', None)
width = pd.get_option('display.max_colwidth')
# Change the width of every column:
pd.set_option('display.max_colwidth', 500)
# Change the number of decimal places shown for numbers:
precision = pd.get_option('display.precision')
pd.set_option('display.precision', 2)
print(precision)  # the value that was in effect before the change
print(train.head())
# How to add thousands separators when the numbers are large:
drinks['x'] = drinks.wine_servings * 1000
drinks['y'] = drinks.total_litres_of_pure_alcohol * 1000
print(drinks.head())
pd.set_option('display.float_format', '{:,}'.format)  # applies to floats only, not to integers
print(drinks.head())
# describe_option prints the matching descriptions itself and returns None,
# so it is not wrapped in print() (that would add a stray "None" line).
pd.describe_option('rows')
# Reset every option:
pd.reset_option("all")  # any warning emitted here can be ignored
示例#6
0
from dautil import options
import pandas as pd
import numpy as np
from dautil import log_api

printer = log_api.Printer()

# describe_option prints its text and returns None, so it is called directly.
# Full dotted option names are used throughout: a bare 'precision' or
# 'max_rows' can match several registered options in newer pandas, which
# makes get_option raise OptionError.
pd.describe_option('display.precision')
pd.describe_option('display.max_rows')

printer.print('Initial precision', pd.get_option('display.precision'))
printer.print('Initial max_rows', pd.get_option('display.max_rows'))

# Random multiples of pi; seeded so the frame is reproducible.
np.random.seed(42)
df = pd.DataFrame(np.pi * np.random.rand(6, 2))
printer.print('Initial df', df)

# Show the same frame under dautil's option set, then after resetting it.
options.set_pd_options()
printer.print('df with different options', df)

options.reset_pd_options()
printer.print('df after reset', df)
示例#7
0
 
 
 
 
 
 # ----------------------------------------------------------------------
 # 7 - further development
 # NOTE(review): every line of this fragment carries one leading space and it
 # uses df, df1, df2 and np, none of which are defined here -- it only runs
 # as part of a larger script.

 # Output: write df2 to an Excel file
 df2.to_excel('sample_output.xlsx')

 # Display options
 pd.set_option('display.max_columns', None)
 pd.reset_option('display.max_rows')

 pd.describe_option('float')  # search display options for keyword - 'float'
 pd.reset_option('all')


 # apply also works on a whole DataFrame (axis=1: once per row)
 df1.apply(np.argmax, axis=1)
 df1.applymap(int)   # apply this function to all cells

 # Iteration over rows
 for index, row in df.iterrows():
     print(index, row.abc, row.efg)

 # Long table: stack the columns, then turn the resulting index back into columns
 print(df1)
 df_stack = df1.stack()
 df0 = df_stack.reset_index()
示例#8
0
# Columns in sorted order ...
df[sorted(df.columns)]
# ... or sort the column index in place.
# NOTE(review): this variant sorts descending, unlike the line above.
df.sort_index(axis=1, ascending=False, inplace=True)

##### Q46

# Input
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Solution: limit the printed frame to 10 rows and 10 columns
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)
# df

# Show all available options
# (this call had a stray leading space, which is an IndentationError)
pd.describe_option()


# 47

# Input

df = pd.DataFrame(np.random.random(4)**10, columns=['random'])

# Solution 1: Rounding
df.round(4)

# Solution 2: Use apply to change format
df.apply(lambda x: '%.4f' % x, axis=1)
# or
df.applymap(lambda x: '%.4f' % x)
os.chdir(directorio)
os.listdir()

# Libraries used below
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load our helper modules by executing their source in this namespace.
# Each file is opened in a `with` block so its handle is closed right away.
# NOTE(review): exec() runs whatever these files contain; importing them as
# regular modules would be safer -- confirm they are trusted project files.
for _script in ("Cleaning.py", "Create.py", "Plots.py", "Modelos.py",
                "Funciones utilizadas.py"):
    with open(_script) as _source:
        exec(_source.read())

pd.describe_option('display')
pd.set_option('display.max_rows', 445)
pd.set_option('display.max_columns', 20)

# read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
datos = pd.read_csv('datos_taxi.csv', delimiter=",")

# Apply the cleaning function to the data (per the original author, it
# removes outlier values).
datos = cleaning(datos)

# Apply the create functions: per the original author they derive new
# columns from the data above and return the final dataset.
datos = create1(datos)
datos = create2(datos)
示例#10
0
 def describe(self, pat=""):
     """Describe the pandas options whose names match ``pat``.

     Delegates to ``pd.describe_option`` and hands back its return value.
     """
     description = pd.describe_option(pat)
     return description
pd.reset_option('display.max_rows')  # maximum rows is back at its default value

print("\n maximum rows after reset : ", pd.get_option('display.max_rows'))


# The default for display.max_columns depends on the environment
# (0 in a terminal, 20 otherwise, per the pandas documentation).
print("\n default maximum columns : ", pd.get_option('display.max_columns'))

# set_option -- not get_option -- is what changes a value; get_option only
# accepts the option name.
pd.set_option('display.max_columns', 35)

print("\n maximum columns after changing : ", pd.get_option('display.max_columns'))

pd.reset_option('display.max_columns')

print("\n maximum columns after reset : ", pd.get_option('display.max_columns'))

# describe_option prints its own text and returns None, so the spacing is
# printed separately instead of passing the call to print().
print("\n\n\n")
pd.describe_option('display.max_rows')

print("\n\n\n")
pd.describe_option('display.max_columns')


# In[13]:


# Indexing and Selecting Data

# loc: used for label-based indexing

datadict = {"name":['john smith','stephen hawkins','joe root','shahid afridi','ms dhoni']
           ,"salary":[13000,56000,32000,58000,20400]
           ,"department":['sales','marketing','marketing','sales','testing']}
示例#12
0
import pandas as pd
import numpy as np

# Current values of the most commonly tuned display options.
print(pd.get_option('display.max_rows'))
print(pd.get_option('display.max_columns'))
print(pd.get_option('display.max_colwidth'))
print(pd.get_option('display.precision'))

# describe_option prints the description itself and returns None, so it is
# called directly (print() around it would add a "None" line).
pd.describe_option('display.max_rows')
pd.describe_option('display.precision')

# Limit the number of columns and show the effect on a wide frame.
pd.set_option('display.max_columns', 5)
print(pd.get_option('display.max_columns'))

my_df = pd.DataFrame(np.arange(1, 1001).reshape(10, 100))
print(my_df)

# Limit the number of rows and show the effect on a long frame.
# Echo the option that was just changed (max_rows, not max_columns).
pd.set_option('display.max_rows', 10)
print(pd.get_option('display.max_rows'))

my_df = pd.DataFrame(np.arange(1, 1001).reshape(100, 10))
print(my_df)

# Back to the defaults.
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')
print(pd.get_option('display.max_rows'))
print(pd.get_option('display.max_columns'))

# Number of decimal places used when printing floats.
pd.set_option('display.precision', 5)
my_df = pd.DataFrame(np.random.randn(10))
print(my_df)
# Notebook-style cells; `drinks` is a DataFrame defined outside this fragment.
drinks['x'] = drinks.wine_servings * 1000

drinks['y'] = drinks.total_litres_of_pure_alcohol * 1000

drinks.head()

# Add a thousands separator (,) so values read like 3,000 and 1,000,000

pd.set_option('display.float_format', '{:,}'.format) # a Python format function: use "," as the thousands separator
# Only float columns are affected

drinks.head()

# Reading up on the pandas options

pd.describe_option() # all options are displayed

pd.describe_option('rows') # only options with "rows" in their names

# Resetting all options

pd.reset_option('all')


"""***Creating a Pandas DataFrame***"""

import pandas as pd

"""Creating DF from a dictionary"""

# Dict keys become column names, the lists become the column values.
pd.DataFrame({'id':[100, 101, 102], 'color': ['red', 'blue','red']})
示例#14
0
# Relative paths of the CSV files used by this script.
# NOTE(review): the train/valid/test meaning is inferred from the names only.
USER_TRAIN = 'sub/user_train.csv'
USER_VALID = 'sub/user_valid.csv'
USER_TEST = 'sub/user_test.csv'

NEW_PRODUCT = 'sub/product.csv'
MINI_TRAIN_LABEL = 'sub/mini_train_label.csv'
MINI_TEST_LABEL =  'sub/mini_test_label.csv'

# Result and intermediate files (under res/ and cache/).
RESULT_FILE = 'res/result.csv'
TMP_ACT_SUBMIT = 'cache/act_submit.csv'
TMP_USER_SUBMIT = 'cache/user_submit.csv'

TMP_MINI_TRAIN = 'cache/mini_train.csv'
TMP_MINI_TEST = 'cache/mini_test.csv'

# Print the description of the option(s) matching "use_inf_as_null".
# NOTE(review): describe_option raises if no registered option matches;
# confirm this option still exists in the pandas version in use.
pd.describe_option("use_inf_as_null")

def simple_choose(group):
    """Label a group by whether it holds type 2 but neither type 3 nor type 4.

    Adds a constant ``lastady_addcart_label`` column (1 or 0) to ``group``
    in place and returns its ``sku_id``, ``user_id`` and label columns.
    """
    seen_types = set(group['type'])
    qualifies = 2 in seen_types and not (3 in seen_types or 4 in seen_types)
    group['lastady_addcart_label'] = 1 if qualifies else 0
    wanted_columns = ['sku_id', 'user_id', 'lastady_addcart_label']
    return group[wanted_columns]


def get_data_by_date(data, start_date, end_date):
    '''
    Input:
        data1: (month, day)
    Output:
# max_colwidth
train = pd.read_csv(
    r'D:\PyCharmCommunityEdition2017.2.4\PyTests\Pandas\csvFiles\titanic_train.csv'
)
print(pd.get_option('display.max_colwidth'))  # 50
pd.set_option('display.max_colwidth', 1000)

# precision (decimal places)
# The full option name is required: a bare 'precision' can match more than
# one registered option in newer pandas and then raises OptionError.
print(pd.get_option('display.precision'))  # 6
pd.set_option('display.precision', 3)
print(train.head())
print(train.dtypes)

# bonus tip 1
# describe_option prints its text and returns None, so it is called directly.
pd.describe_option()  # this is useful while you are not connected to the Internet
pd.describe_option('rows')

# bonus tip 2
pd.reset_option('all')  # reset all options (ignore future warning)

# Lesson 29: Create a pandas DataFrame from another object

import pandas as pd
import numpy as np

# NOTE(review): the original comment said the columns come out in
# alphabetical order; current pandas keeps the dict's key order ('id', then
# 'color') -- confirm against the pandas version in use.
print(pd.DataFrame({
    'id': [100, 101, 102],
    'color': ['red', 'blue', 'green']
}))
示例#16
0
# Options and Custimization

import pandas as pd

# Show the limit, raise it, show it again, then restore the default.
print(pd.get_option("display.max_rows"))
pd.set_option("display.max_rows", 80)

print(pd.get_option("display.max_rows"))
pd.reset_option("display.max_rows")

# Describe every option
pd.describe_option()
print()
print()
pd.describe_option("display.max_rows")

# Options are only changed inside the with block
with pd.option_context("display.max_rows", 10):
    print(pd.get_option("display.max_rows"))
# Printed after the block so the output shows the value being restored
# (with both prints inside the block, the override is just shown twice).
print(pd.get_option("display.max_rows"))
示例#17
0
import pandas as pd

# Show the current limits before anything is changed.
for _name in ('display.max_rows', 'display.max_columns'):
    print(pd.get_option(_name))

# Raise each limit and echo the new value right after setting it.
for _name, _limit in (('display.max_rows', 80), ('display.max_columns', 30)):
    pd.set_option(_name, _limit)
    print(pd.get_option(_name))

# Restore the row limit to its default and confirm it.
pd.reset_option('display.max_rows')
print(pd.get_option('display.max_rows'))

pd.describe_option('display.max_rows')

# The override applies only inside the with block; the print after the block
# shows the value that was restored.
with pd.option_context('display.max_rows', 10):
    print(pd.get_option('display.max_rows'))
print(pd.get_option('display.max_rows'))
示例#18
0
# -*- coding: utf-8 -*-
"""
@author: [email protected]
@site: e-smartdata.org
"""

import numpy as np
import pandas as pd
import seaborn as sns
# Apply seaborn's default settings.
sns.set()

# Set the maximum number of rows to display via attribute access
pd.options.display.max_rows = 10

# %%
# A 100 x 3 frame of random values to try the display options on.
df = pd.DataFrame(np.random.randn(100, 3))

# %% get value of option
pd.get_option('display.max_rows')

# %% set value of option
pd.set_option('display.max_rows', 30)
pd.get_option('display.max_rows')

# Restore the default and read it back.
pd.reset_option('display.max_rows')
pd.get_option('display.max_rows')

# Print the descriptions of two options.
pd.describe_option('display.max_rows')
pd.describe_option('mode.sim_interactive')
# Reset display.max_rows and print the value now in effect.
pd.reset_option("display.max_rows")
dataflair5= pd.get_option("display.max_rows")
print(dataflair5)
# Output observed by the author: 60

# Same for display.max_columns.
pd.reset_option("display.max_columns")
dataflair6= pd.get_option("display.max_columns")
print(dataflair6)
# Output observed by the author: 0

# pandas.describe_option
"""
The .describe_option in Pandas describes the parameter. 
For example .describe_option(“display.max_rows”) would give the details about “display.max_rows” .
"""
# NOTE(review): describe_option prints the description itself; the sample
# output recorded after this call ends with "None", which is print() echoing
# the call's return value.
print(pd.describe_option("display.max_rows"))
"""
display.max_rows : int
    If max_rows is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the height of the terminal and print a truncated object which fits
    the screen height. The IPython notebook, IPython qtconsole, or
    IDLE do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 60] [currently: 60]
None
"""
示例#20
0
                            columns=['Supplier Name', 'Purchase Date'])
print(data_frame)

print(pd.get_option(
    'display.max_rows'))  # how many rows will be displayed at most
pd.set_option('display.max_rows', None)  # show all rows
pd.reset_option('display.max_rows')

print(pd.get_option('display.max_columns'))
pd.set_option('display.max_columns', 20)
print(data_frame)

# If some text is cut off as '...' but should be displayed in full
print(pd.get_option('display.max_colwidth'))
# NOTE(review): the original author noted that None is not accepted here and
# used a large number instead -- newer pandas may accept None; confirm.
pd.set_option('display.max_colwidth', 1000)

# Change the number of decimal places
data_frame['Part Number'] = data_frame['Part Number'].astype(float)
# set_option -- not get_option -- changes the value; get_option only accepts
# the option name.
pd.set_option('display.precision', 1)
print(data_frame)

# Use a comma as thousands separator when printing floats
pd.set_option('display.float_format', '{:,}'.format)
print(data_frame)

# describe_option prints its text and returns None, so it is called directly
# (wrapping it in print() adds a stray "None" line).
pd.describe_option()
pd.describe_option('columns')

# Change all the options back to their defaults
pd.reset_option('all')
示例#21
0
import matplotlib.pyplot as plt
import pandas as pd                    #http://pandas.pydata.org/pandas-docs/stable/10min.html
#https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf ==> Pandas cheatsheet
#https://jeffdelaney.me/blog/useful-snippets-in-pandas/
import numpy as np
import re
import os

########### Display options for Spyder, Jupyter and the terminal ###########
pd.show_versions() # show the versions of the installed libraries
pd.describe_option() # list every option that controls how things are displayed

# Rows: read the limit, lift it, then restore the default.
pd.get_option('display.max_rows')
pd.set_option('display.max_rows',None) # unlimited display of rows
pd.reset_option('display.max_rows')

# Columns: same sequence.
pd.get_option('display.max_columns')
pd.set_option('display.max_columns',None) # unlimited display of columns
pd.reset_option('display.max_columns')

#### In jupyter note book
# To see a function's signature and parameters, put the cursor on it and press "Shift+Tab" twice.

####################################
# Canopy Data Import ==> a tool that generates Python code for importing a CSV file automatically
####################################
# pandas is built on NumPy.
# A DataFrame is a grid-like representation of data.
# A Series is part of a DataFrame (one column of data is a Series object);
# in other words, a group of Series makes up a DataFrame.
pd.get_option('display.max_rows')
# The author observed 60 here; a longer frame is then shown as its first 30
# and last 30 rows.

pd.set_option('display.max_rows', None)

# Back to normal
pd.reset_option('display.max_rows')
drinks

# The same works for the columns
pd.get_option('display.max_columns')
# set_option and reset_option are used the same way

train = pd.read_csv('http://bit.ly/kaggletrain')
train.head()
# Some values are cut off with "..." because they are too long
pd.get_option('display.max_colwidth')
# The author observed 50: that many characters are displayed per cell
# set_option and reset_option are used the same way

# Note that the fare values are shown with 4 decimal digits
pd.get_option('display.precision')  # 6
# set_option and reset_option are used the same way

# Print the descriptions of all options whose name matches 'rows'
pd.describe_option('rows')

# This is how to reset everything you modified
pd.reset_option('all')
# It emits a warning, which is fine
示例#23
0
# -*- coding: utf-8 -*-
"""
@author: [email protected]
@site: e-smartdata.org
"""

import numpy as np
import pandas as pd
import seaborn as sns
# Apply seaborn's default settings.
sns.set()

# %%
# Options are addressed by their full dotted names: a bare 'precision' can
# match more than one registered option in newer pandas, which makes
# set_option raise OptionError.
pd.set_option('display.max_rows', 999)
pd.set_option('display.precision', 3)

pd.describe_option('display.precision')
pd.get_option('display.expand_frame_repr')
# With large_repr='info', a frame exceeding the display limits is printed as
# a summary instead of a truncated table.
pd.set_option('display.large_repr', 'info')
# %%
# 100 x 4 frame of random values, including one deliberately long column name.
df = pd.DataFrame(np.random.rand(100, 4),
                  columns=['a', 'b', 'cust_tab_dev_prod_dict_flg', 'd'])
print(df)
# %%
df.info()
df.memory_usage()
示例#24
0
def formhandler():
    """Handle a resume upload: parse it and return the result as an HTML page.

    Steps, as visible in this function:
      1. Read the uploaded file from ``request.files`` (key ``'upload'``),
         delete everything in the ``resumes`` directory and save the upload
         there.
      2. Run ``resumeParser.py`` on the uploaded file name in a subprocess.
      3. Load ``Cv_parser_output.xlsx``, clean up and rename its columns.
      4. Write the details / education / skills / work-experience tables to
         ``Cv_output.html`` and return ``template('Cv_output.html')``.

    Returns None implicitly when the upload has no file object.

    NOTE(review): ``request``, ``template`` and ``Javascript`` are not defined
    in this function; presumably they are module-level imports -- confirm.
    """
    import os

    data = request.files.get('upload')

    # Upload directory (cwd/resumes) and the current working directory.
    cwd1 = os.getcwd() + "/" + "resumes"
    cwd = os.getcwd()

    # Empty the upload directory before saving the new file.
    filesToRemove = [os.path.join(cwd1, f) for f in os.listdir(cwd1)]
    for f in filesToRemove:
        os.remove(f)
    # NOTE(review): `data` is used here before the `if data` check below, so
    # a missing upload fails on this line instead of being skipped.
    data.save(cwd + "/" + "resumes", overwrite=True)

    if data and data.file:
        raw = data.file.read()  # This is dangerous for big files
        filename = data.filename
        print("the file is  {0} the length is{1} ".format(filename, len(raw)))
        import sys
        import subprocess

        # Run the parser script on the uploaded file name; the exit code is
        # stored in `p` but not checked afterwards.
        p = subprocess.call(['python', 'resumeParser.py', filename])
        # NOTE(review): win32com is imported but not used below, and is only
        # available on Windows -- confirm it is needed.
        from win32com.client.gencache import EnsureDispatch
        from win32com.client import constants
        import pandas as pd

        # Remove the output of a previous request, if any.
        if os.path.exists("Cv_output.html"):
            print("enter output")
            os.remove("Cv_output.html")
        # Despite the name, `df` is the path of the Excel file, not a frame.
        # Presumably resumeParser.py writes this file -- confirm.
        df = os.getcwd() + "/" + "Cv_parser_output.xlsx"

        # ,na_values="Na,
        # NOTE(review): `index` is passed to read_excel here; confirm the
        # installed pandas version accepts that keyword.
        datsets = pd.read_excel(df, index=False)
        print(datsets, "index false")
        # Strip spaces from the column names and blank out missing values.
        datsets.columns = datsets.columns.str.replace(' ', '')
        datsets.fillna("", inplace=True)
        # NOTE(review): the results of drop() and to_string() are discarded,
        # so `datsets` is unchanged by the next two lines. The 15-name
        # renaming further down relies on the 'extension' column still being
        # present.
        datsets.drop(columns='extension')
        datsets.to_string(index=False)
        print(datsets, "datasets")

        from IPython.core.display import HTML
        # NOTE(review): -1 for max_colwidth presumably means "no limit" in
        # older pandas; newer versions may expect None -- confirm.
        pd.set_option('display.max_colwidth', -1)
        # Replace tabs and newlines with spaces in the free-text columns.
        datsets['Dateofbirth'] = datsets['Dateofbirth'].str.replace(
            '\t', ' ').str.replace('\n', ' ')
        datsets['addresses'] = datsets['addresses'].str.replace(
            '\t', ' ').str.replace('\n', ' ')

        datsets['work_experience'] = datsets['work_experience'].str.replace(
            '\t', ' ').str.replace('\n', ' ')
        # NOTE(review): both dropna() results are discarded (not in place).
        datsets.dropna()  #to drop if any value in the row has a nan
        datsets.dropna(how='all')

        # NOTE(review): the rendered Styler output is discarded as well.
        datsets.style.set_properties(
            **{
                'font-size': '11pt',
                'font-family': 'Calibri',
                'border-collapse': 'collapse',
                'border': '1px solid black'
            }).render()
        # Human-readable headers; this assumes the frame has exactly these
        # 15 columns in this order.
        datsets.columns = [
            'Extension', 'FileName', 'Name', 'Email', 'Phone',
            ' Date of birth', 'Addresses', 'Degree Name', 'Education year',
            'Institute Name', 'Soft skill', 'Technical skill',
            'Organization year', 'Organization Name', 'Counted Experience'
        ]
        datsets.dropna()
        print(datsets, "datasets of name ")
        # Split the frame into four column ranges (label-based, inclusive).
        Details = datsets.loc[:, "Extension":"Addresses"]
        Education = datsets.loc[:, "Degree Name":"Institute Name"]

        skill = datsets.loc[:, "Soft skill":"Technical skill"]
        Work_experience = datsets.loc[:,
                                      "Organization year":"Counted Experience"]

        # Transpose so each field becomes a row in the rendered table.
        Details = Details.T

        skill = skill.T
        # NOTE(review): the transposed `work_experience` (lower case) is not
        # used again; the HTML below renders the untransposed
        # `Work_experience`.
        work_experience = Work_experience.T

        pd.set_option('colheader_justify', 'center')  # FOR TABLE <th>
        pd.set_option('display.max_columns', None)  # or 1000AN
        pd.set_option('display.max_rows', None)  # or 1000
        pd.set_option('display.max_colwidth', -1)  # or 199
        pd.describe_option('display')

        # Page fragments; {table} is filled in via str.format below. The
        # doubled braces around url_for(...) come out of str.format as single
        # braces.
        html_string = '''
        <html>
        <style>
        
        </style>


        <link rel=stylesheet type=text/css href="{{ url_for('static', filename='style.css') }}">
        <h1 align="center" style = "background-color: grey; color: white">Curriculum Vitae Parser</h1>
        <h2><u>Details</u></h2>
        
          {table}
  

        </html>

        '''
        html_string1 = '''
        <html>
       
        <h2><u>Educations</u></h2>
        
          {table}
  

        </html>
'''

        html_string2 = '''
        <html>
       
        <h2><u>Skills</u></h2>
        
          {table}
  

        </html>
        '''

        html_string3 = '''
        <html>
       
        <h2><u>Work Experiences</u></h2>
        
          {table}
  

        </html>
        '''

        # OUTPUT AN HTML FILE
        # Each table is rendered with to_html(), its <th> cells get a light
        # green background, and the four fragments are concatenated.
        # NOTE(review): the '<h2>' replacements run on the table HTML only
        # (before it is inserted into the fragment), so they presumably never
        # match -- confirm.
        with open('Cv_output.html', 'w') as f:
            f.write(
                html_string.format(table=Details.to_html(
                    classes='my_class').replace(
                        '<th>', '<th style = "background-color: lightgreen" >'
                    ).replace('<h2></h2>', '<h2>Details</h2>')) +
                html_string1.format(table=Education.to_html(
                    classes='my_class').replace(
                        '<th>', '<th style = "background-color: lightgreen" >'
                    ).replace('<h2>', '<h2>Education Details')) +
                html_string2.format(table=skill.to_html(
                    classes='my_class').replace(
                        '<th>', '<th style = "background-color: lightgreen" >'
                    ).replace('<h2>', '<h2>Education Details')) +
                html_string3.format(table=Work_experience.to_html(
                    classes='my_class').replace(
                        '<th>', '<th style = "background-color: lightgreen" >'
                    ).replace('\t', ' ').replace('\n', '')))
            print("over")
        # NOTE(review): the object created here is discarded, and
        # `Javascript` is not imported in this function (only HTML is).
        Javascript(
            '''$('.my_class tbody tr').filter(':last').css('background-color', 'red');
                   ''')

        return template('Cv_output.html')