Example #1
def stanton_optical_locations():
    raw_html = simple_get('https://www.stantonoptical.com/locations/')
    html = BeautifulSoup(raw_html, 'html.parser')
    state_list = []
    address_list = []
    city_state_zip_list = []
    for ptag in html.find_all('p'):
        for i, content in enumerate(ptag.contents):
            if content.string is not None:  # filter out the spurious results
                if i == 0:
                    state_list.append(content.string)
                elif i == 1:
                    address_list.append(content.string)
                    print(content)
                elif i == 3:
                    city_state_zip_list.append(content.string)
                    print(content)
                elif i > 3:
                    continue

    os.chdir(directory_where_you_want_to_save_the_new_file)
    address = pd.Series(address_list, name='Addresses')
    city = pd.Series(city_state_zip_list, name='City/State/Zip')
    df = pd.concat([address, city], axis=1)
    ##    dictionary = {'Stanton Optical Address':address_list,'Stanton Optical City/State/Zip': city_state_zip_list}
    ##    df = pd.DataFrame.from_dict(dictionary)
    df.to_excel(stanton_file_name, index=False)
Example #2
def cks(data,
        P=10,
        Q=9,
        X=1,
        high_col='High',
        low_col='Low',
        close_col='Close',
        vol_col='Volume',
        fillna=False):

    high = data[high_col]
    low = data[low_col]
    close = data[close_col]

    ATR = atr(data, P)

    phs_ = []
    pls_ = []
    for l in range(len(close)):
        sindex = l - P + 1
        if sindex < 0:
            sindex = 0

        xatr = X * ATR[l]
        maxv = max(high[sindex:l + 1])
        minv = min(low[sindex:l + 1])
        phs = maxv - xatr
        pls = minv + xatr
        phs_.append(phs)
        pls_.append(pls)

    ss_ = []
    ls_ = []

    for l in range(len(close)):
        sindex = l - Q + 1
        if sindex < 0:
            sindex = 0

        maxv = max(phs_[sindex:l + 1])
        minv = min(pls_[sindex:l + 1])
        ss_.append(maxv)
        ls_.append(minv)

    ss = pd.Series(ss_, name='ss')
    ls = pd.Series(ls_, name='ls')

    if fillna:
        ss = ss.replace([np.inf, -np.inf], np.nan).fillna(0)
        ls = ls.replace([np.inf, -np.inf], np.nan).fillna(0)

    return {'ss': ss, 'ls': ls}
Example #3
 def test_renameCols(self):
     nested_dict = {
         'dictA': {
             'key_1': 'value_1'
         },
         'dictB': {
             'key_2': 'value_2',
             'key_3': 'value_3'
         }
     }
     srs_1 = pd.Series({'dictA key_1': 'value_1'})
     srs_2 = pd.Series({'dictB key_2': 'value_2', 'dictB key_3': 'value_3'})
     self.assertTrue(rename_cols("dictA", nested_dict).equals(srs_1),
                     "Test with just one key value pair")
     self.assertTrue(rename_cols("dictB", nested_dict).equals(srs_2),
                     "Test with multiple key value pairs")
Example #4
def find_capper(layerdict,dayperiod,typemode='Sub-Type'):
    maxalt=layerdict['mpl'].NRB[0].columns[-1]
    mplindex=layerdict['mpl'].NRB[0].index
    try:
        molalt=layerdict['molecular']['Layer0']['Base']
    except KeyError:
        molalt=pan.Series(data=maxalt,index=mplindex)
    
    try:    
        layeralt=layerdict['layers']['Layer0']['Base']
        layertype=layerdict['layers']['Layer0'][typemode]
    except KeyError:
        layeralt=pan.Series(data=maxalt,index=mplindex)
        layertype=pan.Series(data=np.nan,index=mplindex)
    
    PBLalt=layerdict['pbl']
    molalt.fillna(maxalt,inplace=True)
    layeralt.fillna(maxalt,inplace=True)
    
    
    captype=pan.DataFrame(index=mplindex,columns=[dayperiod])
    clearcap=pan.DataFrame(index=mplindex,columns=[dayperiod])
    PBL=pan.DataFrame(index=mplindex,columns=[dayperiod])
    for i in captype.index:
        if layeralt.loc[i]<molalt.loc[i]:
            captype.loc[i]=layertype.loc[i]
            PBL.loc[i]=PBLalt.loc[i]
            clearcap.loc[i]='Other'
        else:
            captype.loc[i]=np.nan
            PBL.loc[i]=np.nan
            clearcap.loc[i]='Clear Air'

    return captype,clearcap,PBL
Example #5
def zscoreVect(genes, expDat, tVals,ctt, cttVec):
    res={}
    x=expDat.loc[cttVec == ctt,:]
    for gene in genes:
        xvals=x[gene]
        res[gene]= pd.Series(data=zscore(xvals, tVals[ctt]['mean'][gene], tVals[ctt]['sd'][gene]), index=xvals.index.values)
    return res
Example #6
def choppiness(data,
               tp=14,
               high_col='High',
               low_col='Low',
               close_col='Close',
               vol_col='Volume',
               fillna=False):

    high = data[high_col]
    low = data[low_col]

    ATR = atr(data, tp)
    cp_values = []

    for i in range(len(data)):
        if i < tp * 2:
            cp_values.append(0)
        else:
            nmrt = np.log10(
                np.sum(ATR[i - tp:i]) /
                (max(high[i - tp:i]) - min(low[i - tp:i])))
            dnmnt = np.log10(tp)
            cp_values.append(round(100 * nmrt / dnmnt))

    CP = pd.Series(cp_values, name='cp')
    if fillna:
        CP = CP.replace([np.inf, -np.inf], np.nan).fillna(0)
    return CP
Example #7
def zuliValue(x,y,v):
    highSum = 0
    totalSum = 0
    lag = len(y)
    base = np.log(lag+1)
    price = pd.Series(y, index=x)
    price = price.sort_values()
    y = price.values
    x = list(price.index)

    tmp = 0
    for i in range(lag):
        if y[i] != y[-1]:
            tmp = v[i] * np.log(1.0/np.abs(y[i]-y[-1])*y[-1]) * (np.log(x[i]+1)/base)
            totalSum += tmp

        if y[i] > y[-1]:
            highSum += tmp

    if totalSum != 0:
        result = highSum/totalSum
    else:
        result = 0

    return x[i], result
Example #8
def ner_predict(model, x, word2id, label2id, max_len=None, do_word2id=True):
    # reverse mappings
    id2word = {id: word for word, id in word2id.items()}
    id2label = {id: label for label, id in label2id.items()}
    # determine the maximum sequence length
    if max_len is None:
        max_len = max(map(lambda seq: len(seq), x))
    # normalize the input text
    if do_word2id:
        seqs = []
        word_list = []
        for seq in x:
            seq = list(seq)
            word_list.append(seq)
            seq = nn_lib.sentence2id(seq, word2id)
            seqs.append(seq)
        seqs = nn_lib.pad_sequences(seqs, max_len)
    else:
        seqs = x
        word_list = []
        for row in x:
            word_list.append(series(row).map(id2word).tolist())
    seqs = np.array(seqs)
    # predict labels
    label_id_list = model.infer([seqs])
    # pair each token with its predicted label
    corpus_labels = []
    for i in range(len(word_list)):
        corpus_label = []
        for j in range(len(word_list[i])):
            corpus_label.append(
                (word_list[i][j], id2label[label_id_list[i][j]]))
        corpus_labels.append(corpus_label)

    return corpus_labels
Example #9
def CEILING(p):
    #Compute the smallest integer greater than or equal to a / b.
    #TODO Make test suite
    a = p.arguments["a"].parent.data
    b = p.arguments["b"].parent.data
    c = pd.Series(a / b)
    result = np.ceil(c)
    p.arguments["result"] = result
Example #10
def FLOOR(p):
    #Compute the largest integer less than or equal to a / b.
    #TODO make test suite
    a = p.arguments["a"].parent.data
    b = p.arguments["b"].parent.data
    c = pd.Series(a / b)
    result = np.floor(c)
    p.arguments["result"] = result
Example #11
def gradient_descent(data_points, b, m, learning_rate, number_of_iteration):
    n = len(data_points)
    array = []

    for i in range(number_of_iteration):
        predicted = np.dot(data_points, m)
        m = m - learning_rate / n * np.dot((predicted - value), data_points)
        output = compute_output(data_points, b, m)
    return pd.Series(array)
Example #12
def read_csv(file):
    day_sum = []
    file_dir = os.path.split(file)[0]
    file_name = os.path.split(file)[1]
    
    new_file = os.path.join(file_dir, file_name[:-4]+ '_new_file.csv')
    write_file = open(new_file, 'w', newline='')
    
    df = pd.read_csv(file, low_memory=False)
    header = list(df)
    
    area = np.array(df['F_AREA'])
    area_sum = np.sum(area)
    
    weight = area/area_sum
    ######################################################
    year=2018
    if calendar.isleap(year):
        period = 366
    else:
        period = 365
    date = pd.date_range('01-01-'+str(year), periods=period)
    
    for i in range(3, 368):
        df[header[i]] = np.array(df[header[i]])*weight
        day_sum.append(np.sum(df[header[i]]))
    df.insert(len(header), 'Weight', weight)
    header = list(df)
    if (len(df) < len(day_sum)):
        df1 = pd.DataFrame(index=list(range(len(df), len(day_sum))))
        df = pd.concat([df, df1])
        df = df.fillna('')
        df.insert(len(header), 'Date', date)
        header = list(df)
        df.insert(len(header), 'Sum', day_sum)
    else:
        df['Date'] = pd.Series(date)
        df['Sum'] = pd.Series(day_sum)
        df = df.fillna('')
    
    df.to_csv(write_file, index=False)
    
    write_file.close()
Example #13
def get_outliers(s, eps=0.8, min_samples=5):
    '''
    DBSCAN to identify, visualize, and remove outliers

    '''

    try:
        dim = len(s.columns)
    except AttributeError:
        dim = 1
    s = s.dropna()
    x = s.values.reshape(len(s), dim)
    x = StandardScaler().fit_transform(x)
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    model = dbscan.fit(x)
    return series(model.labels_ != stats.mode(model.labels_).mode[0],
                  index=s.index)
Example #14
    def assign_cluster_label(self, kdd_dataset):
        labels = kdd_dataset['label']
        label_names = list(
            map(
                lambda x: pandas.Series([
                    labels[i] for i in range(len(self.km.labels_))
                    if self.km.labels_[i] == x
                ]), range(self.n_cluster)))

        # val = ','.join(map(str, label_names))
        for i in range(self.n_cluster):
            print("cluster {} labels: ".format(i))
            print(label_names[i].value_counts())
            label_dict = label_names[i].value_counts().to_dict()
            val = max(label_dict, key=label_dict.get)
            print(type(val))
            self.label_cluster.append(val)
            print("cluster of : ", val)
Example #15
 def test_array_props_conform(self, array_props, meta_rel_df):
     for k, v in array_props.items():
         expected_type = schema_type_mapping[v["items"]["type"]]
         logger.info(f"{k}: {expected_type}")
         parent_is_not_list = (meta_rel_df[k].dropna().apply(
             lambda _: not isinstance(_, list)).pipe(lambda s: sum(s)))
         assert not bool(parent_is_not_list)
         series = pd.Series([
             _ for sub_list in meta_rel_df[k].dropna().tolist()
             for _ in sub_list
         ])
         logger.info(series)
         # special handling of nullable integer
         if v["type"] == "integer":
             series = series.astype(pd.Int64Dtype())
         type_not_conform = series.apply(lambda _: not isinstance(
             _, expected_type)).pipe(lambda s: sum(s))
         assert not bool(type_not_conform)
Example #16
def load_wiki_corpus(path_data_in=None, path_data_out=None, word2vec=True):
    if path_data_in is None:
        corpus_path = path_nlp + r'zhwiki-latest-pages-articles.xml.bz2'
    else:
        corpus_path = path_data_in
    if path_data_out is None:
        if word2vec:
            corpus_processed_path = path_nlp + 'corpus_word2vec.txt'
        else:
            corpus_processed_path = path_nlp + 'corpus_doc2vec.txt'
    else:
        corpus_processed_path = path_data_out
    cc = OpenCC('t2s')
    count = 0
    with open(corpus_processed_path, 'w',
              encoding='utf-8') as corpus_processed:
        corpus = WikiCorpus(corpus_path, lemmatize=False, dictionary={})
        if word2vec:
            for doc in corpus.get_texts():
                doc_new = series(doc).apply(lambda x: ' '.join(
                    jieba.cut(cc.convert(x), cut_all=False)))
                corpus_processed.write(' '.join(doc_new) + "\n")
                count += 1
                if (count % 100 == 0):
                    logging.warning('Saved ' + str(count) + ' articles')
                if flag_test and (count == 1000):
                    return
        else:
            corpus.metadata = True
            for doc, (page_id, title) in corpus.get_texts():
                doc_new = TaggedDocument(words=[
                    word for sentence in doc
                    for word in jieba.cut(cc.convert(sentence))
                ],
                                         tags=[cc.convert(title)])
                corpus_processed.write(' '.join(doc_new[0]) + '\t' +
                                       '\t'.join(doc_new[1]) + "\n")
                count += 1
                if (count % 100 == 0):
                    logging.warning('Saved ' + str(count) + ' articles')
                if flag_test and (count == 1000):
                    return
    return
Example #17
def tmrca_half(ts, pop_nodes, pop_ids, outfile):
    """Calculats the tmrca half fx from Hejase et al 2020.

        "...test on the time to the most recent common ancestor of half the haploid
    samples from a given species (TMRCAH). Requiring only half the samples
    allows us to consider partial sweeps and provides robustness to the
    inherent uncertainty in the inferred local trees."

    Parameters
    ----------
    ts : Object
        object of type tskit tree sequence.
    pop_nodes : List
        population leaves as integers loaded from file.
    pop_ids : List
        id of population nodes to be written in DataFrame.
    outfile : str
        base name of DataFrame file output.

    Returns
    -------
    None.

    """
    df_list = []
    for pop, nodes in zip(pop_ids, pop_nodes):
        int1, int2, tmrcah_rel, time_rel, time_rel2 = calc_tmrcah(ts, nodes)
        # set up DataFrame
        df_pop = pd.DataFrame({
            "population": pd.Series([pop] * len(int1)),
            "tree_start": pd.Series(int1),
            "tree_end": pd.series(int2),
            "tmrcah": pd.Series(tmrcah_rel),
            "time_rel": pd.Series(time_rel),
            "time_rel2": pd.Series(time_rel2)
        })
        df_list.append(df_pop)
    df_pop_combine = pd.concat(df_list).reset_index(drop=True)
    df_pop_combine.to_csv(f"{outfile}.tmrca_half.csv",
                          na_rep="NAN",
                          index=False)
Example #18
def matrix(df,date,pollutant):
    df = df.dropna(subset=[pollutant], axis=0).reset_index(drop=True)
    total_rows = len(df)

    A = np.empty([total_rows,total_rows])
    B = np.empty([total_rows])

    #This for loop is used to populate the matrix A and B
    for i in range(0,total_rows): #c1, c2 ..., initial
        final = [df.loc[i,'lon'],df.loc[i,'lat'],df.loc[i,'alt']]
        for j in range(0,total_rows): #q1, q2 ...
            init = [df.loc[j,'lon'],df.loc[j,'lat'],df.loc[j,'alt']]
            coefficient = a(df.loc[j,'uwnd'],df.loc[j,'vwnd'],init[0],init[1], init[2],final[0],final[1],final[2])
            A.itemset((i,j),coefficient)
            B.itemset(i,df.loc[i,pollutant])

    #solve the matrix
    X = np.linalg.solve(A,B)

    #Save as percentages and save it in matrix A
    for m in range(0,total_rows):
        A[m] = np.multiply(A[m],X)
        sum_m = np.sum(A[m])
        for n in range(0,total_rows):
            new_value = (A[m,n]/sum_m)*100
            A.itemset((m,n),new_value)

    my_list = []
    #make a series with all the rows, state names and
    for a in range(0,total_rows):
        new_series = pd.Series(data = A[a], index = df.state)
        grouppy = new_series.groupby(new_series.index).sum()
        grouppy_dict = grouppy.to_dict()
        listty = [grouppy_dict,df.loc[a,'index']]
        my_list.append(listty)

    #X is the solution, i.e. the concentration of sources, of different places
    #A is the coefficients multiplying the sources, i.e. the weight of each source
    return my_list
Example #19
def _get_value_counts(df, col):
    text = ' '.join(df[col])
    text = text.split()
    freq = pd.Series(text).value_counts()
    return freq
Example #20
from pandas import DataFrame as df
from pandas import Series as series

details = {
    'name': ['osa', 'ire', 'ifa'],
    'age': [22, 33, 55],
    'location': ['Africa', 'Cuba', 'Brazil']
}

frame = df(details)
print(frame)

frame = df(details, columns=['name', 'location', 'age', 'salary'])
print(frame)

print(frame['location'])
print(frame.location)
print(frame.loc[1])

frame.salary = 5000
print(frame)

s = series([300, 400], index=[0, 1])
print(s)

frame.salary = s
print(frame)
Example #21
import pandas as pd

grade = pd.Series([87, 100, 94])

print(grade)

array = pd.Series(98.6, range(3))

print(array)
Example #22
import numpy as nm
import pandas as pd

dt = nm.array([1, 2, 3, 4])
s = pd.Series(dt, index=[10, 20, 30, 40])
print(s)
Example #23
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Russia'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Turkey'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Brazil'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Chile'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Colombia'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Mexico'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Peru'])
gdeathsxl = gdeathsxl.drop('Population', axis=1)
gdeathsxl = gdeathsxl.reset_index(drop=True)

Locations = pd.Series([
    'US', 'Texas', 'Bexar', 'Harris', 'Dallas', 'Tarrant', 'Travis', 'Collin',
    'Hidalgo', 'El Paso', 'Alabama', 'Arizona', 'California', 'Colorado',
    'Conneticut', 'Florida', 'Georgia', 'Louisiana', 'Massachusetts', 'Nevada',
    'New Mexico', 'New York', 'Oklahoma', 'South Carolina', 'Washington',
    'China', 'Belgium', 'Canada', 'France', 'Germany', 'Italy', 'Japan',
    'Korea, South', 'Netherlands', 'Norway', 'Portugal', 'Spain', 'Sweden',
    'Switzerland', 'United Kingdom', 'Egypt', 'South Africa', 'India',
    'Indonesia', 'Iran', 'Philippines', 'Saudi Arabia', 'Singapore',
    'Thailand', 'Poland', 'Russia', 'Turkey', 'Brazil', 'Chile', 'Colombia',
    'Mexico', 'Peru'
])
gcasesxl.loc[:, 'Admin2'] = Locations
gdeathsxl.loc[:, 'Admin2'] = Locations
gcasesxl = gcasesxl.drop(['State', 'Country'], axis=1)
gdeathsxl = gdeathsxl.drop(['State', 'Country'], axis=1)

a = gcasesxl.melt(id_vars='Admin2')

import xlsxwriter

writer = pd.ExcelWriter('covid_tableau_.xlsx', engine='xlsxwriter')
Example #24
import pandas as pd

data = [1, 2, 3, 4, 5]

a = pd.Series(data)
Example #25
# #creating a DataFrame using a dictionary
# import pandas as pd
# dictionary={'fruits':['apples', 'banana','mangoes'], 'count':[10,20,15]}
# df= pd.DataFrame(dictionary)
# print (df)

#creating a DataFrame using series
import pandas as pd
series = pd.Series([6, 12], index=['a', 'b'])
df = pd.DataFrame(series)
print(df)

# # MERGE OPERATION
# import pandas as pd
# player=['player1', 'player2','player3']
# point =[8,5,6]
# title= ['game1','game2','game3']
# df1 = pd.DataFrame({'Player': player, 'Points': point, 'Title': title})
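# # The commented-out block above stops before showing the actual merge; a minimal sketch of pd.merge
# # (the second frame df2 and its 'Score' column are purely illustrative):
# df1 = pd.DataFrame({'Player': ['player1', 'player2', 'player3'], 'Points': [8, 5, 6]})
# df2 = pd.DataFrame({'Player': ['player1', 'player2', 'player3'], 'Score': [3, 9, 7]})
# merged = pd.merge(df1, df2, on='Player')  # rows are matched on the shared 'Player' column
# print(merged)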
Example #26
import pandas as pd

grades = pd.Series([87, 100, 94])

print(grades)

same_grade = pd.Series(98.6, range(3))

print(same_grade)

#0 98.6
#1 98.6
#2 98.6
#dtype: float64

print(grades[0])
grades.count()
grades.mean()
grades.min()
grades.max()
grades.std()

print(grades.describe())

#you can specify custom indices with the index keyword argument:
grades = pd.Series([87, 100, 94], index=['Wally', 'Eva', 'Sam'])

print(grades)

#if you initialize a series with a dictionary, its keys become
#the series' indices, and its values become the series' element values
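#for instance, a minimal sketch of that behavior (this small dictionary is purely illustrative):
temperatures = pd.Series({'Mon': 21.5, 'Tue': 23.0, 'Wed': 19.8})
print(temperatures)  # the keys 'Mon', 'Tue', 'Wed' become the index; the values become the data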
Example #27
Type "help", "copyright", "credits" or "license()" for more information.
>>> list=[13,54,75]
>>> import usermodule
>>> print ("list=",usermodule.list)
list= [13, 54, 75]
>>> list.append(98)
>>> print(list)
[13, 54, 75, 98]
>>> 
>>> 
>>> import pandas as pa
>>> import numpy as nu
>>> import sys
>>> sys._stdout_=sys.stdout
>>> fruit=nu.array(['pears','mango','kiwi'])
>>> series=pa.Series(fruit)
>>> print(series)
0    pears
1    mango
2     kiwi
dtype: object
>>> 
>>> 
>>> import random
>>> print("random integer is :",random.randint(1,100))
random integer is : 42
>>> 
>>> 
>>> import sys
>>> sys.path
['', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\Lib\\idlelib', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\python39.zip', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\DLLs', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\lib', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages']
Example #28
print('\n corrmat index:', corrmat.index)

# # Feature Importance
"""Feature importance is an inbuilt class that comes with 
   Tree Based Regressor, we will be using Extra Tree Regressor 
   for extracting the top 10 features for the dataset. """

model = ExtraTreesRegressor()
model.fit(X, y)

print('\n Head of X:')
print(X.head())
print('\n feature importance:', model.feature_importances_)

# # plot graph of feature importances for better visualization
feat_importances = pd.Series(model.feature_importances_, index = X.columns)
feat_importances.nlargest(5).plot(kind = 'barh')
plt.show()

# # K Nearest Neighbor Regression
sns.distplot(y)
plt.show()

# # split the data and do train and test on the split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

model = Sequential()
# The Input Layer :
model.add(Dense(128, kernel_initializer = 'normal', input_dim = X_train.shape[1], activation = 'relu'))

# The Hidden Layers :
Example #29
from matplotlib import rcParams
import seaborn as sb

rcParams['figure.figsize']=8,4
sb.set_style('whitegrid')

#cars dataset
mpg.plot(kind='hist')

plt.hist(mpg)

sb.distplot(mpg)

cars.plot(kind='scatter',x='hp',y='mpg',c=['darkgray'],s=150)

sb.regplot(x='hp',y='mpg',data=cars,scatter=True)

sb.pairplot(cars)#scatterplot matrix

cars_df=pd.DataFrame((cars.iloc[:,[1,3,4,6]].values),columns=['mpg','disp','hp','wt'])
cars_target=cars.iloc[:,9].values
target_names=[0,1]
cars_df['group']=pd.Series(cars_target,dtype='category')
sb.pairplot(cars_df,hue='group',palette='hls')

cars.boxplot(column='mpg',by='am')
cars.boxplot(column='wt',by='am')

sb.boxplot(x='am',y='mpg',data=cars,palette='hls')

Example #30
# -*- coding: utf-8 -*-
"""
Created on Thu Jan  2 18:35:36 2020

@author: Santosh
"""

#list to pandas conversion
import pandas as pd
import numpy as np
np_array = np.array([1, 2, 3, 4, 5])
print(np_array)
new = pd.Series(np_array)
print(new)

## pandas to list

import pandas as pd
import numpy as np
new = pd.Series([1, 2, 3, 4])
print(new)
print(new.tolist())

## dictionary to pandas and list

import pandas as pd
import numpy as np
ds = {'a': 1, 'b': 2, 'c': 6, 'd': 7}
print(ds)
print(pd.Series(ds))
print(pd.Series(ds).tolist())
Example #31
import pandas as pd
import numpy as np
df = pd.DataFrame(columns=('Columna1','Columna2','Columna3'))
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [40, 50, 60], [23, 35, 37]]), 
                  index= [2.5, 12.6, 4.8, 4.8, 2.5], 
                  columns=[48, 49, 50]) # index sets the row labels; columns gives the column names


#Creating a DataFrame from a dictionary or from a pandas Series
data = {'Lenguaje':['Python','C#','Java'],
       'Dificultad':['Media','Alta','Muy Alta'],
       'Ejecucion':['No compilado','Compilado','Compilado']}
df = pd.DataFrame(data)

#Starting from pandas Series
listado_lenguajes = ['Python','C#','Java']
d = {'Lenguaje': pd.Series(['Sin compilar','Compilado','Compilado'],index = listado_lenguajes),
    'Dificultad': pd.Series(['Media','Alta','Muy Alta'],index = listado_lenguajes)}
#If the indices did not match, a merge happens and all labels end up in the DataFrame's overall index; if one of the Series
#does not include a given label, the value associated with that label in that column appears as NaN.
df = pd.DataFrame(d)
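#For instance, a minimal sketch of that behavior (the second dictionary d2 below is purely illustrative):
d2 = {'Lenguaje': pd.Series(['Sin compilar','Compilado'], index=['Python','C#']),
      'Dificultad': pd.Series(['Media','Alta','Muy Alta'], index=['Python','C#','Java'])}
print(pd.DataFrame(d2))  # the 'Lenguaje' cell for 'Java' shows up as NaN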

#Up to this point we have created DataFrames in several different ways. Remember that values imported from a csv with read_csv or from an Excel file with read_excel,
#as well as values imported over SQL with pyodbc, are shown or interpreted by Python as a DataFrame. We can also create a DataFrame from
#a csv selecting only the data we want, by creating a reader and picking out the rows.
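#A minimal sketch of that idea (the file name 'lenguajes.csv' and its 'Dificultad' column are hypothetical):
import csv
with open('lenguajes.csv', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    filas = [fila for fila in reader if fila['Dificultad'] == 'Alta']  #keep only the rows we want
df_filtrado = pd.DataFrame(filas)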

#Creating a DataFrame with selected columns -- in this case only Lenguaje and Dificultad
df = pd.DataFrame(data, columns=['Lenguaje','Dificultad'])

#We can create new columns in the following way.
df['Experiencia'] = 'variable asignar' #the whole column is created with the same value. We can also make it conditional on another column of the DataFrame
df['Rentabilidad'] = df['Salario'] > 35000 #every row that meets this criterion on the Salario column will be marked True in the new Rentabilidad column.
Example #32
import pandas
import numpy
arr = numpy.array([10, 20, 30, 40, 50, 60])
series = pandas.Series(arr)
print(series)