Example #1
def stanton_optical_locations():
    raw_html = simple_get('https://www.stantonoptical.com/locations/')
    html = BeautifulSoup(raw_html, 'html.parser')
    state_list = []
    address_list = []
    city_state_zip_list = []
    for ptag in html.find_all('p'):
        for i, content in enumerate(ptag.contents):
            if content.string is not None:  # filter out the spurious results
                if i == 0:
                    state_list.append(content.string)
                elif i == 1:
                    address_list.append(content.string)
                    print(content)
                elif i == 3:
                    city_state_zip_list.append(content.string)
                    print(content)
                elif i > 3:
                    continue

    os.chdir(directory_where_you_want_to_save_the_new_file)
    address = pd.Series(address_list, name='Addresses')
    city = pd.Series(city_state_zip_list, name='City/State/Zip')
    df = pd.concat([address, city], axis=1)
    ##    dictionary = {'Stanton Optical Address':address_list,'Stanton Optical City/State/Zip': city_state_zip_list}
    ##    df = pd.DataFrame.from_dict(dictionary)
    df.to_excel(stanton_file_name, index=False)
Example #2
def cks(data,
        P=10,
        Q=9,
        X=1,
        high_col='High',
        low_col='Low',
        close_col='Close',
        vol_col='Volume',
        fillna=False):

    high = data[high_col]
    low = data[low_col]
    close = data[close_col]

    ATR = atr(data, P)

    phs_ = []
    pls_ = []
    for l in range(len(close)):
        sindex = l - P + 1
        if sindex < 0:
            sindex = 0

        xatr = X * ATR[l]
        maxv = max(high[sindex:l + 1])
        minv = min(low[sindex:l + 1])
        phs = maxv - xatr
        pls = minv + xatr
        phs_.append(phs)
        pls_.append(pls)

    ss_ = []
    ls_ = []

    for l in range(len(close)):
        sindex = l - Q + 1
        if sindex < 0:
            sindex = 0

        maxv = max(phs_[sindex:l + 1])
        minv = min(pls_[sindex:l + 1])
        ss_.append(maxv)
        ls_.append(minv)

    ss = pd.Series(ss_, name='ss')
    ls = pd.Series(ls_, name='ls')

    if fillna:
        ss = ss.replace([np.inf, -np.inf], np.nan).fillna(0)
        ls = ls.replace([np.inf, -np.inf], np.nan).fillna(0)

    return {'ss': ss, 'ls': ls}
Example #3
 def test_renameCols(self):
     nested_dict = {
         'dictA': {
             'key_1': 'value_1'
         },
         'dictB': {
             'key_2': 'value_2',
             'key_3': 'value_3'
         }
     }
     srs_1 = pd.Series({'dictA key_1': 'value_1'})
     srs_2 = pd.Series({'dictB key_2': 'value_2', 'dictB key_3': 'value_3'})
     self.assertTrue(rename_cols("dictA", nested_dict).equals(srs_1),
                     "Test with just one key value pair")
     self.assertTrue(rename_cols("dictB", nested_dict).equals(srs_2),
                     "Test with multiple key value pairs")
Example #4
def find_capper(layerdict,dayperiod,typemode='Sub-Type'):
    maxalt=layerdict['mpl'].NRB[0].columns[-1]
    mplindex=layerdict['mpl'].NRB[0].index
    try:
        molalt=layerdict['molecular']['Layer0']['Base']
    except KeyError:
        molalt=pan.Series(data=maxalt,index=mplindex)
    
    try:    
        layeralt=layerdict['layers']['Layer0']['Base']
        layertype=layerdict['layers']['Layer0'][typemode]
    except KeyError:
        layeralt=pan.Series(data=maxalt,index=mplindex)
        layertype=pan.Series(data=np.nan,index=mplindex)
    
    PBLalt=layerdict['pbl']
    molalt.fillna(maxalt,inplace=True)
    layeralt.fillna(maxalt,inplace=True)
    
    
    captype=pan.DataFrame(index=mplindex,columns=[dayperiod])
    clearcap=pan.DataFrame(index=mplindex,columns=[dayperiod])
    PBL=pan.DataFrame(index=mplindex,columns=[dayperiod])
    for i in captype.index:
        if layeralt.loc[i]<molalt.loc[i]:
            captype.loc[i]=layertype.loc[i]
            PBL.loc[i]=PBLalt.loc[i]
            clearcap.loc[i]='Other'
        else:
            captype.loc[i]=np.nan
            PBL.loc[i]=np.nan
            clearcap.loc[i]='Clear Air'

    return captype,clearcap,PBL
Example #5
def zscoreVect(genes, expDat, tVals,ctt, cttVec):
    res={}
    x=expDat.loc[cttVec == ctt,:]
    for gene in genes:
        xvals=x[gene]
        res[gene]= pd.Series(data=zscore(xvals, tVals[ctt]['mean'][gene], tVals[ctt]['sd'][gene]), index=xvals.index.values)
    return res
Example #6
def choppiness(data,
               tp=14,
               high_col='High',
               low_col='Low',
               close_col='Close',
               vol_col='Volume',
               fillna=False):

    high = data[high_col]
    low = data[low_col]

    ATR = atr(data, tp)
    cp_values = []

    for i in range(len(data)):
        if i < tp * 2:
            cp_values.append(0)
        else:
            nmrt = np.log10(
                np.sum(ATR[i - tp:i]) /
                (max(high[i - tp:i]) - min(low[i - tp:i])))
            dnmnt = np.log10(tp)
            cp_values.append(round(100 * nmrt / dnmnt))

    CP = pd.Series(cp_values, name='cp')
    if fillna:
        CP = CP.replace([np.inf, -np.inf], np.nan).fillna(0)
    return CP
Example #7
def zuliValue(x,y,v):
    highSum = 0
    totalSum = 0
    lag = len(y)
    base = np.log(lag+1)
    price = pd.Series(y, index=x)
    price = price.sort_values()
    y = price.values
    x = list(price.index)

    tmp = 0
    for i in range(lag):
        if y[i] != y[-1]:
            tmp = v[i] * np.log(1.0/np.abs(y[i]-y[-1])*y[-1]) * (np.log(x[i]+1)/base)
            totalSum += tmp

        if y[i] > y[-1]:
            highSum += tmp

    if totalSum != 0:
        result = highSum/totalSum
    else:
        result = 0

    return x[i], result
Example #8
def ner_predict(model, x, word2id, label2id, max_len=None, do_word2id=True):
    # reverse mappings
    id2word = {id: word for word, id in word2id.items()}
    id2label = {id: label for label, id in label2id.items()}
    # determine the maximum sequence length
    if max_len is None:
        max_len = max(map(lambda seq: len(seq), x))
    # normalize the input text
    if do_word2id:
        seqs = []
        word_list = []
        for seq in x:
            seq = list(seq)
            word_list.append(seq)
            seq = nn_lib.sentence2id(seq, word2id)
            seqs.append(seq)
        seqs = nn_lib.pad_sequences(seqs, max_len)
    else:
        seqs = x
        word_list = []
        for row in x:
            word_list.append(series(row).map(id2word).tolist())
    seqs = np.array(seqs)
    # predict labels
    label_id_list = model.infer([seqs])
    # pair each token with its predicted label
    corpus_labels = []
    for i in range(len(word_list)):
        corpus_label = []
        for j in range(len(word_list[i])):
            corpus_label.append(
                (word_list[i][j], id2label[label_id_list[i][j]]))
        corpus_labels.append(corpus_label)

    return corpus_labels
Example #9
def CEILING(p):
    #Compute the smallest integer greater than or equal to a / b.
    #TODO Make test suite
    a = p.arguments["a"].parent.data
    b = p.arguments["b"].parent.data
    c = pd.Series(a / b)
    result = np.ceil(c)
    p.arguments["result"] = result
Example #10
def FLOOR(p):
    #Compute the largest integer less than or equal to a / b.
    #TODO make test suite
    a = p.arguments["a"].parent.data
    b = p.arguments["b"].parent.data
    c = pd.Series(a / b)
    result = np.floor(c)
    p.arguments["result"] = result
Example #11
def gradient_descent(data_points, b, m, learning_rate, number_of_iteration):
    n = len(data_points)
    array = []

    for i in range(number_of_iteration):
        predicted = np.dot(data_points, m)
        m = m - learning_rate / n * np.dot((predicted - value), data_points)
        output = compute_output(data_points, b, m)
    return pd.Series(array)
Example #12
def read_csv(file):
    day_sum = []
    file_dir = os.path.split(file)[0]
    file_name = os.path.split(file)[1]
    
    new_file = os.path.join(file_dir, file_name[:-4]+ '_new_file.csv')
    write_file = open(new_file, 'w', newline='')
    
    df = pd.read_csv(file, low_memory=False)
    header = list(df)
    
    area = np.array(df['F_AREA'])
    area_sum = np.sum(area)
    
    weight = area/area_sum
    ######################################################
    year=2018
    if calendar.isleap(year):
        period = 366
    else:
        period = 365
    date = pd.date_range('01-01-'+str(year), periods=period)
    
    for i in range(3, 368):
        df[header[i]] = np.array(df[header[i]])*weight
        day_sum.append(np.sum(df[header[i]]))
    df.insert(len(header), 'Weight', weight)
    header = list(df)
    if (len(df) < len(day_sum)):
        df1 = pd.DataFrame(index=list(range(len(df), len(day_sum))))
        df = pd.concat([df, df1])
        df = df.fillna('')
        df.insert(len(header), 'Date', date)
        header = list(df)
        df.insert(len(header), 'Sum', day_sum)
    else:
        df['Date'] = pd.Series(date)
        df['Sum'] = pd.Series(day_sum)
        df = df.fillna('')
    
    df.to_csv(write_file, index=False)
    
    write_file.close()
Example #13
def get_outliers(s, eps=0.8, min_samples=5):
    '''
    DBSCAN to identify, visualize, and remove outliers

    '''

    try:
        dim = len(s.columns)
    except AttributeError:
        dim = 1
    s = s.dropna()
    x = s.values.reshape(len(s), dim)
    x = StandardScaler().fit_transform(x)
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    model = dbscan.fit(x)
    return series(model.labels_ != stats.mode(model.labels_).mode[0],
                  index=s.index)
Example #14
    def assign_cluster_label(self, kdd_dataset):
        labels = kdd_dataset['label']
        label_names = list(
            map(
                lambda x: pandas.Series([
                    labels[i] for i in range(len(self.km.labels_))
                    if self.km.labels_[i] == x
                ]), range(self.n_cluster)))

        # val = ','.join(map(str, label_names))
        for i in range(self.n_cluster):
            print("cluster {} labels: ".format(i))
            print(label_names[i].value_counts())
            label_dict = label_names[i].value_counts().to_dict()
            val = max(label_dict, key=label_dict.get)
            print(type(val))
            self.label_cluster.append(val)
            print("cluster of : ", val)
Example #15
 def test_array_props_conform(self, array_props, meta_rel_df):
     for k, v in array_props.items():
         expected_type = schema_type_mapping[v["items"]["type"]]
         logger.info(f"{k}: {expected_type}")
         parent_is_not_list = (meta_rel_df[k].dropna().apply(
             lambda _: not isinstance(_, list)).pipe(lambda s: sum(s)))
         assert not bool(parent_is_not_list)
         series = pd.Series([
             _ for sub_list in meta_rel_df[k].dropna().tolist()
             for _ in sub_list
         ])
         logger.info(series)
         # special handling of nullable integer
         if v["type"] == "integer":
             series = series.astype(pd.Int64Dtype())
         type_not_conform = series.apply(lambda _: not isinstance(
             _, expected_type)).pipe(lambda s: sum(s))
         assert not bool(type_not_conform)
Example #16
def load_wiki_corpus(path_data_in=None, path_data_out=None, word2vec=True):
    if path_data_in is None:
        corpus_path = path_nlp + r'zhwiki-latest-pages-articles.xml.bz2'
    else:
        corpus_path = path_data_in
    if path_data_out is None:
        if word2vec:
            corpus_processed_path = path_nlp + 'corpus_word2vec.txt'
        else:
            corpus_processed_path = path_nlp + 'corpus_doc2vec.txt'
    else:
        corpus_processed_path = path_data_out
    cc = OpenCC('t2s')
    count = 0
    with open(corpus_processed_path, 'w',
              encoding='utf-8') as corpus_processed:
        corpus = WikiCorpus(corpus_path, lemmatize=False, dictionary={})
        if word2vec:
            for doc in corpus.get_texts():
                doc_new = series(doc).apply(lambda x: ' '.join(
                    jieba.cut(cc.convert(x), cut_all=False)))
                corpus_processed.write(' '.join(doc_new) + "\n")
                count += 1
                if (count % 100 == 0):
                    logging.warning('Saved ' + str(count) + ' articles')
                if flag_test and (count == 1000):
                    return
        else:
            corpus.metadata = True
            for doc, (page_id, title) in corpus.get_texts():
                doc_new = TaggedDocument(words=[
                    word for sentence in doc
                    for word in jieba.cut(cc.convert(sentence))
                ],
                                         tags=[cc.convert(title)])
                corpus_processed.write(' '.join(doc_new[0]) + '\t' +
                                       '\t'.join(doc_new[1]) + "\n")
                count += 1
                if (count % 100 == 0):
                    logging.warning('Saved ' + str(count) + ' articles')
                if flag_test and (count == 1000):
                    return
    return
Example #17
def tmrca_half(ts, pop_nodes, pop_ids, outfile):
    """Calculats the tmrca half fx from Hejase et al 2020.

        "...test on the time to the most recent common ancestor of half the haploid
    samples from a given species (TMRCAH). Requiring only half the samples
    allows us to consider partial sweeps and provides robustness to the
    inherent uncertainty in the inferred local trees."

    Parameters
    ----------
    ts : Object
        object of type tskit tree sequence.
    pop_nodes : List
        population leaves as integers loaded from file.
    pop_ids : List
        id of population nodes to be written in DataFrame.
    outfile : str
        base name of DataFrame file output.

    Returns
    -------
    None.

    """
    df_list = []
    for pop, nodes in zip(pop_ids, pop_nodes):
        int1, int2, tmrcah_rel, time_rel, time_rel2 = calc_tmrcah(ts, nodes)
        # set up DataFrame
        df_pop = pd.DataFrame({
            "population": pd.Series([pop] * len(int1)),
            "tree_start": pd.Series(int1),
            "tree_end": pd.series(int2),
            "tmrcah": pd.Series(tmrcah_rel),
            "time_rel": pd.Series(time_rel),
            "time_rel2": pd.Series(time_rel2)
        })
        df_list.append(df_pop)
    df_pop_combine = pd.concat(df_list).reset_index(drop=True)
    df_pop_combine.to_csv(f"{outfile}.tmrca_half.csv",
                          na_rep="NAN",
                          index=False)
Example #18
def matrix(df,date,pollutant):
    df = df.dropna(subset=[pollutant], axis=0).reset_index(drop=True)
    total_rows = len(df)

    A = np.empty([total_rows,total_rows])
    B = np.empty([total_rows])

    #This for loop is used to populate the matrix A and B
    for i in range(0,total_rows): #c1, c2 ..., initial
        final = [df.loc[i,'lon'],df.loc[i,'lat'],df.loc[i,'alt']]
        for j in range(0,total_rows): #q1, q2 ...
            init = [df.loc[j,'lon'],df.loc[j,'lat'],df.loc[j,'alt']]
            coefficient = a(df.loc[j,'uwnd'],df.loc[j,'vwnd'],init[0],init[1], init[2],final[0],final[1],final[2])
            A.itemset((i,j),coefficient)
            B.itemset(i,df.loc[i,pollutant])

    #solve the matrix
    X = np.linalg.solve(A,B)

    #Save as percentages and save it in matrix A
    for m in range(0,total_rows):
        A[m] = np.multiply(A[m],X)
        sum_m = np.sum(A[m])
        for n in range(0,total_rows):
            new_value = (A[m,n]/sum_m)*100
            A.itemset((m,n),new_value)

    my_list = []
    #make a series with all the rows, state names and
    for a in range(0,total_rows):
        new_series = pd.Series(data = A[a], index = df.state)
        grouppy = new_series.groupby(new_series.index).sum()
        grouppy_dict = grouppy.to_dict()
        listty = [grouppy_dict,df.loc[a,'index']]
        my_list.append(listty)

    #X is the solution, i.e. the concentration of sources, of different places
    #A is the coefficients multiplying the sources, i.e. the weight of each source
    return my_list
Example #19
def _get_value_counts(df, col):
    text = ' '.join(df[col])
    text = text.split()
    freq = pd.Series(text).value_counts()
    return freq
Example #20
from pandas import DataFrame as df
from pandas import Series as series

details = {
    'name': ['osa', 'ire', 'ifa'],
    'age': [22, 33, 55],
    'location': ['Africa', 'Cuba', 'Brazil']
}

frame = df(details)
print(frame)

frame = df(details, columns=['name', 'location', 'age', 'salary'])
print(frame)

print(frame['location'])
print(frame.location)
print(frame.loc[1])

frame.salary = 5000
print(frame)

s = series([300, 400], index=[0, 1])
print(s)

frame.salary = s
print(frame)
Example #21
import pandas as pd

grade = pd.Series([87, 100, 94])

print(grade)

array = pd.Series(98.6, range(3))

print(array)
Example #22
import numpy as nm
import pandas as pd

dt = nm.array([1, 2, 3, 4])
s = pd.Series(dt, index=[10, 20, 30, 40])
print(s)
Example #23
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Russia'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Turkey'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Brazil'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Chile'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Colombia'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Mexico'])
gdeathsxl = gdeathsxl.append(dfgdeaths[dfgdeaths.Country == 'Peru'])
gdeathsxl = gdeathsxl.drop('Population', axis=1)
gdeathsxl = gdeathsxl.reset_index(drop=True)

Locations = pd.Series([
    'US', 'Texas', 'Bexar', 'Harris', 'Dallas', 'Tarrant', 'Travis', 'Collin',
    'Hidalgo', 'El Paso', 'Alabama', 'Arizona', 'California', 'Colorado',
    'Conneticut', 'Florida', 'Georgia', 'Louisiana', 'Massachusetts', 'Nevada',
    'New Mexico', 'New York', 'Oklahoma', 'South Carolina', 'Washington',
    'China', 'Belgium', 'Canada', 'France', 'Germany', 'Italy', 'Japan',
    'Korea, South', 'Netherlands', 'Norway', 'Portugal', 'Spain', 'Sweden',
    'Switzerland', 'United Kingdom', 'Egypt', 'South Africa', 'India',
    'Indonesia', 'Iran', 'Philippines', 'Saudi Arabia', 'Singapore',
    'Thailand', 'Poland', 'Russia', 'Turkey', 'Brazil', 'Chile', 'Colombia',
    'Mexico', 'Peru'
])
gcasesxl.loc[:, 'Admin2'] = Locations
gdeathsxl.loc[:, 'Admin2'] = Locations
gcasesxl = gcasesxl.drop(['State', 'Country'], axis=1)
gdeathsxl = gdeathsxl.drop(['State', 'Country'], axis=1)

a = gcasesxl.melt(id_vars='Admin2')

import xlsxwriter

writer = pd.ExcelWriter('covid_tableau_.xlsx', engine='xlsxwriter')
Example #24
import pandas as pd

data = [1, 2, 3, 4, 5]

a = pd.Series(data)
Example #25
# #creating a DataFrame using a dictionary
# import pandas as pd
# dictionary={'fruits':['apples', 'banana','mangoes'], 'count':[10,20,15]}
# df= pd.DataFrame(dictionary)
# print (df)

#creating a DataFrame using series
import pandas as pd
series = pd.Series([6, 12], index=['a', 'b'])
df = pd.DataFrame(series)
print(df)

# # MERGE OPERATION
# import pandas as pd
# player=['player1', 'player2','player3']
# point =[8,5,6]
# title= ['game1','game2','game3']
# df1 = pd.DataFrame({'Player': player, 'Points': point, 'Title': title})
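# # The commented-out block above stops before showing the actual merge; a minimal sketch of pd.merge
# # (the second frame df2 and its 'Score' column are purely illustrative):
# df1 = pd.DataFrame({'Player': ['player1', 'player2', 'player3'], 'Points': [8, 5, 6]})
# df2 = pd.DataFrame({'Player': ['player1', 'player2', 'player3'], 'Score': [3, 9, 7]})
# merged = pd.merge(df1, df2, on='Player')  # rows are matched on the shared 'Player' column
# print(merged)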
Example #26
import pandas as pd

grades = pd.Series([87, 100, 94])

print(grades)

same_grade = pd.Series(98.6, range(3))

print(same_grade)

#0 98.6
#1 98.6
#2 98.6
#dtype: float64

print(grades[0])
grades.count()
grades.mean()
grades.min()
grades.max()
grades.std()

print(grades.describe())

#you can specify custom indices with the index keyword argument:
grades = pd.Series([87, 100, 94], index=['Wally', 'Eva', 'Sam'])

print(grades)

#if you initialize a series with a dictionary, its keys become
#the series' indices, and its values become the series' element values
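#for instance, a minimal sketch of that behavior (this small dictionary is purely illustrative):
temperatures = pd.Series({'Mon': 21.5, 'Tue': 23.0, 'Wed': 19.8})
print(temperatures)  # the keys 'Mon', 'Tue', 'Wed' become the index; the values become the data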
Example #27
Type "help", "copyright", "credits" or "license()" for more information.
>>> list=[13,54,75]
>>> import usermodule
>>> print ("list=",usermodule.list)
list= [13, 54, 75]
>>> list.append(98)
>>> print(list)
[13, 54, 75, 98]
>>> 
>>> 
>>> import pandas as pa
>>> import numpy as nu
>>> import sys
>>> sys._stdout_=sys.stdout
>>> fruit=nu.array(['pears','mango','kiwi'])
>>> series=pa.Series(fruit)
>>> print(series)
0    pears
1    mango
2     kiwi
dtype: object
>>> 
>>> 
>>> import random
>>> print("random integer is :",random.randint(1,100))
random integer is : 42
>>> 
>>> 
>>> import sys
>>> sys.path
['', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\Lib\\idlelib', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\python39.zip', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\DLLs', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\lib', 'C:\\Users\\sharo\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages']
Example #28
print('\n corrmat index:', corrmat.index)

# # Feature Importance
"""Feature importance is an inbuilt class that comes with 
   Tree Based Regressor, we will be using Extra Tree Regressor 
   for extracting the top 10 features for the dataset. """

model = ExtraTreesRegressor()
model.fit(X, y)

print('\n Head of X:')
print(X.head())
print('\n feature importance:', model.feature_importances_)

# # plot graph of feature importances for better visualization
feat_importances = pd.Series(model.feature_importances_, index = X.columns)
feat_importances.nlargest(5).plot(kind = 'barh')
plt.show()

# # K Nearest Neighbor Regression
sns.distplot(y)
plt.show()

# # split the data and do train and test on the split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

model = Sequential()
# The Input Layer :
model.add(Dense(128, kernel_initializer = 'normal', input_dim = X_train.shape[1], activation = 'relu'))

# The Hidden Layers :
Example #29
from matplotlib import rcParams
import seaborn as sb

rcParams['figure.figsize']=8,4
sb.set_style('whitegrid')

#cars dataset
mpg.plot(kind='hist')

plt.hist(mpg)

sb.distplot(mpg)

cars.plot(kind='scatter',x='hp',y='mpg',c=['darkgray'],s=150)

sb.regplot(x='hp',y='mpg',data=cars,scatter=True)

sb.pairplot(cars)#scatterplot matrix

cars_df=pd.DataFrame((cars.iloc[:,[1,3,4,6]].values),columns=['mpg','disp','hp','wt'])
cars_target=cars.iloc[:,9].values
target_names=[0,1]
cars_df['group']=pd.Series(cars_target,dtype='category')
sb.pairplot(cars_df,hue='group',palette='hls')

cars.boxplot(column='mpg',by='am')
cars.boxplot(column='wt',by='am')

sb.boxplot(x='am',y='mpg',data=cars,palette='hls')

Example #30
# -*- coding: utf-8 -*-
"""
Created on Thu Jan  2 18:35:36 2020

@author: Santosh
"""

#list to pandas conversion
import pandas as pd
import numpy as np
np_array = np.array([1, 2, 3, 4, 5])
print(np_array)
new = pd.Series(np_array)
print(new)

## pandas to list

import pandas as pd
import numpy as np
new = pd.Series([1, 2, 3, 4])
print(new)
print(new.tolist())

## dictionary to pandas and list

import pandas as pd
import numpy as np
ds = {'a': 1, 'b': 2, 'c': 6, 'd': 7}
print(ds)
print(pd.Series(ds))
print(pd.Series(ds).tolist())
Example #31
import pandas as pd
import numpy as np
df = pd.DataFrame(columns=('Columna1','Columna2','Columna3'))
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [40, 50, 60], [23, 35, 37]]), 
                  index= [2.5, 12.6, 4.8, 4.8, 2.5], 
                  columns=[48, 49, 50]) # index sets the row labels; columns gives the column names


#Creating a DataFrame from a dictionary or from a pandas Series
data = {'Lenguaje':['Python','C#','Java'],
       'Dificultad':['Media','Alta','Muy Alta'],
       'Ejecucion':['No compilado','Compilado','Compilado']}
df = pd.DataFrame(data)

#Starting from pandas Series
listado_lenguajes = ['Python','C#','Java']
d = {'Lenguaje': pd.Series(['Sin compilar','Compilado','Compilado'],index = listado_lenguajes),
    'Dificultad': pd.Series(['Media','Alta','Muy Alta'],index = listado_lenguajes)}
#If the indices did not match, a merge happens and all labels end up in the DataFrame's overall index; if one of the Series
#does not include a given label, the value associated with that label in that column appears as NaN.
df = pd.DataFrame(d)
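#For instance, a minimal sketch of that behavior (the second dictionary d2 below is purely illustrative):
d2 = {'Lenguaje': pd.Series(['Sin compilar','Compilado'], index=['Python','C#']),
      'Dificultad': pd.Series(['Media','Alta','Muy Alta'], index=['Python','C#','Java'])}
print(pd.DataFrame(d2))  # the 'Lenguaje' cell for 'Java' shows up as NaN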

#Up to this point we have created DataFrames in several different ways. Remember that values imported from a csv with read_csv or from an Excel file with read_excel,
#as well as values imported over SQL with pyodbc, are shown or interpreted by Python as a DataFrame. We can also create a DataFrame from
#a csv selecting only the data we want, by creating a reader and picking out the rows.
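#A minimal sketch of that idea (the file name 'lenguajes.csv' and its 'Dificultad' column are hypothetical):
import csv
with open('lenguajes.csv', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    filas = [fila for fila in reader if fila['Dificultad'] == 'Alta']  #keep only the rows we want
df_filtrado = pd.DataFrame(filas)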

#Creating a DataFrame with selected columns -- in this case only Lenguaje and Dificultad
df = pd.DataFrame(data, columns=['Lenguaje','Dificultad'])

#We can create new columns in the following way.
df['Experiencia'] = 'variable asignar' #the whole column is created with the same value. We can also make it conditional on another column of the DataFrame
df['Rentabilidad'] = df['Salario'] > 35000 #every row that meets this criterion on the Salario column will be marked True in the new Rentabilidad column.
Example #32
import pandas
import numpy
arr = numpy.array([10, 20, 30, 40, 50, 60])
series = pandas.Series(arr)
print(series)