Example #1
def generate_Z_msr_org(numOfBuses, numOfLines, bus_data_df, topo_mat,
                       file_name):
    import pandas as pd
    import numpy as np

    # Creating Measurement Data to run state estimation
    bus_data = bus_data_df[[
        'Remote controlled bus number', 'Load MW', 'Generation MW'
    ]].copy()
    bus_data.columns = ['Bus number', 'Load', 'Generation']

    # Correcting the load generation for a lossless DC system
    correction_load = sum(bus_data['Load']) - sum(bus_data['Generation'])
    print("correction_load: ", correction_load)

    # Adding the correction load to the largest generator
    bus_data.loc[bus_data['Generation'].idxmax(), 'Generation'] += correction_load
    # correction_check = sum(bus_data['Load']) - sum(bus_data['Generation'])
    # print("correction_check: ", correction_check)

    # Bus Power = Bus Gen - Bus Load
    bus_data['Bus Power'] = bus_data['Generation'] - bus_data['Load']

    print("bus_data:\n", bus_data.head())

    # Padding 0 in the top of the data from reference
    Z_data_bus_power = pd.DataFrame(
        pd.concat([pd.Series([0]), bus_data['Bus Power']]))

    # Topomat containing only the bus power rows along with reference bus
    B_mat_bus_power = pd.concat(
        [topo_mat.loc[0:0], topo_mat.loc[numOfLines * 2 + 1:]])

    # Estimating the states from the bus power data
    state_original = np.linalg.pinv(B_mat_bus_power) @ Z_data_bus_power

    # Calculating the Z_msr_org using the Topology Matrix and states
    Z_msr_org = topo_mat @ state_original
    Z_msr_org.columns = ['Data']
    # Saving the data: append the two sheets to the existing workbook
    with pd.ExcelWriter(file_name, engine='openpyxl', mode='a') as writer:
        Z_msr_org.to_excel(writer, sheet_name="Measurement Data", index=False)
        bus_data.to_excel(writer, sheet_name="Bus Data", index=False)
    # saving complete !

    print("Z_msr_org:\n", Z_msr_org.head())

    return Z_msr_org, bus_data
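A hypothetical end-to-end call sketch: the 3-bus inputs below are illustrative, topo_mat is assumed to come from topologyProcessor in Example #3, and the workbook at 'measurements.xlsx' must already exist for the append-mode writer above.

import pandas as pd
# made-up 3-bus data with the three columns the function selects
bus_data_df = pd.DataFrame({'Remote controlled bus number': [1, 2, 3],
                            'Load MW': [0.0, 50.0, 50.0],
                            'Generation MW': [90.0, 0.0, 0.0]})
# topo_mat = topologyProcessor(3, 2, line_data); see Example #3
Z_msr_org, bus_data = generate_Z_msr_org(3, 2, bus_data_df, topo_mat,
                                         'measurements.xlsx')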
Example #2
 def test_init_example_module(self):
     """Ensures that the twine class can be instantiated with a file"""
     # test_data_file = self.path + "test_data/.json"
     df = pd.DataFrame({
         "attribute":
         ["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"],
         "value": [1, 2, 4, 5, 3, 6, 100, 33, 44, 77, 77, 99],
     })
     gvt = GroupedVariableTransformation(key="attribute", target="value")
     gvt.fit(df)
     print(gvt)
     gvt.transform(df)
Example #3
def topologyProcessor(numOfBuses, numOfLines, line_data):
    import pandas as pd
    import numpy as np

    numOfZ = numOfBuses + numOfLines * 2

    # Placeholder for topoMat
    topo_mat = pd.DataFrame(np.zeros((numOfZ + 1, numOfBuses), dtype=float))

    # rows representing the line powerflow
    for line in line_data.values:
        # Line information
        lineID = int(line[0])
        fromBus = int(line[1] - 1)
        toBus = int(line[2] - 1)
        admittance = line[3]

        # line power flow seen from each end (forward and reverse rows)
        topo_mat.iloc[lineID, fromBus] = admittance
        topo_mat.iloc[lineID, toBus] = -admittance

        topo_mat.iloc[lineID + numOfLines, fromBus] = -admittance
        topo_mat.iloc[lineID + numOfLines, toBus] = admittance

    # rows representing the bus consumption
    for busIndx in range(1, numOfBuses + 1):

        busTopo = np.zeros(numOfBuses)

        for line in line_data.values:

            # Line information (bus numbers stay 1-based here so they can be
            # compared with busIndx directly)
            lineID = int(line[0])
            fromBus = int(line[1])
            toBus = int(line[2])

            if fromBus == busIndx:
                busTopo = busTopo + topo_mat.loc[lineID]

            elif toBus == busIndx:
                busTopo = busTopo - topo_mat.loc[lineID]

        topo_mat.loc[2 * numOfLines + busIndx] = busTopo.copy()

    # setting 1 in the first row, which represents the reference bus
    topo_mat.iloc[0, 0] = 1
    return topo_mat
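A minimal sketch of the line_data layout the loops above assume: one row per line with [line ID, from bus, to bus, admittance], all numbering 1-based. The two-line, three-bus values are made up.

import pandas as pd
line_data = pd.DataFrame([[1, 1, 2, 10.0],
                          [2, 2, 3, 5.0]])
topo = topologyProcessor(numOfBuses=3, numOfLines=2, line_data=line_data)
print(topo.shape)  # (1 + 2*numOfLines + numOfBuses, numOfBuses) = (8, 3)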
Example #4
def review_count_scrape():
    import time

    import pandas as pd
    import requests
    from bs4 import BeautifulSoup

    url = 'https://www.amazon.com/Best-Sellers/zgbs'
    headers = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/58.0.3029.110 Safari/537'),
        'Accept-Language': 'en-US, en; q=0.5'
    }
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')
    print(r.status_code)

    product_total_review = [
        i.text for i in soup.findAll('a', {'class': 'a-small a-link normall'})
    ]
    df = pd.DataFrame(product_total_review)
    print(df)

    time.sleep(60)

    end_timer = time.time() + 60 * 2
    while time.time() < end_timer:
        time.sleep(1)  # placeholder body; the original loop is cut off here
Example #5
def xml_to_csv(path):
    import glob
    import xml.etree.ElementTree as ET

    import pandas as pd

    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()

        for member in root.findall('object'):
            value = (
                'data/' + root.find('filename').text,
                int(member[4][0].text),
                int(member[4][1].text),
                int(member[4][2].text),
                int(member[4][3].text),
                member[0].text,
            )
            xml_list.append(value)
    column_name = ['filename', 'xmin', 'ymin', 'xmax', 'ymax', 'class']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df
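A hypothetical usage sketch, assuming a directory of Pascal-VOC-style .xml annotations; the 'annotations' path and 'labels.csv' name are illustrative.

xml_df = xml_to_csv('annotations')
xml_df.to_csv('labels.csv', index=False)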
Example #6
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 15 15:04:07 2018

@author: david
"""

import pandas as pd
import numpy as np

df = pd.DataFrame({
    'rest_name': ['Salty Sow'],
    'rating': [4.5],
    'reviews_num': [1968]
})
Example #7
def textprocessing():
    import collections
    import operator
    import re
    from functools import reduce

    import jieba
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import wordcloud
    from PIL import Image

    commentList = []
    dateList = []
    for i in range(10):
        num = i + 1
        # getComments() is assumed to be defined elsewhere, returning two lists
        [commentList_temp, dateList_temp] = getComments(num)
        commentList.append(commentList_temp)
        dateList.append(dateList_temp)
    commentList = reduce(operator.add, commentList)
    dateList = reduce(operator.add, dateList)

    dataTmp = {'comments': commentList[:], 'date': dateList[:]}
    df2 = pd.DataFrame(dataTmp)
    # .xlsx target: modern pandas no longer writes legacy .xls files
    df2.to_excel("text-movie.xlsx",
                 sheet_name="sheet1",
                 index=False,
                 header=True)

    comments = ''
    for k in range(len(commentList)):
        comments = comments + (str(commentList[k])).strip()

    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    filterdata = re.findall(pattern, comments)  # keep only Chinese characters, dropping punctuation
    cleaned_comments = ''.join(filterdata)

    seg_list_exact = jieba.cut(cleaned_comments, cut_all=False)  # tokenize in jieba's accurate mode
    object_list = []
    remove_words = pd.read_csv("stopwords.txt",
                               index_col=False,
                               quoting=3,
                               sep="\t",
                               names=['stopword'],
                               encoding='utf-8')

    for word in seg_list_exact:  # iterate over every token
        if word not in remove_words['stopword'].values:  # skip stopwords
            object_list.append(word)  # keep the token

    # word-frequency statistics
    word_counts = collections.Counter(object_list)  # count token frequencies
    word_counts_top10 = word_counts.most_common(10)  # ten most frequent tokens
    print(word_counts_top10)  # sanity check

    # word-cloud rendering
    mask = np.array(Image.open('background.jpg'))  # background mask image
    wc = wordcloud.WordCloud(
        background_color='white',  # background color
        font_path='/System/Library/Fonts/Hiragino Sans GB.ttc',  # font with CJK glyphs
        mask=mask,  # background mask
        max_words=200,  # maximum number of words shown
        max_font_size=100,  # maximum font size
        scale=32  # render scale; larger is sharper
    )

    wc.generate_from_frequencies(word_counts)  # build the cloud from the frequency dict
    image_colors = wordcloud.ImageColorGenerator(mask)  # color scheme taken from the mask image
    wc.recolor(color_func=image_colors)  # recolor the words to match the mask
    wc.to_file("/Users/ownpro/Desktop/temp.jpg")  # save the image to a file
    plt.imshow(wc)  # show the cloud
    plt.axis('off')  # hide the axes
    plt.show()  # display the figure
Example #8
# mylist is assumed upstream: a list of JSON strings
data_length = len(mylist)
data_list = []

# Convert the strings to dictionaries
import json
for i in mylist:
    d = json.loads(i)
    data_list.append(d)
list_keys = list(data_list[0].keys())

# The keys used can vary for different tasks;
# here we just use three features as a demo
key_list_used = ['all_ratio', 'platform', 'genres']
value_list1 = []
value_list2 = []
value_list3 = []
key1 = key_list_used[0]
key2 = key_list_used[1]
key3 = key_list_used[2]
for i in data_list:
    # features w/o any values return None
    value_list1.append(i.get(key1))
    value_list2.append(i.get(key2))
    value_list3.append(i.get(key3))
value_list_used = [value_list1, value_list2, value_list3]
dic_used = dict(zip(key_list_used, value_list_used))

# Convert to DataFrame
import pandas as pd
df_used = pd.DataFrame(dic_used)
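The three per-key loops above can also be collapsed into one call; a sketch of the equivalent construction (keys missing from a record come back as NaN rather than None):

df_used = pd.DataFrame(data_list)[key_list_used]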
Example #9
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({
    "State": ["Andaman And Nicobar", "Andhra Pradesh", "Arunachal Pradesh", ]
})
Example #10
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({})
Example #11
"""
@author: arti
"""

import googlemaps
import pandas as pd

my_key = ""

maps = googlemaps.Client(key=my_key)

lat = []
lng = []

# Seoul City Hall, National Gugak Center, Haeundae Beach
places = ["서울시청", "국립국악원", "해운대해수욕장"]

i = 0
for place in places:
    i = i + 1
    try:
        print(i, place)
        geo_location = maps.geocode(place)[0].get('geometry')
        lat.append(geo_location['location']['lat'])
        lng.append(geo_location['location']['lng'])

    except Exception:
        lat.append('')
        lng.append('')
        print(i)

# column names: '위도' = latitude, '경도' = longitude
df = pd.DataFrame({'위도': lat, '경도': lng}, index=places)
print(df)
Example #12
import pandas as pd
import os
import time
import gps_data_read as gps

# GPS_Read_Data_RMC() is assumed to return a (status, reading) tuple
ret, first_reading = gps.GPS_Read_Data_RMC()
gps_loc_data = [first_reading]
gps_dist_present = []
path = os.getcwd() + "/gps_data_csv"
if (not os.path.exists(path)):
    os.makedirs(path)
try:
    os.chdir(path)
except OSError:
    print("Can not access path")
    exit()
i = 0

while ret:
    try:
        ret, reading = gps.GPS_Read_Data_RMC()
        gps_loc_data.append(reading)
        gps_dist_present.append(
            gps.dist_calc_present(gps_loc_data[i][0], gps_loc_data[i][2]))
        i += 1
    except KeyboardInterrupt:
        # on Ctrl-C, dump everything collected so far and stop
        data_set = list(zip(gps_loc_data, gps_dist_present))
        df = pd.DataFrame(data=data_set, columns=None)
        df.to_csv("gps_data_set.csv", index=False, header=False)
        break
Example #13
        #prob = cv2.resize(pred, (orig_width, orig_height))
        prob = pred
        mask = prob > threshold
        ### imshow
        ### A little animation -start-
        cv2.imshow('image',prob)
        cv2.waitKey(1000) ### 1 second for each frame
        ### A little animation -end-
        rle = run_length_encode(mask)
        rles.append(rle)

cv2.destroyAllWindows()

### Submit to Kaggle
print("Generating submission file...")
df = pd.DataFrame({'img': test_names, 'rle_mask': rles})
df.to_csv('submit/submission.csv.gz', index=False, compression='gzip')


'''
gif2png.py
from PIL import Image
kaggle_train_mask_path = 'Kaggle_Car_Data/train_masks/train_masks'

for filename in os.listdir(kaggle_train_mask_path):
    if filename.endswith(".gif"):
        print(filename)
        im_mask = Image.open(kaggle_train_mask_path + '/' + filename)
        png_filename = filename[:-4] + '.png'
        im_mask.save(kaggle_train_mask_path + '/' + png_filename,"PNG")
'''
Example #14
    def get_unsorted_all_commits_dates(self):

        rev = sys.argv[1]
        cumulative = 0
        if len(sys.argv) == 4:
            if (sys.argv[3] == "c"):
                cumulative = 1
            else:
                print("Dont know what you mean with %s" % sys.argv[3])
                sys.exit(-1)
        rev_range = int(sys.argv[2])

        print("#sublevel commits %s stable fixes" % rev)
        print("lv hour bugs")  #tag for R data.frame
        rev1 = rev

        v44 = 1452466892  # Unix timestamp of the Linux v4.4 release tag
        try:
            for sl in range(1, rev_range + 1):
                rev2 = rev + "." + str(sl)
                gitcnt = self.gitcnt + rev1 + "..." + rev2
                gittag = self.gittag + rev2
                git_rev_list = Popen(gitcnt,
                                     stdout=PIPE,
                                     stderr=DEVNULL,
                                     shell=True)
                commit_cnt = self.get_commit_cnt(git_rev_list)
                if cumulative == 0:
                    rev1 = rev2
                # if get back 0 then its an invalid revision number
                sl_list = []
                days_list = []
                commits_cnt_list = []
                if commit_cnt:
                    git_tag_date = Popen(gittag,
                                         stdout=PIPE,
                                         stderr=DEVNULL,
                                         shell=True)
                    days = self.get_tag_days(git_tag_date, v44)
                    sl_list.append(sl)
                    days_list.append(days)
                    commits_cnt_list.append(commit_cnt)
                    print("%d %d %d" % (sl, days, commit_cnt))

                else:
                    print('Invalid revision number')
                    break

                # create dataframe
                re = np.array([sl_list, days_list, commits_cnt_list])
                data = re.transpose()
                print(data)
                c = ["1v", "hour", "bugs"]
                df = pd.DataFrame(data=data, columns=c)

                # write in csv file
                data_file = 'data_v4.4'
                csv_data = data_file + ".csv"
                df.to_csv(csv_data)
        except Exception as exc:
            raise ValueError('failed to collect commit data') from exc
Example #15
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({
    "State": [
        "Andaman And Nicobar",
        "Andhra Pradesh",
        "Arunachal Pradesh",
        "Assam",
        "Bihar",
        "Chandigarh",
        "Chhattisgarh",
    ]
})
Example #16
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({"State": []})
Example #17
# imports assumed from the calls below
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# reading dataset
data = pd.read_csv('/root/task5/pro.csv')
# dropping the unneeded parts
data = data.dropna()
data = data.drop(['url'], axis='columns')
ip = data['IP']
count = Counter(ip)
# per-row request count derived from the Counter (assumed intent of the plot below)
data['count'] = data['IP'].map(count)

# scaling the dataset (assumes the IP column is numeric in this dataset)
sc = StandardScaler()
data_scaled = sc.fit_transform(data[['IP', 'count']])

# fitting the model
model = KMeans(n_clusters=4)
pred = model.fit_predict(data_scaled)
data['cluster'] = pred

# plotting the clusters
f1 = data[data.cluster == 0]
f2 = data[data.cluster == 1]
f3 = data[data.cluster == 2]
f4 = data[data.cluster == 3]
plt.scatter(f1['count'], f1['IP'], color='green')
plt.scatter(f2['count'], f2['IP'], color='red')
plt.scatter(f3['count'], f3['IP'], color='black')
plt.scatter(f4['count'], f4['IP'], color='blue')
# note: the centroids live in the scaled feature space
plt.scatter(model.cluster_centers_[:, 1], model.cluster_centers_[:, 0],
            color='purple', marker='*', label='centroid')
plt.xlabel('Count')
plt.ylabel('IP')
plt.legend()
Example #18
# corona_dataset_csv is assumed loaded upstream (e.g. via pd.read_csv)
corona_dataset_csv.head(5)

# Grouping by the country
corona_dataset_csv = corona_dataset_csv.groupby("Country/Region").sum()

#Visualising data related to country
corona_dataset_csv.loc["India"]

# Compare countries by plotting them together
corona_dataset_csv.loc["India"].plot()
corona_dataset_csv.loc["China"].plot()
corona_dataset_csv.loc["Italy"].plot()
plt.legend()

# Plotting the rise over the first ten dates
corona_dataset_csv.loc["India"][:10].plot()

# Plotting the daily change (first derivative) of covid-19 cases:
corona_dataset_csv.loc["India"].diff().plot()

#Finding the max. derivative:
countries = list(corona_dataset_csv.index)
max_infection_rates = []
for c in countries:
    max_infection_rates.append(corona_dataset_csv.loc[c].diff().max())
corona_dataset_csv['max infection rate'] = max_infection_rates

#Creating a new Dataframe with needed Dataset
corona_data = pd.DataFrame(corona_dataset_csv['max infection rate'])
corona_data.head()
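The per-country loop above also has a vectorized equivalent; a sketch, assuming the frame is indexed by country with one column per date:

corona_dataset_csv['max infection rate'] = corona_dataset_csv.diff(axis=1).max(axis=1)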
Example #19
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({"State": ["Andaman And Nicobar"]})
Example #20
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({
    "State": [
        "Andaman And Nicobar", "Andhra Pradesh", "Arunachal Pradesh", "Assam",
        "Bihar", "Chandigarh", "Chhattisgarh", "Dadra And Nagar Haveli",
        "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
        "Jammu And Kashmir", "Jharkhand", "Karnataka",
    ]
})
Example #21
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({
    "State": [
        "Andaman And Nicobar", "Andhra Pradesh", "Arunachal Pradesh", "Assam",
        "Bihar", "Chandigarh", "Chhattisgarh", "Dadra And Nagar Haveli",
        "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
        "Jammu And Kashmir", "Jharkhand", "Karnataka", "Kerala", "Lakshadweep",
        "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram",
        "Nagaland", "Orissa", "Puducherry", "Punjab", "Rajasthan", "Sikkim",
        "Tamil Nadu", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"
    ],
    "Population": [
        380581, 84580777, 1383727, 31205576, 104099452, 1055450, 25545198,
        586956, 16787941, 1458545, 60439692, 25351462, 6864602, 12541302,
        32988134, 61095297, 33406061, 64473, 72626809, 112374333, 2855794,
        2966889, 1097206, 1978502, 41974218, 1247953, 27743338, 68548437,
        610577, 72147030, 3673917, 199812341, 10086292, 91276115
    ]
})
fig = px.bar(df, x="State", y="Population", color="State", barmode="group")

app.layout = html.Div(children=[
    html.H1(children='Hello Dash'),

    html.Div(children='''
        Dash: A web application framework for Python.
    '''),

    dcc.Graph(
        id='example-graph',
        figure=fig  # assumed completion of the truncated call: the bar chart built above
    )
])
Example #22
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pystocktwits_data_utils import PyStockTwitData

import pandas as pd

data = PyStockTwitData()

# Get all msgs from this company that is specified
list_of_msgs, list_of_sentiment_json = (
    data.get_all_msgs_with_sentiment_by_symbol_id("VEEV"))

# Parse out the Bullish, Bearish, or None Sentiment
list_of_sentiment = (
    data.extract_sentiment_statements_basic(list_of_sentiment_json))

# Create a Dataframe
dataframe = pd.DataFrame({'msg': list_of_msgs, 'sentiment': list_of_sentiment})

# Print to see dataframe and save
print(dataframe)
dataframe.to_csv('../sample_csv_output/pystockdataset.csv')
Example #23
import dash
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({
    "State": [
        "Andaman And Nicobar",
        "Andhra Pradesh",
        "Arunachal Pradesh",
        "Assam",
        "Bihar",
        "Chandigarh",
        "Chhattisgarh",
        "Dadra And Nagar Haveli",
        "Delhi",
        "Goa",
        "Gujarat",
        "Haryana",
        "Himachal Pradesh",
        "Jammu And Kashmir",
        "Jharkhand",
        "Karnataka",
        "Kerala",
        "Lakshadweep",
        "Madhya Pradesh",
        "Maharashtra",
        "Manipur",
        "Meghalaya",
        "Mizoram",
        "Nagaland",
        "Orissa",
    ]
})
Example #24
import numpy as np
import pandas as pd

np.random.seed(101)  # the same numbers will be generated on every machine

df = pd.DataFrame(np.random.randn(5, 4),
                  index="A B C D E".split(),
                  columns="W X Y Z".split())

bol = df > 0  # returns a table of booleans
df[bol]  # numbers where True, NaN where False
df[df['W'] > 0]  # drops the rows that fail the condition, returning numbers
df[df['W'] > 0]['Y']  # take column Y, but only the rows where W is positive
df[(df['W'] > 0) & (df['Y'] > 1)]  # 'and' only compares single booleans; use & here

df.reset_index()  # adds a numeric column alongside A, B, C...
df.set_index('W')  # W becomes the index
Example #25
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({
    "State": [
        "Andaman And Nicobar", "Andhra Pradesh", "Arunachal Pradesh", "Assam",
        "Bihar", "Chandigarh", "Chhattisgarh", "Dadra And Nagar Haveli",
        "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
        "Jammu And Kashmir", "Jharkhand", "Karnataka", "Kerala", "Lakshadweep",
        "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram",
        "Nagaland", "Orissa", "Puducherry", "Punjab", "Rajasthan", "Sikkim",
        "Tamil Nadu", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"
    ],
    "Population": [
        380581,
    ]
})
Example #26
'''Missing values: 1) drop columns with missing values, 2) impute;
finally, score with MAE
'''

# 1) drop columns
cols_with_missing_values = [
    col for col in train_X.columns if train_X[col].isnull().any()
]

reduced_train_x = train_X.drop(cols_with_missing_values, axis=1)
reduced_val_x = val_X.drop(cols_with_missing_values, axis=1)

# 2) imputation
from sklearn.impute import SimpleImputer

my_imputer = SimpleImputer()
imputed_train_x = pd.DataFrame(my_imputer.fit_transform(train_X))
imputed_val_x = pd.DataFrame(my_imputer.transform(val_X))  # transform only: fit on training data to avoid leakage

# imputation removes column names; add them back
imputed_train_x.columns = train_X.columns
imputed_val_x.columns = val_X.columns

# print shape of data (num_rows, num_columns); shape is an attribute, not a method
train_X.shape

# print number of missing values in each column of training data
missing_val_count_by_column = train_X.isnull().sum(
)  # returns all column with sum of null
missing_val_count_by_column[missing_val_count_by_column >
                            0]  # returns column names if sum > 0 (bool)
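A minimal sketch of the MAE scoring the docstring promises, assuming train_y/val_y targets exist and using a RandomForestRegressor (the model is not specified in the original):

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

def score_dataset(x_train, x_val, y_train, y_val):
    model = RandomForestRegressor(n_estimators=10, random_state=0)
    model.fit(x_train, y_train)
    return mean_absolute_error(y_val, model.predict(x_val))

print("MAE (drop columns):", score_dataset(reduced_train_x, reduced_val_x, train_y, val_y))
print("MAE (imputation):", score_dataset(imputed_train_x, imputed_val_x, train_y, val_y))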
'''Categorical Variables'''
Example #27
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# x (features) and y (target) are assumed loaded upstream
InteractiveShell.ast_node_interactivity = "all"  # echo every expression, not just the last

train_x, test_x, train_y, test_y = train_test_split(x,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=1)
train_x.shape
test_x.shape
train_y.shape
test_y.shape

linear_model = LinearRegression()
linear_model
linear_model.fit(train_x, train_y)

test_prediction = linear_model.predict(test_x)
print(linear_model.coef_)
df_model = pd.DataFrame({'features': x.columns, 'coeff': linear_model.coef_})
df_model = df_model.sort_values(by=['coeff'])
df_model

df_model.plot(x='features', y='coeff', kind='bar', figsize=(15, 10))
plt.show()

fdf = pd.concat([test_x, test_y], axis=1)
fdf['Predicted'] = np.round(test_prediction, 1)

fdf['Prediction_Error'] = fdf[''] - fdf['Predicted']
# Add something for fdf[''] - maybe fdf['Death']
fdf
Example #28
# read and write to csv file
df = pd.read_csv('file_name')
df.to_csv('example', index=False)
# Excel input and output; beware of images in the excel file, they may cause a crash
pd.read_excel('Excel_Sample.xlsx', sheet_name='Sheet1')
df.to_excel('excelname.xlsx', sheet_name='Sheet1')
#Html Input
df = pd.read_html('http://.....html')
#Read Database in sql
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
df.to_sql('data', engine)
sql_df = pd.read_sql('data', con=engine)

#Convert to DataFrames
df = pd.DataFrame(np.random.randn(5, 4), index='A B C D E'.split(), columns='W X Y Z'.split())
df['W']  # call one column
df[['W', 'Z']]  # call more than one column
# creating new columns
df['new'] = df['W'] + df['Y']  # any expression over existing columns
# Removing columns
df.drop('nameofthecolumn', axis=1)  # pass inplace=True to change df in place
#selecting Rows
df.loc['A']  # use the label to locate a row
df.iloc[2]  # use the position to locate a row
df.loc['B', 'Y']  # locate a single item in the table
df.loc[['A', 'B'], ['W', 'Y']]  # selecting a box of elements
#can also use condition to filter data
df[df>0]
df[df['W']>0]

# reset to an integer-based row index
df.reset_index()
Example #29
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({
    "State": [
        "Andaman And Nicobar", "Andhra Pradesh", "Arunachal Pradesh", "Assam",
        "Bihar", "Chandigarh", "Chhattisgarh", "Dadra And Nagar Haveli",
        "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
        "Jammu And Kashmir", "Jharkhand", "Karnataka", "Kerala", "Lakshadweep",
        "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram",
        "Nagaland", "Orissa", "Puducherry", "Punjab", "Rajasthan", "Sikkim",
        "Tamil Nadu", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"
    ],
    "Population": [
        380581, 84580777, 1383727, 31205576, 104099452, 1055450, 25545198,
        586956, 16787941, 1458545, 60439692, 25351462, 6864602, 12541302,
        32988134, 61095297, 33406061,
    ]
})
Example #30
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.DataFrame({""})