import math
import scipy as sp
import pandas as pd
import numpy as np
from statistics import norm_pdf, norm_cdf  # presumably a local helper module, not the stdlib statistics package
from matplotlib import pyplot as plt
from scipy import interpolate
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from BS_div import div_price_BS, div_delta_BS, div_gamma_BS, div_vega_BS, div_theta_BS, div_rho_BS
from vol_imp import vIP
#import datetime

# load data
data = pd.ExcelFile("vale3.xls")
#print(data.sheet_names)

# Define the columns to be read
columns1 = [
    'strike', 'bid', 'offer', 'halfs', 'Price Underlying', 'r', 'd', 'v', 'T',
    'Maturity', 'Code', 'Put/Call', 'Underlying Asset', 'Type Asset',
    'Exchange', ' Country'
]
data_opt = data.parse(u'1.1', names=columns1)
data_opt = data_opt.transpose()
data_opt = data_opt.values

# Variables and Parameters
S = data_opt[4, 0]  # S: spot price
r = data_opt[5, 0] / 252  # r: daily interest rate
# To estimate daily volatility, use all trades. Build a measure weighted by traded volume; also compare with the max and min.
d = data_opt[6, 0] / 252  # d: dividend payout
# Assumes module-level imports: urlparse (urllib.parse), a WebDAV client imported as wc,
# tempfile, io, urllib.request, xlrd, and pandas as pd.
def load_workbook(self, fname, wv_user=None, wv_password=None, wv_host_name=None):
    """
    :param fname:
    :param wv_user: In case of use of WebDav server, user name
    :param wv_password: In case of use of WebDav server, password
    :param wv_host_name: In case of use of WebDav server, host name
    :return: Number of added DataFrames
    """
    # Load a XLSX workbook into memory, as dataframes
    pr = urlparse(fname)
    if pr.scheme != "":
        # Load from remote site
        if not wv_host_name:
            wv_host_name = "nextcloud.data.magic-nexus.eu"
        if pr.netloc.lower() == wv_host_name:
            # WebDAV
            parts = fname.split("/")
            for i, p in enumerate(parts):
                if p == wv_host_name:
                    url = "/".join(parts[:i + 1]) + "/"
                    fname = "/" + "/".join(parts[i + 1:])
                    break
            options = {
                "webdav_hostname": url,
                "webdav_login": wv_user,
                "webdav_password": wv_password
            }
            client = wc.Client(options)
            with tempfile.NamedTemporaryFile(delete=True) as temp:
                client.download_sync(remote_path=fname, local_path=temp.name)
                f = open(temp.name, "rb")
                data = io.BytesIO(f.read())
                f.close()
        else:
            data = urllib.request.urlopen(fname).read()
            data = io.BytesIO(data)
        xl = pd.ExcelFile(
            xlrd.open_workbook(file_contents=data.getvalue()),
            engine="xlrd")
    else:
        xl = pd.ExcelFile(fname)

    cont = 0
    for sheet_name in xl.sheet_names:
        df = xl.parse(sheet_name, header=0)
        # Manage columns
        cols = []
        for col in df.columns:
            col_parts = col.split(".")
            if col.lower().startswith("unnamed"):
                cols.append("")
            elif len(col_parts) > 1:
                try:
                    int(col_parts[1])
                    # This is the case of "col.1"
                    cols.append(col_parts[0])
                except:
                    # This is the case of "col_part.col_part" (second part is string)
                    cols.append(col)
            else:
                cols.append(col)
        df.columns = cols
        self._dataframes.append(df)
        self._dataframe_names.append(sheet_name)
        cont += 1
    return cont
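# A hypothetical usage sketch of the method above. The container class name and the
# initialisation of self._dataframes / self._dataframe_names are assumptions; they are
# not shown in the excerpt.
# loader = WorkbookLoader()                                   # hypothetical class
# n = loader.load_workbook("data/workbook.xlsx")              # local file
# n = loader.load_workbook(
#     "https://nextcloud.data.magic-nexus.eu/remote.php/webdav/data.xlsx",
#     wv_user="user", wv_password="secret")                   # WebDAV (illustrative URL)
# print(n, "sheets loaded as DataFrames")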
# (truncated) ZippedFilesloc, and path_processed_data in a new elif block")

path_to_helper = os.path.join(path_working, "analysis", "exploratory",
                              "03_Compare_rawnav_to_field_dir")
sys.path.append(path_to_helper)

# Import custom functions
import wmatarawnav as wr
from helper_function_field_validation import correct_data_types
from helper_function_field_validation import combine_field_rawnav_dat
from helper_function_field_validation \
    import quick_and_dirty_schedule_qjump_mapping

# 2. Read and clean field data
# -----------------------------------------------------------------------------
field_xlsx_wb = pd.ExcelFile(path_field_file)
field_dict = {}
col_keep = [
    'Metrobus Route', 'Bus ID', "Today's Date", 'Signal Phase',
    'Time Entered Stop Zone', 'Time Left Stop Zone', 'Front Door Open Time',
    'Front Door Close Time', 'Rear Door Open Time', 'Rear Door Close Time',
    'Dwell Time', 'Number of boardings', 'Number of alightings',
    'Total Time at Intersection', 'Traffic Conditions', 'Notes',
]
col_new_names = [
    'metrobus_route_field', 'bus_id_field', 'date_obs_field',
    'signal_phase_field', 'time_entered_stop_zone_field',
    'time_left_stop_zone_field', 'front_door_open_time_field',
    'front_door_close_time_field', 'rear_door_open_time_field',
    'rear_door_close_time_field', 'dwell_time_field',
    'indicator_category': 'Social',
    'indicator_type': 'ICT',
    'multiplier': 1000,
    'prefix': 'eclac',
    'suffix': 'fb'
}

# <markdowncell>

# Generation 1 - This data was collected via the Facebook Advertising interface - https://www.facebook.com/ads/create

# <codecell>

file_name = "FacebookData.xlsx"
popfile = config["gen_1_dir"] + file_name
xlfile = pd.ExcelFile(popfile)
xlfile.sheet_names

# The source file has several different estimates available. We are just using the base scenario ('ESTIMATES'), plus the 'NOTES' sheet.

# <markdowncell>

# Generation 2 - Refines the rough csv data from Generation 1 into a standardized csv format common to all data sets. Prepares this data for importing to the database.

# <markdowncell>

# First, create the DataFrame with all population information. The "Country code" column contains the ISO 3166 numeric value, but as an integer rather than as a 3-numeral string padded with zeros. So we fix this to be in line with the ISO spec and, hence, with our data. We also rename the Country code field to match the name in our schema.

# <codecell>

df = xlfile.parse("data", header=0)
df = df.rename(columns={
    "Country": "name",
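# The excerpt above is cut off before the padding step the markdown cell describes.
# A minimal sketch of that step, assuming the numeric column is named "Country code"
# and the schema column is "country_code" (both names hypothetical here):
# df["Country code"] = df["Country code"].astype(int).astype(str).str.zfill(3)
# df = df.rename(columns={"Country code": "country_code"})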
import cplex
import copy
import pandas as pd
import numpy as np
from dijkstrasalgoritm import dijkstra, graph_creator
from col_generation import col_generation

## Load data
xl = pd.ExcelFile("Input_AE4424_Ass1P1.xlsx")
dfs = {sheet: xl.parse(sheet) for sheet in xl.sheet_names}

## Create sets
arcs = range(len(dfs['Arcs'].Arc))
origins = dfs['Arcs'].From
destinations = dfs['Arcs'].To
locations = pd.concat([dfs['Arcs'].From, dfs['Arcs'].To]).unique()
commodities = range(1, len(dfs['Commodities'].Commodity) + 1)
quantity = np.array(dfs['Commodities'].Quant)

## Create input matrices
graph = graph_creator()
A_ineq = np.zeros((len(arcs), len(commodities)))
C = []
for i in range(len(commodities)):
    path, C_i = dijkstra(graph, dfs['Commodities'].From[i], dfs['Commodities'].To[i])
    C.append(float(C_i * quantity[i]))  # adjusted by multiplying by quantity[i]
    for j in range(len(path) - 1):
        index = dfs['Arcs'].index[(dfs['Arcs'].From == path[j])
                                  & (dfs['Arcs'].To == path[j + 1])]
        A_ineq[index, i] = 1 * dfs['Commodities'].Quant[i]
A_eq = np.eye(len(commodities))
rhs_ineq = list(dfs['Arcs'].Capacity)
import numpy as np
import pandas as pd
import xlrd
import openpyxl

xlsx = pd.ExcelFile('imiona.xlsx')
df = pd.read_excel(xlsx, 'Arkusz1')
print(df)
# In[1]:

import pandas as pd
import numpy as np
import random
from math import e

# In[44]:

# data = pd.read_csv('C:/Users/OmarVr/Documents/ITESO/Maestria/IDI/perceptron_simple.csv')
# data = pd.read_csv('C:/Users/if686748/Documents/perceptron_simple.csv')
#data = pd.ExcelFile('/Users/omar/Documents/Python/Maestria/IDI/tabla_para_probar.xlsx')
data = pd.ExcelFile('C:/Users/if686748/Downloads/tabla_para_probar.xlsx')
data = data.parse(data.sheet_names[0], header=0)
x = data.iloc[:, :-2]
d = data.iloc[:, -2:]
x

# In[45]:

N = len(x.iloc[0])  # Number of inputs / variables / columns
M = 2  # number of outputs / dependent variables
Q = len(data)  # Rows / learning patterns
L = N * M  # recommended: N * M
print(N, M, Q, L)
import pandas as pd
import numpy as np

file1 = '../data/STRIDE_PATIENT.xlsx'
x1 = pd.ExcelFile(file1)
stride_patient = x1.parse('Sheet1')

file2 = '../data/SURGERY.xlsx'
x2 = pd.ExcelFile(file2)
surgery = x2.parse('Sheet1')

stride_patient_req = stride_patient
pat_surgery = pd.merge(stride_patient_req, surgery, on='PAT_DEID', how='inner')

# Insert century digits into the two-digit years (birth dates assumed 19xx, surgery dates 20xx)
pat_surgery['BIRTH_DATE'] = pat_surgery['BIRTH_DATE'].str[0:7] + '19' + pat_surgery['BIRTH_DATE'].str[7:]
pat_surgery['SURGERY_DATE'] = pat_surgery['SURGERY_DATE'].str[0:7] + '20' + pat_surgery['SURGERY_DATE'].str[7:]
pat_surgery['BIRTH_DATE'] = pd.to_datetime(pat_surgery['BIRTH_DATE'])
pat_surgery['SURGERY_DATE'] = pd.to_datetime(pat_surgery['SURGERY_DATE'])
print(pat_surgery.dtypes)

pat_surgery['Difference'] = pat_surgery['SURGERY_DATE'].sub(pat_surgery['BIRTH_DATE'], axis=0)
pat_surgery['AGE AT SURGERY'] = pat_surgery['Difference'] / np.timedelta64(365, 'D')
pat_surgery['AGE AT SURGERY'] = pat_surgery['AGE AT SURGERY'].astype(int)
pat_surgery = pat_surgery.drop(['BIRTH_DATE', 'SURGERY_DATE', 'Difference'], axis=1)
print(pat_surgery.dtypes)
#_*_coding:utf-8_*_
import pandas as pd
import numpy as np
from docxtpl import DocxTemplate, InlineImage
from docx import shared
import os

# have_content = False
file = pd.ExcelFile("data\小于400三调精度.xlsx")
df = file.parse("小于400三调精度")
x_data = np.array(df)
x_list = x_data.tolist()
index = 0
while index < len(x_list):
    tpl = DocxTemplate("插图模板.docx")
    values = x_list[index]
    # values.pop()
    file_name = "data\\文档\\" + str(x_list[index][0]) + ".docx"
    gaoqing_name = "data\\高清\\" + x_list[index][3] + ".tif"
    update_name = "data\\更新\\" + x_list[index][3] + ".tif"
    if os.path.exists(gaoqing_name):
        if os.path.exists(update_name):
            lableID = values[1]
            context = \
                {
                    "col_labels": ["序号", "ID", "行政区代码", "地块序号", "地块面积(平方米)",
                                   "地块面积(亩)", "核查结果"],
                    "infos": [InlineImage(tpl, gaoqing_name, width=shared.Cm(7.75), height=shared.Cm(7)),
import itertools
import pandas as pd


def find_matches(teller, noemer, goktekst, match, file):
    def gcd(a, b):
        """Calculate the Greatest Common Divisor of a and b.

        Unless b==0, the result will have the same sign as b (so that when
        b is divided by it, the result comes out positive).
        """
        while b:
            a, b = b, a % b
        return a

    def simplify_fraction(numer, denom):
        if denom == 0:
            return "Division by 0 - result undefined"
        # Remove greatest common divisor:
        common_divisor = gcd(numer, denom)
        (reduced_num, reduced_den) = (numer / common_divisor, denom / common_divisor)
        # Note that reduced_den > 0 as documented in the gcd function.
        if reduced_den == 1:
            return "%d/%d is simplified to %d" % (numer, denom, reduced_num)
        elif common_divisor == 1:
            return "%d/%d is already at its most simplified state" % (numer, denom)
        else:
            return reduced_num, reduced_den
            #return "%d/%d is simplified to %d/%d" % (numer, denom, reduced_num, reduced_den)

    def splitword(word):
        return [char for char in word]

    df = pd.read_excel(pd.ExcelFile(file), "Sheet1")
    letters = splitword(goktekst)
    letter_list = []
    for l in letters:
        locations = df[df == l].stack().index.tolist()
        letter_list.append(locations)

    # turn the number pairs into fractions
    factored_letter_list = letter_list
    combination2 = [p for p in itertools.product(*letter_list)]
    for i in range(len(letter_list)):
        for j in range(len(letter_list[i])):
            factored_letter_list[i][j] = ((letter_list[i][j][0] + 1) / letter_list[i][j][1])
    combination = [p for p in itertools.product(*factored_letter_list)]

    ratio = teller / noemer
    n = 0
    print(f"Ratio = {ratio}")
    for i in range(len(combination)):
        # find and print the combinations that we've found
        sum = 0
        for j in range(len(combination[i])):
            sum += combination[i][j]
        if (abs(sum - ratio) < match):  # exact values never match, but a very small difference is good enough
            numbers1 = []
            numbers2 = []
            # make answer combinations
            for combo_counter in range(len(combination2[i])):  # store tuple values in two lists
                numbers1.append(combination2[i][combo_counter][0] + 1)
                numbers2.append(combination2[i][combo_counter][1])
                # print(combination2[i][combo_counter])

            # check answer
            # calculate noemer (denominator)
            reconstructed_noemer = 1
            for k in range(len(numbers2)):
                reconstructed_noemer *= numbers2[k]
            #print("Noemer is ", reconstructed_noemer)

            # calculate teller (numerator)
            reconstructed_teller = 0
            for i in range(len(numbers1)):
                reconstructed_teller += ((numbers1[i] * reconstructed_noemer) / numbers2[i])

            reconstructed_noemer2 = simplify_fraction(reconstructed_teller, reconstructed_noemer)[1]
            if reconstructed_noemer2 == noemer:
                n += 1
                print(f"\n\nCombinatie nr. {n} gevonden :)")
                print(f"Som is {sum}")
                print(f"Verschil is {sum - ratio}")
                for k in range(len(letters)):
                    # print out the answer
                    print(f"Letter {letters[k]} staat in vraag {numbers1[k]} van 20{numbers2[k]}")
                #print("Teller is", reconstructed_teller)
                print(f"Oorspronkelijke teller en noemer zijn: {teller} en {noemer}")
                print(f"De gevonden teller en noemer zijn: {simplify_fraction(reconstructed_teller, reconstructed_noemer)}")
import pandas as pd
import numpy as np

xl_file = pd.ExcelFile("D:/jakubicek/Rot_detection/data/training/labels_bin.xlsx")
data = pd.read_excel(xl_file, header=None)

file_names = data[0].values.tolist()
labels = data.loc[:, 1:7].to_numpy()
# Handle excel files with pandas
import pandas as pd

# Read .xlsx file using pandas
excFile = pd.ExcelFile('InputSheets.xlsx')

# Export 0_Deaths_1962_2016 as 0_Deaths_1962_2016.csv
df0 = excFile.parse('0_Deaths_1962_2016')
df0.to_csv('0_Deaths_1962_2016.csv', index=False, encoding='utf-8')

# Export sheet 1_Yearly_AMT as 1_Yearly_AMT.csv
df0 = excFile.parse('1_Yearly_AMT')
df0.to_csv('1_Yearly_AMT.csv', index=False, encoding='utf-8')

# Export sheet 2_EstCO2_AMT as 2_EstCO2_AMT.csv
df0 = excFile.parse('2_EstCO2_AMT')
df0.to_csv('2_EstCO2_AMT.csv', index=False, encoding='utf-8')

# Export sheet 3_Attrib_Year_AMT as 3_Attrib_Year_AMT.csv
df0 = excFile.parse('3_Attrib_Year_AMT')
df0.to_csv('3_Attrib_Year_AMT.csv', index=False, encoding='utf-8')

# Export 4_Prod_Year_AMT as 4_Prod_Year_AMT.csv
df0 = excFile.parse('4_Prod_Year_AMT')
df0.to_csv('4_Prod_Year_AMT.csv', index=False, encoding='utf-8')

# Export sheet 5_Prod_Transm_AMT as 5_Prod_Transm_AMT.csv
df0 = excFile.parse('5_Prod_Transm_AMT')
df0.to_csv('5_Prod_Transm_AMT.csv', index=False, encoding='utf-8')

# Export sheet 6_Prod_Cars as 6_Prod_Cars.csv
df0 = excFile.parse('6_Prod_Cars')
df0.to_csv('6_Prod_Cars.csv', index=False, encoding='utf-8')

# Export sheet 7_Prod_Truck as 7_Prod_Truck.csv
df0 = excFile.parse('7_Prod_Truck')
df0.to_csv('7_Prod_Truck.csv', index=False, encoding='utf-8')

# Export sheet 8_CO2Em_2012 as 8_CO2Em_2012.csv
df0 = excFile.parse('8_CO2Em_2012')
df0.to_csv('8_CO2Em_2012.csv', index=False, encoding='utf-8')
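# The per-sheet export above could equally be written as a loop over excFile.sheet_names.
# A minimal sketch of that alternative (same output, assuming every sheet should be exported
# under its own name):
# for sheet in excFile.sheet_names:
#     excFile.parse(sheet).to_csv(sheet + '.csv', index=False, encoding='utf-8')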
def generateFavorites(request):
    # Look up the admin's current group
    id_group = get_id_group(request.user.id)['GROUP_ID'][0]
    # Look up the ids of the users belonging to the administered group
    ids_users = str(
        get_id_users_by_idGroup(id_group)['USER_ID'].tolist()).replace(
            '[', '(').replace(']', ')')
    log = get_register_byListIDs(ids_users)
    folder = 'static/profitability/update_presupuesto'

    # Merge the parameters and disbursements of the "new" records
    parametrosNvUnificados = pd.DataFrame()
    desembolsosNvUnificados = pd.DataFrame()
    identificadorUnico = 1000
    fileNames = ''
    for i, row in log.iterrows():
        fileNames = fileNames + '||' + str(row['FILE_INPUT'])
        Newfile = folder + '/' + row['FILE_INPUT']
        Newpath = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                               '../' + Newfile)
        Newxlsx_inputs = pd.ExcelFile(Newpath)

        parametros_nv = pd.DataFrame(
            pd.read_excel(Newxlsx_inputs, sheet_name='ParametrosNv'))
        parametros_nv = parametros_nv.dropna(
            subset=['Id_Tool'])  # Drop records whose Id_Tool is null
        parametrosNvUnificados = pd.concat(
            [parametrosNvUnificados, parametros_nv], sort=False)

        desembolsos_nv = pd.DataFrame(
            pd.read_excel(Newxlsx_inputs, sheet_name='DesembolsosNv'))
        desembolsos_nv = desembolsos_nv.dropna(
            subset=['Id_Tool'])  # Drop records whose Id_Tool is null
        desembolsosNvUnificados = pd.concat(
            [desembolsosNvUnificados, desembolsos_nv], sort=False)

        # Add a unique identifier to Id_Tool
        parametrosNvUnificados['Id_Tool'] = parametrosNvUnificados['Id_Tool'] + identificadorUnico
        desembolsosNvUnificados['Id_Tool'] = desembolsosNvUnificados['Id_Tool'] + identificadorUnico
        identificadorUnico = identificadorUnico + 1000

    parametrosNvUnificados = parametrosNvUnificados.fillna(0).reset_index(drop=True)
    desembolsosNvUnificados = desembolsosNvUnificados.fillna(0).reset_index(drop=True)

    filename = log['FILE_INPUT'][0]
    file = folder + '/' + filename
    path = os.path.join(os.path.dirname(os.path.dirname(__file__)), '../' + file)
    xlsx_inputs = pd.ExcelFile(path)

    registro, insert_id = create_register(request, fileNames, is_union=1)  # Save the record (event log)
    if registro < 1:
        status = 0
        message = 'ERROR: Se presento un error al intentar guardar el intento (Log de eventos).'
        file_return = ''
    else:
        try:
            total_time, file_output = get_data(
                file,
                parametrosNvUnificados=parametrosNvUnificados,
                desembolsosNvUnificados=desembolsosNvUnificados)
            status = 1
            message = 'Presupuesto generado con éxito.'
            file_return = file_output
            update_register(insert_id=insert_id,
                            file_output=file_output,
                            success=1,
                            total_time=total_time,
                            error='')
        except:
            status = 0
            error = traceback.format_exc()
            message = 'ERROR:<br>Se presento un error al momento de generar el presupuesto, por favor contacte con el administrador.'
            file_return = ''
            update_register(insert_id=insert_id,
                            file_output='Error al generar archivo',
                            success=0,
                            total_time=0,
                            error=error)

    data = {"status": status, "message": message, "file_return": file_return}
    return JsonResponse(data, safe=False)
def create_charts(path):
    # read the latest file
    latest_file = sorted(glob.iglob(path + '/*'), key=os.path.getmtime)[-1]
    df_data = pd.DataFrame()
    # get date based on file name
    date = latest_file[len(path):-4]
    status_file = pd.ExcelFile(latest_file, sort=True)
    stat_file = pd.read_csv('processing/data/IPOstatus/stat.csv')

    # status data
    for sheet in status_file.sheet_names[1:]:
        data = status_file.parse(sheet, header=[2], index_col=0, skipfooter=1)
        new_columns = [
            data.columns[i - 1] + "二" if data.columns[i].find("Unnamed") >= 0
            else data.columns[i] for i in range(len(data.columns))
        ]
        data.columns = new_columns
        data['date'] = date
        data['板块'] = sheet
        df_data = df_data.append(data, ignore_index=True)

    province = transform(df_data['注册地'].tolist())['省']
    df_data['省'] = [x[:-1] if len(x) == 3 else x[0:2] for x in province.values]
    df_data.replace('', np.nan, inplace=True)
    df_data['省'].fillna(df_data['注册地'], inplace=True)
    # print(df_data['省'].value_counts().tolist())
    # print(df_data['省'].value_counts().index.tolist())

    # stat data
    #stat_file.drop(columns='waiting',inplace=True)
    #stat_file.rename(columns={"date": "日期", "total": "受理企业总数","passed":"已过会","queue":"待审企业","failed":"中止审查企业"},inplace = True)
    latest_stat = stat_file.iloc[-1]
    date_stat = stat_file['date']
    total_stat = stat_file['total']
    diff_stat = stat_file['total'] - stat_file['total'].shift(1)
    passed_stat = list(stat_file['passed'])
    queue_stat = list(stat_file['queue'])
    failed_stat = list(stat_file['failed'])

    ##################################################################################
    page = Page()
    style = Style(width=1100, height=600)

    value = df_data['省'].value_counts().tolist()
    attr = df_data['省'].value_counts().index.tolist()
    data = [(name, val) for (name, val) in zip(attr, value)]
    chart = Map("IPO申报企业分布图", "摸鱼科技", title_pos='center', **style.init_style)
    chart.add(
        "",
        attr,
        value,
        maptype='china',
        is_visualmap=True,
        is_label_show=True,
        visual_text_color='#000',
        tooltip_formatter=geo_formatter,  # The key point: the formatter function is passed directly as a parameter.
        label_emphasis_textsize=15,
        label_emphasis_pos='right',
    )
    page.add(chart)

    #
    bar_diff = Bar("")
    bar_diff.add("受理企业总数", date_stat, total_stat)
    bar_diff.add("增长(减少)企业数", date_stat, diff_stat, legend_pos="15%")

    bar_stat = Bar("申报企业情况", "摸鱼科技")
    bar_stat.add("已过会", date_stat, passed_stat, is_stack=True)
    bar_stat.add("待审企业", date_stat, queue_stat, is_stack=True)
    bar_stat.add("中止审查企业", date_stat, failed_stat, is_stack=True, legend_pos="60%")

    chart = Grid(width=WIDTH)
    chart.add(bar_stat, grid_left="60%")
    chart.add(bar_diff, grid_right="60%")
    page.add(chart)

    #
    v1 = df_data['所属行业'].value_counts().tolist()
    attr = df_data['所属行业'].value_counts().index.tolist()
    pie = Pie("所属行业分布", "摸鱼科技", title_pos="center", **style.init_style)
    pie.add("", attr, v1,
            radius=[45, 55],
            center=[50, 50],
            legend_pos="85%",
            legend_orient='vertical')
    page.add(pie)

    #
    total_counts = df_data['板块'].count()
    chart = Pie('申报企业所占板块的比例', "申报企业总数: " + str(total_counts),
                title_pos='center', **style.init_style)
    for exchange, counts, position in zip(df_data['板块'].unique(),
                                          df_data['板块'].value_counts(),
                                          range(1, 4)):
        chart.add("", [exchange, ""], [counts, total_counts - counts],
                  center=[25 * position, 30],
                  radius=[28, 34],
                  label_pos='center',
                  is_label_show=True,
                  label_text_color=None,
                  legend_top="center")
    page.add(chart)

    #
    attr1 = [
        attr.replace("(特殊普通合伙)", "").replace('(特殊普通合伙)', '').replace('(特殊普通合伙)', '')
        for attr in df_data['会计师事务所'].unique().tolist()
    ]
    attr2 = df_data['保荐机构'].unique().tolist()
    v1 = df_data['会计师事务所'].value_counts().tolist()
    v2 = df_data['保荐机构'].value_counts().tolist()

    # chart_accountants
    chart_accountants = Bar("会计师事务所 - 统计图", "摸鱼科技", title_pos="center", **style.init_style)
    chart_accountants.add("会计师事务所", attr1, v1,
                          legend_pos="75%",
                          mark_point=["max", "min"],
                          is_datazoom_show=True,
                          datazoom_range=[0, 40],
                          datazoom_type='both',
                          xaxis_interval=0,
                          xaxis_rotate=30,
                          yaxis_rotate=30)
    chart = Grid(width=WIDTH)
    chart.add(chart_accountants, grid_bottom="30%")
    page.add(chart)

    # chart_sponsor
    chart_sponsor = Bar("保荐机构 - 统计图", "摸鱼科技", title_pos="center", **style.init_style)
    chart_sponsor.add("保荐机构", attr2, v2,
                      legend_pos="75%",
                      mark_point=["max", "min"],
                      is_datazoom_show=True,
                      datazoom_range=[0, 40],
                      datazoom_type='both',
                      xaxis_interval=0,
                      xaxis_rotate=30,
                      yaxis_rotate=30,
                      yaxis_margin=50)
    chart = Grid(width=WIDTH)
    chart.add(chart_sponsor, grid_bottom="30%")
    page.add(chart)

    return page
reader = csv.reader(f, dialect=my_dialect)  # bundles the delimiter=... options set above

#6-1-4 JSON data

#6-1-5 XML and HTML
# conda install lxml
tables = pd.read_html()
len(table2)

#6-1-5-1 Reading XML with lxml.objectify

#6-2 Writing data out efficiently in binary formats (data only the computer can read)
df = pd.read_csv("hubble.csv")
df.to_pickle("hubble_pickle")  # write out as pickle data
pd.read_pickle("hubble_pickle")

#6-2-1 Using the HDF5 format

#6-2-2 Reading Excel files
a = pd.ExcelFile("trip_information.xlsx")
b = pd.read_excel(a, "result")  # specify the sheet
writer = pd.ExcelWriter("trip_information.xlsx")
b.to_excel(writer, "Sheet2")  # to_excel is a DataFrame method, so write b (not the ExcelFile a)
writer.save()

#6-3 Fetching data with web APIs
import requests
url = "https://api.github.com/repos/pandas-dev/pandas/issues"
resp = requests.get(url)
data = resp.json()
data[0]  # data is roughly a list of dicts: [{...}, {...}]
issues = pd.DataFrame(data, columns=["number", "title"])

#6-4 Fetching data from a database
import sqlite3
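# The notes are cut off right after the sqlite3 import. A minimal sketch of what that
# section typically demonstrates (the database file and table name below are hypothetical):
# con = sqlite3.connect("mydata.sqlite")           # hypothetical database file
# df_sql = pd.read_sql("SELECT * FROM test", con)  # hypothetical table name
# con.close()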
import os
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Using Plotly with Spyder
# https://community.plot.ly/t/plotly-for-spyder/10527/2
from plotly.offline import plot

# os.chdir(r'C:\Users\abibeka\OneDrive - Kittelson & Associates, Inc\Documents\HCM-CAV-Pooled-Fund\ExperimentalDesignArterial\Results')
# os.getcwd()
MainDir = r"C:\Users\abibeka\OneDrive - Kittelson & Associates, Inc\Documents\HCM-CAV-Pooled-Fund\Experimental Design Arterial\Results\Results Protected"

# Read once --- Takes time to load
VolTimeIntDat = pd.read_csv(os.path.join(MainDir, "VolumeTimeIntervalMap.csv"))
x1 = pd.ExcelFile(os.path.join(MainDir, "Results_MPR_Plotting_Exp.xlsx"))


def ReLab(x):
    MprLab = {
        "0PerMPR": "0",
        "20PerMPR": "20",
        "40PerMPR": "40",
        "60PerMPR": "60",
        "80PerMPR": "80",
        "100PerMPR": "100",
    }
    return MprLab[x]


def ReLab_Gap(x):
def __init__(self, subjectDir, dbLoc):
    self.location = subjectDir

    dicomDirDict = {}
    pbar = ProgressBar()
    for root, dirs, files in os.walk(self.location):
        dicoms = []
        for oneFile, i in zip(files, pbar(range(6000))):
            if re.search('(dcm|ima)', oneFile, re.IGNORECASE):
                dicoms.append(os.path.join(root, oneFile))
        if not dicoms == []:
            dicomDirDict[root] = dicoms

    self.dicomDirs = dicomDirDict
    self.dirs = dicomDirDict.keys()
    self.allDicoms = reduce(lambda x, y: x + y, dicomDirDict.values())
    self.allDicomNum = len(self.allDicoms)
    self.dirDicomNum = [(x, len(y)) for (x, y) in dicomDirDict.iteritems()]
    self.firstDicom = self.allDicoms[0]
    self.modalityMapping = [modalityMapping(x) for x in self.dirs]
    self.modalityDicomNum = dict(
        zip(self.modalityMapping, [x[1] for x in self.dirDicomNum]))

    ds = dicom.read_file(self.firstDicom)
    self.age = re.search('^0(\d{2})Y', ds.PatientAge).group(1)
    self.dob = ds.PatientBirthDate
    self.id = ds.PatientID
    self.surname = ds.PatientName.split('^')[0]
    self.name = ds.PatientName.split('^')[1]
    try:
        self.fullname = ''.join([
            x[0].upper() + x[1:].lower() for x in [
                self.surname,
                self.name.split(' ')[0],
                self.name.split(' ')[1]
            ]
        ])
        self.initial = self.surname[0] + ''.join(
            [x[0] for x in self.name.split(' ')])
    except:
        self.fullname = ''.join([
            x[0].upper() + x[1:].lower() for x in [self.surname, self.name]
        ])
        self.initial = self.surname[0] + self.name[0]

    self.sex = ds.PatientSex
    self.date = ds.StudyDate
    self.experimenter = getpass.getuser()

    print 'Now collecting information for'
    print '=============================='
    print '\n\t'.join([
        self.location, self.fullname, self.initial, self.id, self.dob,
        self.date, self.sex, ', '.join(self.modalityMapping),
        'by ' + self.experimenter
    ])
    print '=============================='

    self.koreanName = raw_input('Korean name ? eg. 김민수: ')
    self.note = raw_input('Any note ? : ')
    self.group = raw_input('Group ? : ')
    self.numberForGroup = maxGroupNum(os.path.join(dbLoc, self.group))
    self.study = raw_input('Study name ? : ')
    self.timeline = raw_input(
        'baseline or follow up ? eg) baseline, 6mfu, 1yfu, 2yfu : '
    )  # bienseo: Solve unicode-error problems

    # bienseo: Classify timeline (baseline or follow up)
    if self.timeline != 'baseline':
        df = pd.ExcelFile(os.path.join(dbLoc, 'database',
                                       'database.xls')).parse(0)
        self.folderName = df.ix[(df.timeline == 'baseline') &
                                (df.patientNumber == int(self.id)),
                                'folderName'].values.tolist()[0]
        # bienseo: Show back up folder name
        print '\n\n Now Back up to ' + self.folderName + '\n\n'
        self.targetDir = os.path.join(
            dbLoc, self.group, self.folderName,
            self.timeline)  # bienseo: baseline, followUp, 1yfu ...
    else:
        self.folderName = self.group + self.numberForGroup + '_' + self.initial
        self.targetDir = os.path.join(dbLoc, self.group, self.folderName,
                                      self.timeline)
import pandas as pd
import xlrd
import re

d = pd.ExcelFile('\Find No of Ctns.xlsx')
df = d.parse('sheet1', skiprows=0)

for a in range(df.shape[0]):
    if not isinstance(df['Description'][a], basestring):
        df = df.drop([a])
df = df.reset_index()
del df['index']

Q = []
for a in df.Qty:
    aa = re.search('\d+', a)
    if aa:
        Q.append(aa.group(0))

des = []
for b in df.Qty:
    bb = re.search('\D+', b)
    if bb:
        des.append(bb.group(0))

u = []
for c in df.Description:
    cc = re.search('(\d+)(BOTTLE)', c)
    ccc = re.search('(\d+)[X]', c)
    if cc:
        u.append(cc.group(1))
    elif ccc:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

from scipy import sparse
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import ExcelFile

Path_output = 'C:/Users/Admin/Desktop/output.xlsx'
Path_input = 'C:/Users/Admin/Desktop/input.xlsx'

Input = pd.ExcelFile(Path_input)
inputData = pd.read_excel(Input, 0, header=None)
Output = pd.ExcelFile(Path_output)
outputData = pd.read_excel(Output, 0, header=None)

Y_out = outputData.values.T[0]
X = inputData[0:27].values
X = X.T


def softmax(V):
    e_V = np.exp(V - np.max(V, axis=0, keepdims=True))
    Z = e_V / e_V.sum(axis=0)
    return Z


def convert_labels(y, C=3):
    Y = sparse.coo_matrix((np.ones_like(y), (y, np.arange(len(y)))),
                          shape=(C, len(y))).toarray()
    return Y
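# A small usage sketch (assumed, not part of the original): convert_labels turns integer
# class labels into a one-hot matrix with one column per sample.
# y = np.array([0, 2, 1, 2])
# Y = convert_labels(y, C=3)   # shape (3, 4); column j is the one-hot encoding of y[j]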
# %%
"""
Created on Tue Sep 6 00:08:31 2016

@author: ajoshi
"""
from dfsio import readdfs, writedfs
from nilearn import image
import scipy as sp
import pandas as pd
import matlab.engine as meng
from shutil import copyfile

xl = pd.ExcelFile("/big_disk/ajoshi/coding_ground/hybridatlas/"
                  "hybrid_atlas_adjusted_labels_13June2017.xlsx")
df = xl.parse("Sheet1")
oldID = df['oldID']
newID = df['newID']

left_mid = readdfs("/big_disk/ajoshi/coding_ground/hybridatlas/"
                   "USCBrain_06_17_2017/BCI-DNI_brain.left.mid.cortex.dfs")
right_mid = readdfs("/big_disk/ajoshi/coding_ground/hybridatlas/"
                    "USCBrain_06_17_2017/BCI-DNI_brain.right.mid.cortex.dfs")
v_lab = image.load_img("/big_disk/ajoshi/coding_ground/hybridatlas/"
                       "USCBrain_06_17_2017/BCI-DNI_brain.label.nii.gz")
data1 = v_lab.get_data()
import math
import pandas as pd


def media(lista):
    # (the def line was lost in the excerpt; the name is reconstructed from the call in varianza below)
    sum = 0.0
    suf = 0.0
    for i in range(0, len(lista)):
        sum = sum + lista[i]
    return sum / len(lista)


def varianza(lista):
    sum = 0.0
    for i in range(0, len(lista)):
        sum = sum + math.pow((lista[i] - media(lista)), 2)
    return sum / (len(lista))


xls = pd.ExcelFile('DATOS2.xlsx')
print(xls.sheet_names)
df = xls.parse()
lf = []
lf = df.values.tolist()
#print(lf)

reales = []
simulados = []
for i in range(len(lf)):
    for j in range(len(lf[i])):
        if (j == 0):
            reales.append(lf[i][j])
import xlrd
from colorama import Fore, Style
import pandas as pd
import matplotlib.pyplot as plt

# =============================================================================
# # ===========================================================================
# # #--------------------------------------------------------------------------
# # #  Panda Read in CMR Data from Excel File
# # #
# # #--------------------------------------------------------------------------
# # ===========================================================================
# =============================================================================
# read Excel (2003) File:
data_xls = pd.ExcelFile('CMR.xls')
data = data_xls.parse('Sheet1', na_values=['NA'])

# =============================================================================
# # ===========================================================================
# # #--------------------------------------------------------------------------
# # #  Plot to select Depth level to assess the Petrophysical Properties
# # #  at that level
# # #--------------------------------------------------------------------------
# # ===========================================================================
# =============================================================================
fig = plt.figure(figsize=(6, 18))  # manually adjust x,y dimension of plot canvas
fig.suptitle('Select Depth to Estimate Pc and Thin Section',
longer time period allows one to obtain the bias in the signal arising from
periods of low water level; larger backlog or interval reduces the number of
valid points
'''

station = 'WestBay_Stn1_'

print(inspect.getfile(inspect.currentframe()))
print(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))))
print('begin processing')

if process:
    excelFile = 'data/LagunaLake/water_quality/Water_Quality_data_LLDA_v2_1999_2016.xlsx'
    datFile = pd.ExcelFile(excelFile)
    sheets = datFile.sheet_names
    dft = pd.DataFrame()
    # collect all locations
    for sheet in sheets:
        if 'Figures' not in sheet and 'PrimProd' not in sheet:
            dff = pd.read_excel(excelFile, sheetname=sheet)
            dff = dff[['Salinity (Chloride)', 'Date']]
            dff.rename(columns={'Salinity (Chloride)': 'Salinity ' + sheet}, inplace=True)
            dff.rename(columns={'Date': 'date'}, inplace=True)
            dff.set_index('date', inplace=True)
            dff.replace('-', np.nan, inplace=True)
            dff = dff[~dff.index.duplicated(keep='first')]
            dft = pd.concat([dff, dft], axis=1, ignore_index=False)
def parseXL(self, xlfile):
    try:
        xl = pd.ExcelFile(xlfile)
        tmpdf = xl.parse(xl.sheet_names[0])

        # get the indices for the rows where the L1 headers are present
        data_l1 = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_L1_HDR_KEYS)].notnull().any(axis=1)].tolist()
        # get indices for rows where the L2 headers are present
        # these will indicate the beginning of data
        data_l2_begin = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_L2_HDR_KEYS)].notnull().any(axis=1)].tolist()
        # get indices for the rows where the misc headers are present
        # these will indicate the end of data
        data_l2_end = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_SUM1_KEYS)].notnull().any(axis=1)].tolist()
        # get indices for any other keys that are part of data
        data_other = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_SUM2_KEYS)].notnull().any(axis=1)].tolist()

        # generate indices of non-data rows
        metadata_idx = list(range(0, data_l1[0]))
        n = len(DATA_L1_HDR_KEYS)
        # TODO: malformed files may have any of the keys missing resulting in
        # empty lists of indices
        for i in range(0, n):
            metadata_idx += list(range(data_l1[i] + 1, data_l2_begin[i]))
            if i < n - 1:
                metadata_idx += list(range(data_l2_end[i] + 1, data_l1[i + 1]))
        metadata_idx += list(range(data_l2_end[n - 1] + 1, data_other[0]))
        metadata_idx += list(range(data_other[-1] + 1, tmpdf.shape[0]))

        # copy metadata rows to its own dataframe
        tmpdf_md = tmpdf.loc[metadata_idx, :]
        # clean-up
        tmpdf_md.dropna(axis=1, how='all', inplace=True)
        tmpdf_md.dropna(axis=0, how='all', inplace=True)

        # purge metadata rows (copied above) from the data df
        tmpdf.drop(metadata_idx, inplace=True)
        # clean-up
        tmpdf.dropna(axis=1, how='all', inplace=True)
        tmpdf.dropna(axis=0, how='all', inplace=True)

        # collect l1 header names
        # needed because we don't know the order in which the l1 headers occur in data
        df_l1 = tmpdf.loc[data_l1]
        df_l1 = df_l1.loc[:, df_l1.notnull().any()]
        l1_hdrs = df_l1.T.unstack().tolist()
        l1_hdrs = [s for s in l1_hdrs if str(s) != 'nan']

        # drop all l1 headers
        # we will be using a single-level index for the final df
        # l1 headers will be used to populate a categorical var instead
        tmpdf.drop(data_l1, inplace=True)

        # create a new dataframe for each school type
        df_list = []
        for i in range(0, n):
            row_idx = list(range(data_l2_begin[i] + 1, data_l2_end[i] + 1))
            col_idx = data_l2_begin[i]
            school_type = l1_hdrs[i]
            df_list.append(extract_df(tmpdf, row_idx, col_idx, school_type))

            # if this is the last of the school types we need to append
            # the aggregated lea rows. we do this as a separate df containing
            # data_other rows.
            if (i == n - 1):
                row_idx = data_other
                df_list.append(extract_df(tmpdf, row_idx, col_idx, np.nan))

        # we have a df with all data for all school types including aggregated
        # rows at this point
        df_full = pd.concat(df_list, axis=0, ignore_index=True)

        # recode column names
        df_full.rename(columns=DATA_L2_HDR_DICT, inplace=True)
        # recode school_type
        df_full['school_type'] = df_full['school_type'].map(DATA_L1_HDR_DICT)

        # recode other fields
        cond = df_full['index'].isin(data_l2_end + data_other)
        df_full.loc[cond, 'school_name'] = df_full[cond]['school_code'].map(DATA_SUM_DICT)
        df_full.loc[cond, 'school_code'] = INVALID_SCHOOL_CODE
        cond = df_full['index'].isin(data_other)
        df_full.loc[cond, 'school_type'] = ALL_SCHOOL_TYPE
        df_full.drop(['index'], axis=1, inplace=True)

        # re-arrange cols to original order
        df_full = df_full[list(DATA_L2_HDR_DICT.values()) + ['school_type']]

        #
        # METADATA
        #
        # add appropriate prefix and suffix to metadata keys
        md_keys = [' ' + s + ':' for s in METADATA_KEYS]
        # get indices for rows where the metadata keywords are present
        md_idx = tmpdf_md.index[tmpdf_md[tmpdf_md.isin(md_keys)].notnull().any(
            axis=1)].tolist()
        # extract non-null cols only for those rows containing metadata keys
        tmpdf_md = tmpdf_md.loc[md_idx, :]
        tmpdf_md.dropna(axis=1, how='all', inplace=True)
        tmpdf_md.dropna(axis=0, how='all', inplace=True)
        tmpdf_md.columns = list(range(0, tmpdf_md.shape[1]))

        # extract metadata keys
        md_keys = list(tmpdf_md.loc[:, METADATA_KEY_COLS].unstack().values)
        md_keys = list(map(str.strip, md_keys))
        md_keys = list(map(str.lower, md_keys))
        md_keys = [s.replace(' ', '_') for s in md_keys]
        md_keys = [s[:-1] for s in md_keys]

        # extract metadata values
        md_vals = list(tmpdf_md.loc[:, METADATA_VAL_COLS].unstack().values)
        md_vals = [s.lower() if isinstance(s, str) else s for s in md_vals]

        md_dict = dict(zip(md_keys, md_vals))

        # store only at the end when we have successfully completed all steps
        # for both data and metadata
        self.d_df = df_full
        self.md_dict = md_dict
    except Exception as e:
        raise e
import numpy as np
import pandas as pd
from openpyxl import load_workbook


def pandas():
    ### PANDAS
    # Import desired inventory file and save as a variable
    file = 'Grocery.xlsx'
    xl = pd.ExcelFile(file)

    # Display all sheets that exist in spreadsheet file
    print(xl.sheet_names)

    # Generate DataFrame from imported spreadsheet and print
    df1 = xl.parse('Page 1')
    print(df1)

    shoplist = ["" for x in range(1)]
    item = "Start"
    while item != "Done":
        item = raw_input("Add item or type Done: ")
        #except ValueError:
        #print("Sorry, invalid input. Try adding another")
        if item != "Done":
            if df1['Item'].str.match(item).any():
                print(item)
                for j in range(len(shoplist)):
                    if shoplist[j] == "":
                        shoplist[j] = item
                        shoplist.append("")
                        print(shoplist)
                        break
            else:
                print("Item not found. Please add a new Item.")
        else:
            break

    # Use to test location of specific item -->> print(df1.loc[df1['Item'] == 'Nuts'])
    # Find row in which desired item exists
    for p in range(len(shoplist) - 1):
        # rowselect = (df1.index[df1['Item'] == shoplist[p]])
        rowselect = (df1.index[df1['Item'].str.match(shoplist[p])])
        row4search = rowselect + 1  # Variable used to search for numerical information in sheet

        # Display int64 values
        #print(rowselect)
        #print(row4search)
        # Display parsed row of item info
        #print(df1.loc[rowselect])

        wb = load_workbook('Grocery.xlsx')  # Loads spreadsheet
        #sheet_1 = wb.get_sheet_by_name('Page 1')  # Extracts desired sheet
        sheet_1 = wb['Page 1']  # Extracts desired sheet - this is better than ^ code

        code = np.zeros(sheet_1.max_row)  # Create blank array based on size of inventory list
        cost = np.zeros(sheet_1.max_row)
        aisle = np.zeros(sheet_1.max_row)
        bay = np.zeros(sheet_1.max_row)
        for i in range(1, sheet_1.max_row):
            code[i] = sheet_1.cell(row=i + 1, column=2).value  # Fill in data to blank arrays from spreadsheet
            cost[i] = sheet_1.cell(row=i + 1, column=3).value
            aisle[i] = sheet_1.cell(row=i + 1, column=4).value
            bay[i] = sheet_1.cell(row=i + 1, column=5).value

        # Full info display
        print(shoplist[p])
        print('Barcode', code[row4search])  # Display desired item barcode
        print('Price', cost[row4search])  # Display desired item cost
        print('Aisle', aisle[row4search])  # Display desired item aisle
        print('Bay', bay[row4search])  # Display desired item bay
                     update_start_date)  # (call continues from a line before this excerpt)

# compute epidemic metrics - compute metrics associated with time-varying data,
# writing the database for each date
computing_metrics = True
delta = timedelta(days=1)
current_date = update_start_date
g = []
graph_exists = False
while computing_metrics:
    print("checking epidem_data_" + str(current_date) + ".xlsx")
    try:
        # check if metrics were already computed for current_date
        DB_raw = pd.ExcelFile("data/data_repo/epidem_data_" +
                              str(current_date) + ".xlsx")
    except FileNotFoundError:
        if (current_date < update_end_date):
            print("new epidemic data available, but not processed...UPDATE!")
            print("computing epidemic data for " + str(current_date))

            # initialize data dict
            epidem_data = dict([])
            epidem_data["Name"] = [name for name in constant_db["Name"]]
            epidem_data["Code"] = [code for code in constant_db["Code"]]
            epidem_data["latitude"] = [longi for longi in constant_db["long"]]
            epidem_data["longitude"] = [lat for lat in constant_db["lat"]]
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 11 01:28:11 2017

@author: Shabaka
"""

# Import pandas
import pandas as pd

# Assign spreadsheet filename: file
file = 'battledeath.xlsx'

# Load spreadsheet: xl
xl = pd.ExcelFile(file)

# Print sheet names
print(xl.sheet_names)

""" Import Excel Sheets Specifically """

# Load a sheet into a DataFrame by name: df1
df1 = xl.parse('2004')

# Print the head of the DataFrame df1
print(df1.head())

# Load a sheet into a DataFrame by index: df2
df2 = xl.parse(0)
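# Side note (a small sketch, not part of the original): pd.read_excel can load every
# sheet at once; sheet_name=None returns a dict mapping sheet name -> DataFrame.
# all_sheets = pd.read_excel(file, sheet_name=None)
# print(list(all_sheets.keys()))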
        sheet1.write(i, 0, f)
        sheet1.write(i, 1, count / 60.0)
        if (count / 60.0 > 0):
            sheet1.write(i, 1, 1)
        else:
            sheet1.write(i, 1, 0)

    image_list = []
    wb.save('Pixel_Density_' + k[0:1] + "_" + k[2:3] + '.xls')

x_list = []
for g in glob.glob("*.xls"):
    x_list.append(g)
x_list.sort()
print x_list

kkk = 0
for z in x_list:
    xls_file = pd.ExcelFile(z)
    df = xls_file.parse('Sheet 1')
    df = df.sort_values(by='IMAGE NAME')
    df = df.drop('IMAGE NAME', 1)
    df = df.T
    df = df.reset_index(drop=True)
    df.to_csv('file' + str(kkk) + '.csv')

    df = read_csv('file' + str(kkk) + '.csv')
    first_column = df.columns[0]  # Delete first
    df = df.drop([first_column], axis=1)
    df.to_csv('file' + str(kkk) + '.csv', index=False)

    df = read_csv('file' + str(kkk) + '.csv')
    first_column = df.columns[64]  # Delete first
    df = df.drop([first_column], axis=1)
# Concatenate excel (.xlsx) files
import pandas as pd
import os

# filenames (endswith catches both extensions; a fixed-width slice would miss '.xls')
excel_names = [f for f in os.listdir() if f.endswith(('.xlsx', '.xls'))]

# read them in
excels = [pd.ExcelFile(name) for name in excel_names]

# turn them into dataframes
frames = [
    x.parse(x.sheet_names[0], header=None, index_col=None) for x in excels
]

# delete the first row for all frames except the first
# i.e. remove the header row -- assumes it's the first
frames[1:] = [df[1:] for df in frames[1:]]

# concatenate them..
combined = pd.concat(frames)

# write it out
combined.to_excel("compiled.xlsx", header=False, index=False)
import pandas
import random

path1 = r"FFL2 Data.xlsx"
workbook = pandas.ExcelFile(path1)
encounters = workbook.parse("Encounters")

INCREASE_RATE = 5


def rollGroupSize(text):
    roll_range = text.split("-")
    if len(roll_range) > 1:
        return random.randint(int(roll_range[0]), int(roll_range[1]))
    else:
        return 1


gen = "y"
while gen != "n":
    zone = input("Generate encounter for which zone: ")
    roll = random.randint(1, 256)
    group1 = ""
    group2 = ""
    group3 = ""
    num1 = 0
    num2 = 0
    num3 = 0
    zone_table = encounters.loc[encounters["ZONE"] == zone]
    for row in range(encounters.shape[0]):
        if zone_table.iloc[row, 2] >= roll: