def diag_aggregates():
    """Aggregate the relative-difference diagnostics from each yearly sheet
    of the workbook named by the module-level global ``fname_all`` and write
    them to a single '<fname_all>_diag.xlsx' workbook (Python 2 code)."""
    years = ['2006', '2007', '2008', '2009']
    df_final = None  # accumulator across years
    for yr in years:
        xls = ExcelFile(fname_all)
        # NOTE(review): 'hindex_col' is not a documented parse() keyword;
        # presumably 'index_col' was intended -- confirm against the pandas
        # version this project pins.
        df = xls.parse(yr, hindex_col=True)
        # Full column layout of a sheet, kept for reference; only
        # `selected_cols` below is actually used.
        cols = [u"Mesure",
                u"Dépense \n(millions d'€)",
                u"Bénéficiaires \n(milliers)",
                u"Dépenses \nréelles \n(millions d'€)",
                u"Bénéficiaires \nréels \n(milliers)",
                u"Diff. relative \nDépenses",
                u"Diff. relative \nBénéficiaires"]
        selected_cols = [u"Mesure",
                         u"Diff. relative \nDépenses",
                         u"Diff. relative \nBénéficiaires"]
        df = df[selected_cols]
        df['year'] = yr
        # Positional row number within the sheet; used as the outer index
        # level so rows from different years line up.
        df['num'] = range(len(df.index))
        df = df.set_index(['num', u'Mesure', 'year'])
        if df_final is None:
            df_final = df
        else:
            df_final = df_final.append(df, ignore_index=False)
    # DataFrame.groupby()
    # Sort on the 'num' level so the same measure's years are adjacent.
    df_final = df_final.sortlevel(0)
    print str(fname_all)[:-5]+'_diag.xlsx'
    writer = ExcelWriter(str(fname_all)[:-5]+'_diag.xlsx')
    df_final.to_excel(writer, sheet_name="diagnostics", float_format="%.2f")
    writer.save()
class EXCEL:
    """Thin convenience wrapper around ``pandas.ExcelFile``.

    The workbook is opened at construction time; if it contains exactly one
    sheet, that sheet is parsed immediately and exposed via ``data``.
    """

    def __init__(self, xls_filepath):
        self.filepath = xls_filepath
        self.xls_reader = ExcelFile(xls_filepath)
        self.sheet_names = self.xls_reader.sheet_names
        # Single-sheet workbooks are parsed eagerly for convenience.
        if len(self.sheet_names) == 1:
            self.select_sheet(self.sheet_names[0])
        self.time = datetime.datetime.now()  # timestamp of when the file was loaded

    def add(self):
        # Placeholder; not implemented yet.
        pass

    @property
    def data(self):
        """DataFrame of the currently selected (or merged) sheet."""
        return self._data

    def select_sheet(self, sheet_name):
        """Parse *sheet_name* and make it the current ``data``."""
        self._data = self.xls_reader.parse(sheet_name)
        #self._data = pd.read_excel(xls_filepath)

    def merge_sheet(self):
        """Concatenate every sheet of the workbook into a single frame."""
        sheets = []
        for sheet_name in self.sheet_names:
            sheet = self.xls_reader.parse(sheet_name)
            sheets.append(sheet)
        self._data = pd.concat(sheets)

    def save(self, xls_filepath, sheet_name='Sheet5'):
        """Write the current ``data`` to *xls_filepath*.

        The reader is closed first, so the target file may be the same file
        that was read.
        """
        self.xls_reader.close()
        self.xls_writer = ExcelWriter(xls_filepath)
        self._data.to_excel(self.xls_writer, sheet_name)
        self.xls_writer.save()
def read_excel(fname, header=None):
    """Read excel into dict.

    Args:
        fname: name of excel file
        header: The finland files does not have a header

    Output:
        dictionary containing the data, keyed by column name with
        " - " replaced by "_"
    """
    xls = ExcelFile(fname)
    if header:
        parse_cols = [1]
    else:
        parse_cols = None
    df = xls.parse(xls.sheet_names[0], skiprows=1, parse_cols=parse_cols)
    # Fix keys
    temp = df.to_dict()
    # BUG FIX: iterate over a snapshot of the keys -- popping/adding while
    # iterating the dict itself raises RuntimeError.
    for key in list(temp):
        new_key = key.replace(" - ", "_")
        temp[new_key] = temp.pop(key)
    # Stupid hack for Finland: relabel the (single) parsed column.
    # BUG FIX: dict.keys() is not subscriptable on Python 3; use next(iter()).
    if header:
        temp[header] = temp.pop(next(iter(temp)))
    return temp
def network_UKGDS(filename, header=28):
    """
    Load Excel file with UKGDS data format and build dict array of bus
    coordinates and graph structure suitable for plotting with the
    networkx module.
    """
    from numpy import array, where
    from pandas import ExcelFile
    from networkx import Graph

    workbook = ExcelFile(filename)
    buses = workbook.parse("Buses", header=header)
    branches = workbook.parse("Branches", header=header)

    # Bus coordinates keyed by positional node index.
    pos = {}
    for idx in range(len(buses["BNU"])):
        pos[idx] = array([buses["BXC"][idx], buses["BYC"][idx]])

    # Edge list expressed as positional indices into the bus table: each
    # branch's from/to bus numbers are resolved back to row positions.
    edges = []
    for k in range(len(branches["CFB"])):
        src = where(buses["BNU"] == branches["CFB"][k])[0][0]
        dst = where(buses["BNU"] == branches["CTB"][k])[0][0]
        edges.append([src, dst])

    graph = Graph()
    for node in set([a for a, b in edges] + [b for a, b in edges]):
        graph.add_node(node)
    for src, dst in edges:
        graph.add_edge(src, dst)
    return graph, pos
def convert(self, file_bytes):
    """Accepts a bytes array and returns a json string

    On success returns the tuple ``(True, <json string>)``; on any read or
    parse failure, or when the configured sheet is missing, returns
    ``self._default_value()``.
    """
    excel_file = None
    try:
        excel_file = ExcelFile(BytesIO(file_bytes))
    except Exception as e:
        log.error("Error reading in excel bytes, {}".format(e))
    if excel_file is None:
        return self._default_value()
    # The reader is configured for one specific sheet; bail out early if
    # the uploaded workbook does not contain it.
    if self.sheet_reader.sheet_name not in excel_file.sheet_names:
        return self._default_value()
    try:
        df = excel_file.parse(self.sheet_reader.sheet_name)
        success, output = self.sheet_reader.read(df)
        if not success:
            return self._default_value()
        return True, json.dumps(output, sort_keys=True)
    except Exception as e:
        log.error("Error parsing file: {}".format(e))
        return self._default_value()
def uploadfile_store(request):
    """Django view (Python 2): render an uploaded .xls/.xlsx file as an
    HTML table.

    Best-effort by design: any failure (missing file, parse error) returns
    an empty page rather than an error response.
    """
    if request.method == 'POST':
        try:
            filename = request.FILES['fileupload'].name
            filedata = request.FILES['fileupload'].read()
            file_extension = os.path.splitext(filename)[-1]
            if file_extension == ".xls" or file_extension == ".xlsx":
                # Parse the first sheet; blank cells become empty strings.
                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0], header=None,
                                      index_col=None, na_values="")
                df = df.fillna("")
                ht = df.to_html(header=True, index=True,
                                float_format=lambda x: '%10.2f' % x,
                                classes="table table-bordered table-striped draggable").encode('utf-8')
                return HttpResponse("""
<html><head><script type="text/javascript">
window.top.ClearUploadEisup();
</script>
<style>
table { border-collapse: collapse; margin-left: 30px; }
table, th, td { border: 1px solid black; font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 8pt; }
</style>
</head>%s</html>
""" % ht)
            else:
                # Unsupported extension: tell the user (message is shown
                # client-side in Russian).
                return HttpResponse("""
<html><head><script type="text/javascript">
window.top.ClearUploadEisup();
alert("Формат файла не поддерживается!");
</script></head></html>
""")
        except:
            # Deliberate catch-all: any failure yields an empty page.
            return HttpResponse("""
<html><head><script type="text/javascript">
</script></head></html>
""")
def from_excel(self, excel_file, units):
    """Pulls timing info from excel file and stores in an array.

    Parameters
    ----------
    excel_file: the name of the file to import from. e.g. file.xls
    units: the units the imported data is in

    Raises
    ------
    DataError: if the imported frames fail validation.
    """
    try:
        df = ExcelFile(excel_file).parse('Sheet1')  #dataframe
        rec = df.to_records()
        #can be converted to numpy array
        #by using rec.astype all the same type
        #then calling .view(that type) with the result
        #supposedly this is faster than the below method
        dat_arr = np.array(rec.tolist())  #pirate
        #get rid of the 'index' column from pandas
        # NOTE(review): assumes self.col_num is set before this call -- confirm.
        self.data = dat_arr[0:dat_arr.shape[0], 1:self.col_num + 1]
        self.units = units
    except IOError:
        # Python 2 print statement; file could not be opened.
        print "Oops."
    # Validation runs even when the read failed, so self.data may be stale.
    try:
        self._validate_frames()
    except FrameError:
        raise DataError('Bad data', self.data, excel_file)
def read_and_save(file_name, log_file):
    """Upsert bank-branch rows from an Excel file into BankDetail.

    Column captions vary between source files ('OFFICE' vs 'BRANCH',
    'BANK NAME' vs 'BANK'); rows are matched by IFSC code. On a missing
    column the file name is appended to *log_file* and processing of this
    file stops.
    """
    print(f"Reading {file_name }")
    xls = ExcelFile(file_name)
    data = xls.parse(xls.sheet_names[0])
    for row_index, row in data.iterrows():
        try:
            # Some files label the branch column 'OFFICE', others 'BRANCH'.
            if 'OFFICE' in row.keys():
                branch = row['OFFICE']
            else:
                branch = row['BRANCH']
            # Likewise for the bank-name column.
            if 'BANK NAME' in row.keys():
                bank_name = row['BANK NAME']
            else:
                bank_name = row['BANK']
            # checking if already there or not
            obj = BankDetail.objects.filter(ifsc_code=row['IFSC'])
            if obj:
                obj.update(branch_name=branch, bank_name=bank_name,
                           branch_address=row['ADDRESS'])
            else:
                BankDetail(ifsc_code=row['IFSC'], branch_name=branch,
                           bank_name=bank_name,
                           branch_address=row['ADDRESS']).save()
        except KeyError as e:
            # Required column missing: log and abandon this file.
            log_file.write(file_name)
            print(e)
            print(
                f"Error while reading file { file_name.split(os.sep)[-1] }\nSkipping."
            )
            break
def parse_trans_bonx(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    """Parse a BONX-format bank-statement workbook.

    For each sheet, rows 2-4 of the header block carry the holder's name,
    account and card number as "label:value" cells; the transaction table
    header is on row 7. Each parsed frame (columns normalised, holder
    columns attached) is appended to *tmp_trans_list_by_sheet*.

    :return: total number of transaction rows read across all sheets
    """
    # Source caption -> canonical column name used downstream.
    col_map = {
        '交易机构': '交易网点',
        '交易类型': '交易方式',
        '借贷标识': '借贷标志',
        '对方行名': '对方开户行',
        '对方名称': '对方户名',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue  # skip empty sheets
        # Header cells look like "label:value"; keep the value part.
        _name = tmp_acc_strs.iloc[1, 0].split(':')[1]
        _account = tmp_acc_strs.iloc[2, 0].split(':')[1]
        _card = tmp_acc_strs.iloc[3, 0].split(':')[1]
        # Everything as str to avoid pandas guessing numeric dtypes.
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet, header=6, dtype=str)
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
def __init__(self, xls_filepath):
    """Open the workbook at *xls_filepath* for reading."""
    self.filepath = xls_filepath
    self.xls_reader = ExcelFile(xls_filepath)
    self.sheet_names = self.xls_reader.sheet_names
    # A single-sheet workbook is selected (parsed) immediately.
    if len(self.sheet_names) == 1:
        self.select_sheet(self.sheet_names[0])
    self.time = datetime.datetime.now()  # timestamp of when the file was loaded
def build_totals(): h5_name = "../amounts.h5" store = HDFStore(h5_name) files = [ 'logement_tous_regime', 'openfisca_pfam_tous_regimes', 'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_RegimeGeneral' ] first = True for xlsfile in files: xls = ExcelFile(xlsfile + '.xlsx') df_a = xls.parse('amounts', na_values=['NA']) try: df_b = xls.parse('benef', na_values=['NA']) except: df_b = DataFrame() if first: amounts_df = df_a benef_df = df_b first = False else: amounts_df = concat([amounts_df, df_a]) benef_df = concat([benef_df, df_b]) amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index( "var") print amounts_df.to_string() print benef_df.to_string() store['amounts'] = amounts_df store['benef'] = benef_df store.close
def parse_trans_pab(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    """Parse a PAB-format bank-statement workbook.

    Rows 1-5 of each sheet carry holder metadata (name, account, card,
    currency); the transaction table header is on row 7 and the last two
    rows are a footer. Amount columns have thousands separators stripped.
    Parsed frames are appended to *tmp_trans_list_by_sheet*.

    :return: total number of transaction rows read across all sheets
    """
    # Source caption -> canonical column name used downstream.
    col_map = {
        '借方发生额': '交易金额',
        '交易对方户名': '对方户名',
        '交易对方账号': '对方账号',
        '交易对方行名称': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=5)
        # Drop all-empty spacer columns so the metadata cells line up.
        tmp_acc_strs.dropna(how='all', axis=1, inplace=True)
        _name = tmp_acc_strs.iloc[1, 3]
        _account = tmp_acc_strs.iloc[1, 1]
        _card_num = tmp_acc_strs.iloc[2, 1]
        _currency = tmp_acc_strs.iloc[4, 3]
        # Everything as str; skip the 2-row footer.
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet, header=6,
                                           dtype=str, skipfooter=2)
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card_num
        tmp_trans_sheet['币种'] = _currency
        # Strip thousands separators from the amount columns.
        tmp_trans_sheet['交易金额'] = tmp_trans_sheet['交易金额'].str.replace(',', '')
        tmp_trans_sheet['贷方发生额'] = tmp_trans_sheet['贷方发生额'].str.replace(
            ',', '')
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
def build_lookup_table(data_model: FileDataModel, value_column, label_column,
                       workflow_spec_id, field_id):
    """ In some cases the lookup table can be very large.  This method will
    add all values to the database in a way that can be searched and returned via
    an api call - rather than sending the full set of options along with the form.
    It will only open the file and process the options if something has changed.  """
    xls = ExcelFile(data_model.data)
    df = xls.parse(
        xls.sheet_names[0])  # Currently we only look at the fist sheet.
    # Normalise missing cells to None so they serialise cleanly.
    df = pd.DataFrame(df).replace({np.nan: None})
    if value_column not in df:
        raise ApiError(
            "invalid_enum",
            "The file %s does not contain a column named % s" %
            (data_model.file_model.name, value_column))
    if label_column not in df:
        raise ApiError(
            "invalid_enum",
            "The file %s does not contain a column named % s" %
            (data_model.file_model.name, label_column))
    # One LookupFileModel per (spec, field, file), plus one data row per
    # spreadsheet row; all committed in a single transaction.
    lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                   field_id=field_id,
                                   file_data_model_id=data_model.id,
                                   is_ldap=False)
    db.session.add(lookup_model)
    for index, row in df.iterrows():
        lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                      value=row[value_column],
                                      label=row[label_column],
                                      data=row.to_dict(OrderedDict))
        db.session.add(lookup_data)
    db.session.commit()
    return lookup_model
def excel_to_corpus(excel_path, corpus_path):
    '''NB! Make sure to use .xls file extension for Excel files.

    Copies every sheet of *excel_path* (sheets are named "0", "1", ...)
    into the PyCorpus at *corpus_path*, undoing the type lossiness of the
    xls export (NaN -> None, 0 -> False, 1 -> True).
    '''
    corpus = PyCorpus(corpus_path)
    excel = ExcelFile(excel_path)
    # as we do not know the number of sheets, we parse all of them
    # until we obtain a error
    idx = 0
    while True:
        try:
            df = excel.parse(str(idx))
            # recreate some information that was modified when exporting to xls
            new_df = dict()
            for col in df.columns:
                data = []
                for v in df[col]:
                    if type(v) == float and math.isnan(v):
                        data.append(None)
                    elif v == 0:
                        data.append(False)
                    elif v == 1:
                        data.append(True)
                    else:
                        data.append(v)
                new_df[col] = Series(data)
            corpus[str(idx)] = DataFrame(new_df)
        except xlrd.biffh.XLRDError:
            # No sheet with this name: all sheets have been read.
            break
        idx += 1
    # NOTE(review): the corpus is not closed if a non-XLRDError escapes the
    # loop -- consider try/finally.
    corpus.close()
def build_totals(): h5_name = "../amounts.h5" store = HDFStore(h5_name) files = ['logement_tous_regime', 'pfam_tous_regimes', 'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_TousRegimes' ] first = True for xlsfile in files: xls = ExcelFile(xlsfile + '.xlsx') print xls.path_or_buf df_a = xls.parse('amounts', na_values=['NA']) try: df_b = xls.parse('benef', na_values=['NA']) except: df_b = DataFrame() if first: amounts_df = df_a benef_df = df_b first = False else: amounts_df = concat([amounts_df, df_a]) benef_df = concat([benef_df, df_b]) amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index("var") print amounts_df.to_string() print benef_df.to_string() store['amounts'] = amounts_df store['benef'] = benef_df store.close
def action_import_pricelists(self):
    """Odoo wizard action (Python 2): import pricelist lines from the
    uploaded base64-encoded Excel file into the active pricelist.

    Expects columns 'PID' (product default_code, zero-padded to 7 chars)
    and 'Price (Inc. Vat)'. Raises except_orm on unknown PIDs or empty
    values.
    """
    for item in self:
        active_id = item._context['active_id']
        pricelists_obj = item.env[item._context['active_model']]
        product_obj = item.env['product.product']
        # The uploaded binary is base64; wrap the decoded bytes for pandas.
        data = StringIO(item.binary_data.decode('base64'))
        xls = ExcelFile(data)
        data = xls.parse(xls.sheet_names[0])
        pricelists_dict = data.to_dict()
        for running in range(0, len(pricelists_dict['PID'])):
            # Zero-pad PIDs to 7 characters to match default_code format.
            pid = str(pricelists_dict['PID'][running]).zfill(7)
            price_inc_vat = pricelists_dict['Price (Inc. Vat)'][running]
            product_id = product_obj.search([('default_code', '=', pid)
                                             ]).id
            if not product_id:
                raise except_orm(_('PID does not exist: %r') % (pid, ))
            if not pid or not price_inc_vat:
                raise except_orm(_('Some PID or Price have empty text.'))
            pricelists_obj.pricelists_line_ids.create({
                'pricelists_id': active_id,
                'product_id': product_id,
                'price_inc_vat': price_inc_vat,
            })
    return {'type': 'ir.actions.act_window_close'}
def parse_trans_psbc(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    """Parse a PSBC-format bank-statement workbook.

    Rows 2 and 4 of each sheet's header block carry the holder name,
    account and currency in colon-separated cells; the transaction header
    is on row 6 and the last three rows are a footer. Parsed frames are
    appended to *tmp_trans_list_by_sheet*.

    :return: total number of transaction rows read across all sheets
    """
    # Source caption -> canonical column name used downstream.
    col_map = {
        '交易渠道': '交易方式',
        '交易机构名称': '交易网点',
        '对方账号/卡号/汇票号': '对方账号',
        '对方开户机构': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue  # skip empty sheets
        # Row 2 packs account and name into one colon-separated cell;
        # part 1 holds "<account> ...", part 2 the holder name.
        _tmp_str = tmp_acc_strs.iloc[1, 0].split(':')
        _name = _tmp_str[2]
        _account = _tmp_str[1].split()[0]
        _currency = tmp_acc_strs.iloc[3, 0].split(':')[1].split()[0]
        # Everything as str; skip the 3-row footer.
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet, header=5,
                                           dtype=str, skipfooter=3)
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['币种'] = _currency
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
def __init__(self, input_file):
    """
    Create a class to import and parse the excel spreadsheet that
    is used as an input file for V/UQ-predictivity.

    :param input_file: path to the Excel input workbook
    """
    # Keep the path for reference; sheets are parsed later on demand.
    self.file_name = input_file
    # Import the excel file:
    self.xlfile = ExcelFile(self.file_name)  # to retrieve & work w/ input
def dump_xlsx2dict(xlsx_file):
    """Read the first sheet of *xlsx_file* and return it as a list of
    per-row dicts keyed by column name.

    The sheet is assumed to contain a '01_PatientName' column, whose
    length determines the number of rows returned.
    """
    xls = ExcelFile(xlsx_file)
    df = xls.parse(xls.sheet_names[0])
    # column -> {row_index: value}; renamed to avoid shadowing the builtin
    # `dict` (the original bound both `dict` and `dict2list`).
    columns = df.to_dict()
    row_count = len(columns['01_PatientName'])
    return [{key: values[i] for key, values in columns.items()}
            for i in range(row_count)]
def build_actualisation_group_names_h5():
    """Store the 'defs' sheet of actualisation_groups.xls under the
    'names' key of ../actualisation_groups.h5 (Python 2 code)."""
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
def xls_to_dict(filepath):
    """Return the first sheet of *filepath* as a list of per-row dicts
    (``to_dict(orient='records')``). Python 2 code.

    NOTE(review): an unreadable file returns ``{}`` while success returns
    a list -- callers must cope with both types.
    """
    try:
        xls = ExcelFile(filepath)
    except IOError:
        print '%s File Not found' % filepath
        return {}
    df = xls.parse(xls.sheet_names[0])
    d = df.to_dict(orient='records')
    return d
def get_loyer_inflator(year):
    """Return the rent-index (IRL) inflator of *year* relative to Q1 2006."""
    workbook = ExcelFile('../countries/france/data/sources/loyers.xlsx')
    data = workbook.parse('data', na_values=['NA'])
    first_quarter = data['quarter'] == 1
    # IRL values for Q1 of the base year (2006) and of the requested year.
    base = data[(data['year'] == 2006) & first_quarter]['irl']
    current = data[(data['year'] == year) & first_quarter]['irl']
    return float(current.values / base.values)
def start(file_name, download_name="example.xlsx"):
    """Read an xlsx file from disk and return a download link for its
    first sheet, parsed into a DataFrame."""
    with open(file_name, "rb") as handle:
        raw = handle.read()
    workbook = ExcelFile(io.BytesIO(raw), engine='openpyxl')
    frame = workbook.parse(workbook.sheet_names[0])
    return create_download_link_excel(frame, download_name)
def get_loyer_inflator(year):
    """Rent-index (IRL) multiplier between Q1 2006 and Q1 of *year*."""
    frame = ExcelFile('../countries/france/data/sources/loyers.xlsx').parse(
        'data', na_values=['NA'])

    def q1_irl(y):
        # IRL series restricted to the first quarter of year *y*.
        return frame[(frame['year'] == y) & (frame['quarter'] == 1)]['irl']

    return float(q1_irl(year).values / q1_irl(2006).values)
def openDialog(self):
    '''Opens a saved .xls file.

    Prompts for a workbook, clears current state, loads each sheet as a
    marker Table and adds a tab per marker.
    '''
    title = 'Open a saved project file...'
    fileName, f = QFileDialog.getOpenFileName(self, title, self.path)
    excelFile = ExcelFile(fileName)
    self.__clearAll()
    # Plain loops instead of list comprehensions that were used purely
    # for their side effects (an anti-pattern: the built lists were
    # discarded).
    for sheet in excelFile.sheet_names:
        self.markers.append(Table(sheet, excelFile.parse(sheet)))
    for marker in self.markers:
        self.tabs.addTab(marker, marker.name)
    self.__updateView()
def build_lookup_table(file_id, file_name, file_data, value_column,
                       label_column, workflow_spec_id=None, task_spec_id=None,
                       field_id=None):
    """ In some cases the lookup table can be very large.  This method will
    add all values to the database in a way that can be searched and returned via
    an api call - rather than sending the full set of options along with the form.
    It will only open the file and process the options if something has changed.  """
    try:
        xlsx = ExcelFile(file_data, engine='openpyxl')
    # Pandas--or at least openpyxl, cannot read old xls files.
    # The error comes back as zipfile.BadZipFile because xlsx files are zipped xml files
    except BadZipFile:
        raise ApiError(
            code='excel_error',
            message=
            f"Error opening excel file {file_name}. You may have an older .xls spreadsheet. (file_model_id: {file_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})"
        )
    df = xlsx.parse(
        xlsx.sheet_names[0])  # Currently we only look at the fist sheet.
    # Clean the sheet: stable dtypes, no pandas-generated columns, no empty
    # rows, and blank strings instead of NA so values serialise cleanly.
    df = df.convert_dtypes()
    df = df.loc[:, ~df.columns.str.contains(
        '^Unnamed')]  # Drop unnamed columns.
    df = pd.DataFrame(df).dropna(how='all')  # Drop null rows
    df = pd.DataFrame(df).replace({NA: ''})
    if value_column not in df:
        raise ApiError(
            "invalid_enum",
            "The file %s does not contain a column named % s" %
            (file_name, value_column))
    if label_column not in df:
        raise ApiError(
            "invalid_enum",
            "The file %s does not contain a column named % s" %
            (file_name, label_column))
    # One LookupFileModel per (spec, task, field, file), plus one data row
    # per spreadsheet row; all committed in a single transaction.
    lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                   field_id=field_id,
                                   task_spec_id=task_spec_id,
                                   file_model_id=file_id,
                                   is_ldap=False)
    db.session.add(lookup_model)
    for index, row in df.iterrows():
        lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                      value=row[value_column],
                                      label=row[label_column],
                                      data=row.to_dict(OrderedDict))
        db.session.add(lookup_data)
    db.session.commit()
    return lookup_model
def loadfields():
    ''' Get the variable names in the chosen excel sheet

    Tk callback: reads the file/sheet/first-row chosen in the form,
    parses the sheet, populates the column-choice combo boxes (with
    best-guess defaults), enables the remaining controls, and stashes the
    parsed frame in the module-level global ``DFrame``.
    '''
    filename = ents[1][1].get()
    f = path.basename(filename)
    status.set("Status: loading data and column names of %s" % f.encode().decode())
    adds = ExcelFile(filename)
    sheet = sheet_combo.get()
    # if first row is not entered, assume 1 and set the form to 1.
    if frow.get() == "":
        frow.insert(0, 1)
        first_row = 1
    else:
        first_row = int(frow.get())
    print("%s and %s onwards chosen." % (sheet, first_row))
    df = adds.parse(sheet, skiprows=first_row - 1)
    #print(df.columns.values)
    print("There are %s observations on this file." % len(df.index))
    # NOTE(review): this list literal is a no-op expression (its value is
    # discarded) -- looks like leftover label text; confirm and remove.
    [
        'Business Name:', 'Street Number:', 'Street Name:', 'City/Borough:',
        'Zipcode:', 'Boro Code:'
    ]
    # Best-guess default column name for each combo-box position.
    defaults = {
        0: 'trade',
        1: 'legal',
        2: 'originaladdress',
        3: 'streetnumber',
        4: 'streetname',
        5: 'Borough',
        6: 'pzip',
        7: 'boro',
        8: 'state',
        9: '',
        10: ''
    }
    for i in range(len(combos)):
        collist = list(df.columns.values)
        collist.append("")  # allow "no column" as a choice
        # Enable both pickers for this field and fill them with the
        # sheet's columns, sorted for readability.
        combs[i][1]['state'] = 'enabled'
        combs[i][2]['state'] = 'enabled'
        combs[i][1]['values'] = sorted(collist, key=keyfunction)
        combs[i][2]['values'] = sorted(collist, key=keyfunction)
        choose_default(i, 1, collist, defaults[i])
        choose_default(i, 2, collist, defaults[i])
    chk['state'] = 'enabled'
    b4['state'] = 'enabled'
    # print(combs[0][0], df[combs[0][1].get()].head(10))
    status.set(
        "Status: Choose address fields, optionally edit output file, and press 'Geocode'"
    )
    global DFrame
    DFrame = df
    return df
def drop_duplicates(final, keep):
    '''Drop's the Duplicate rows as some files have same repeated rows

    Reads sheet 'Page 1' of workbook *final*, drops rows whose six key
    columns all match (*keep* selects which duplicate survives, as in
    DataFrame.drop_duplicates), sorts, and overwrites *final* in place.
    '''
    # NOTE(review): ``index`` is not a documented ExcelFile keyword --
    # confirm the pandas version in use accepts it.
    xls_file = ExcelFile(final, index=False)
    df = xls_file.parse('Page 1')
    print("\nDropping duplicates")
    # Rows count as duplicates only when all six key columns match.
    df4 = df.drop_duplicates(subset=['Number', 'Expert_Assigned', 'Opened',
                                     'Definition', 'Value', 'Created'],
                             keep=keep)
    df5 = df4.sort_values(['Number', 'Created', 'Definition', 'Expert_Assigned'],
                          ascending=[True, True, False, True])
    # Overwrite the source workbook in place.
    df5.to_excel(final, sheet_name='Page 1', index=False)
    return
def get_custom_field_config(filename, sheetname):
    '''Early example of the import of a custom field config based upon a
    list of field names

    Returns one dict per row with keys name/required/description, every
    value coerced to unicode (Python 2 code).
    '''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    # The sheet layout is fixed: field name, required flag, description.
    data.columns = ["name", "required", "description"]
    # Coerce the 'required' cells to booleans.
    data["required"] = data["required"].apply(is_true)
    data = data.fillna('')
    mydata = [{key: unicode(value) for key, value in point.items()}
              for point in data.T.to_dict().values()]
    return mydata
def read_excel(filename):
    '''
    Read Excel File provided by filename.

    :param filename - path to an Excel file:
    :return: pandas dataframe
    '''
    workbook = ExcelFile(filename)
    first_sheet = workbook.sheet_names[0]
    return workbook.parse(first_sheet)
def data_import(file) -> Dict[str, DataFrame]:
    """Parse every sheet of *file* into a dict of DataFrames.

    Sheets whose name contains "Flags" or "EMS" are indexed on their
    "Parameter" column; all other sheets are indexed on "time" when that
    column exists, otherwise left with the default index.
    """
    workbook = ExcelFile(file)
    frames = dict()
    for name in workbook.sheet_names:
        if "Flags" in name or "EMS" in name:
            frames[name] = workbook.parse(sheet_name=name, index_col="Parameter")
            continue
        try:
            frames[name] = workbook.parse(sheet_name=name, index_col="time")
        except ValueError:
            # Sheet has no "time" column; fall back to the default index.
            frames[name] = workbook.parse(sheet_name=name)
    return frames
def uploadfile_page2(request):
    """Django view (Python 2): attach an uploaded Excel table to a
    reestr_proj record as rendered HTML and log an audit comment.

    NOTE(review): non-POST requests and POSTs missing 'reestrproj'/'file'
    fall through without returning a response -- confirm the view is only
    reached via the expected upload form.
    """
    if request.method == 'POST':
        if request.POST.has_key("reestrproj") and request.FILES.has_key("file"):
            reestrproj = request.POST["reestrproj"]
            filename = request.FILES['file'].name
            filedata = request.FILES['file'].read()
            file_extension = os.path.splitext(filename)[-1]
            if file_extension != ".xls" and file_extension != ".xlsx":
                # Unsupported extension: client-side alert (Russian).
                return HttpResponse("""
<html><head><script type="text/javascript">
window.top.ClearUploadP2();
alert("Формат файла не поддерживается!");
</script></head></html>
""")
            else:
                rp = reestr_proj.objects.get(pk=int(reestrproj, 10))
                # Render the first sheet as a bare HTML table and store it
                # inside the project's JSON-like data blob.
                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0], header=None)
                df = df.fillna("")
                ht = df.to_html(header=False, index=False,
                                float_format=lambda x: '%10.2f' % x,
                                classes="table table-bordered small").encode('utf-8')
                data = rp.data
                data["excel"] = ht
                rp.data = data
                rp.save()
                # Audit trail: record that a table was uploaded.
                reestr_proj_comment.objects.create(
                    reestr_proj = rp,
                    user = request.user,
                    comment = u"Загружена таблица показателей",
                    log=True
                )
                return HttpResponse("""
<html><head><script type="text/javascript">
window.top.ClearUploadP2();
window.top.GetTableExcel();
window.top.GetListComments();
</script></head></html>
""")
def get_sheet(filename, sheetname):
    '''Extracts a list of dicts from a worksheet of an Excel file along with
    the column names, data types and maximum widths

    Returns the tuple (row dicts, original column captions, dtypes before
    the unicode cast, column widths). Python 2 code (uses ``unicode``).
    '''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data = data.fillna('')
    orig_cols = tuple(data.columns)
    # Normalise column captions to internal key names.
    replace = [get_key_from_field_name(column) for column in data.columns]
    data.columns = replace
    # Capture dtypes before the unicode cast below destroys them.
    types = copy(data.dtypes)
    for col in replace:
        data[col] = data[col].values.astype(unicode)
    return (data.T.to_dict().values(), orig_cols, types, get_widths(data))
def load_moc_and_engineers_info(excel_file: pd.ExcelFile):
    """Extract MoC and engineer rows from the input sheet.

    Columns A:C (header on spreadsheet row 5) hold Name / e-mail / third
    column; rows are classified by whether 'Name' starts with 'MoC' or
    'Engineer'. Returns (moc_info_df, engineer_df, engineer_begin_index).
    """
    print("Loading MoC and engineers info")
    meta_info = excel_file.parse(INPUT_SHEET_NAME, header=4, usecols="A:C")
    # The second column's caption varies between files; normalise it.
    meta_info.rename(columns={meta_info.columns[1]: "E-mail"}, inplace=True)
    moc_info_df = meta_info[meta_info['Name'].astype(str).str.startswith(
        'MoC')]
    engineer_df = meta_info[meta_info['Name'].astype(str).str.startswith(
        'Engineer')]
    # Locate where the engineer section starts in the raw (headerless) sheet.
    engineer_begin_index = find_engineer_begin_index(
        excel_file.parse(INPUT_SHEET_NAME, usecols="A", header=None))
    print("Found engineer begin index at:", engineer_begin_index)
    return moc_info_df, engineer_df, engineer_begin_index
def top_row(final):  # naming the indexes of the file
    '''Rename the top row of the sheet with required column names

    The source sheet has no header row, so pandas generated 'Unnamed: N'
    captions; this maps them to the real column names and overwrites the
    workbook *final* in place.
    '''
    # NOTE(review): ``index`` is not a documented ExcelFile keyword --
    # confirm the pandas version in use accepts it.
    xls_file1 = ExcelFile(final, index=True)
    df = xls_file1.parse('Page 1')
    print("\nRenaming")
    # naming the indexes of the file
    df4 = df.rename(
        columns={"Unnamed: 0": 'Number',
                 "Unnamed: 1": 'Priority',
                 "Unnamed: 2": 'Opened',
                 "Unnamed: 3": 'Definition',
                 "Unnamed: 4": 'Value',
                 "Unnamed: 5": 'Expert_Assigned',
                 "Unnamed: 6": 'Created',
                 "Unnamed: 7": 'End_time',
                 "Unnamed: 8": 'Resolved',
                 "Unnamed: 9": 'New_Resolved',
                 "Unnamed: 10": 'Closed',
                 "Unnamed: 11": 'Main_file'})
    # Overwrite the source workbook in place.
    df4.to_excel(final, sheet_name='Page 1', index=False)
    return
def build_actualisation_group_amounts_h5():
    """Store the 'amounts' (indexed by 'case'), 'benef' and 'corresp'
    sheets of actualisation_groups.xls into ../actualisation_groups.h5
    (Python 2 code)."""
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df_a = xls.parse('amounts', na_values=['NA'])
    df_a = df_a.set_index(['case'], drop=True)
    df_b = xls.parse('benef', na_values=['NA'])
    df_c = xls.parse('corresp', na_values=['NA'])
    store['amounts'] = df_a
    store['benef'] = df_b
    store['corresp'] = df_c
    print df_a.to_string()
    print df_a.columns
    store.close()
def studentsExcelToList(self, fileName, startIndex):
    """Yield one list of selected fields per student row.

    Reads the first sheet of *fileName*, skips the first *startIndex*
    rows, and for each remaining row yields the columns at positions
    1, 3, 5, 6, 7, 9 and 10.
    """
    workbook = ExcelFile(fileName)
    rows = workbook.parse(workbook.sheet_names[0]).to_numpy()[startIndex::]
    wanted_columns = (1, 3, 5, 6, 7, 9, 10)
    for row in rows:
        yield [row[col] for col in wanted_columns]
def createSamplesFromExcelKea(self):
    '''Opens a Kea Sample Batch spreadsheet and imports the samples.'''
    title = 'Locate Kea sample batch spreadsheet...'
    fileName, f = QFileDialog.getOpenFileName(self, title, self.path)
    excelFile = ExcelFile(fileName)
    imported = excelFile.parse('Data')
    # Keep only the identifying columns and rename to the internal schema.
    imported = imported[['PlantID', 'Sample ID', 'Plate No', 'Position on Plate(s)']]
    imported.columns = ['Plant', 'Sample', 'Origin plate', 'Origin well']
    # Rows with neither a plate nor a well are empty placeholders.
    imported = imported.dropna(how='all', subset=['Origin plate', 'Origin well'])
    # Each sample initially sits where it was originally plated.
    imported['From plate'] = imported['Origin plate']
    imported['From well'] = imported['Origin well']
    imported['Plate'] = imported['Origin plate']
    imported['Well'] = imported['Origin well']
    # NOTE(review): DataFrame.append is removed in pandas >= 2.0 -- this
    # requires the legacy pandas pinned by the project.
    self.markers[0].table = self.markers[0].table.append(imported, ignore_index=True)
    self.__updateView()
def build_actualisation_group_vars_h5():
    """Store the 'data' sheet of actualisation_groups.xls as 'vars' in
    ../actualisation_groups.h5 and print the coefficient -> variables
    grouping (Python 2 code)."""
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('data', na_values=['NA'])
    store['vars'] = df
    print df.to_string()
    print store
    from numpy import unique
    # Distinct actualisation coefficients present in the sheet.
    coeff_list = sorted(unique(df['coeff'].dropna()))
    print coeff_list
    groups = {}
    for coeff in coeff_list:
        # Variables actualised by this coefficient.
        groups[coeff] = list(df[df['coeff'] == coeff]['var'])
    print groups
    store.close()
def load_auditory(self, file):
    """Load auditorium sheets from an Excel workbook.

    Duplicates (same inner name) will not be loaded.

    :param file: path or buffer readable by pandas.ExcelFile
    :return: None; populates ``self.auds``
    """
    excel_file = ExcelFile(file)
    for name in excel_file.sheet_names:
        raw_frame = excel_file.parse(name, index_col=None, header=None)
        unresolved_dict = splitter(raw_frame, named=True)
        # Only sheets carrying a "settings" section describe an auditory.
        if "settings" in unresolved_dict.keys():
            tmp = Auditory(unresolved_dict, outer_name=name)
            if tmp.inner_name in self.auds.keys():
                # Duplicate inner name: silently discard the new instance.
                del tmp
            else:
                self.auds[tmp.inner_name] = tmp
def list(request):
    """Django view (Python 2, legacy pandas): document upload + a monthly
    pivot of payments from the most recent uploaded workbook.

    NOTE(review): shadows the builtin ``list``; rename when feasible.
    """
    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            newdoc = Document(docfile = request.FILES['docfile'])
            newdoc.save()
            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('myproject.myapp.views.list'))
    else:
        form = DocumentForm() # A empty, unbound form

    # Load documents for the list page
    documents = Document.objects.all()[4:]
    a = Document.objects.last()
    url = 'myproject' + a.docfile.url
    if os.path.isfile(url):
        vic = 'TRUE'
        # Columns 0/18/26/25/23 of Sheet1 map to the payment fields below.
        v = ExcelFile(url).parse("Sheet1", parse_cols=[0, 18, 26, 25, 23])
        v.columns = ['Request','Product', 'Paid', 'PaidDate', 'Type']
        # it is right!.to_datetime,
        v.PaidDate = pd.to_datetime(v.PaidDate, format='%d.%m.%Y' )
        # NOTE(review): 'rows'/'cols' (pivot_table) and 'how' (resample)
        # are pre-0.14 pandas keywords; this needs the project's pinned
        # legacy pandas.
        df = pd.pivot_table(v, values='Paid', rows='PaidDate',
                            cols=['Type', 'Product'],
                            aggfunc=[np.sum, np.count_nonzero])
        df = df.resample('M', how='sum')
        df = df.fillna(value=0)
        # Render list page with the documents and the form
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form,
             'df': df.to_html(classes="table-condensed"), 'url': url, 'vic': vic},
            context_instance=RequestContext(request)
        )
    else:
        vic = 'False'
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form, 'url': url, 'vic': vic},
            context_instance=RequestContext(request)
        )
def build_from_insee( directory = None, verbose=False):
    """Build the 'insee' population-by-age table in the project HDF5 store
    (Python 2 code).

    Parses one sheet per year (2006-2009) of the INSEE workbook
    sd2010_t6_fm.xls, cleans the '90 et plus' / '105 et plus' aggregate
    rows, and stores the year-merged frame under key 'insee'.

    :param directory: where the H5 file lives; defaults to this module's dir
    :param verbose: print intermediate frames while building
    """
    if directory is None:
        directory = os.path.dirname(__file__)
    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    xls = ExcelFile(os.path.join(model.DATA_SOURCES_DIR, "sd2010_t6_fm.xls"))
    df_age_final = None
    for year in range(2006,2010):
        sheet_name = str(year)
        df = xls.parse(sheet_name, header=0, index_col=0, skiprows=8,
                       parse_cols=[1,2], na_values=['NA'])
        df.index.name = u"âge"
        # The single data column comes back unnamed; label it by year.
        df.rename(columns = {"Unnamed: 1" : year}, inplace = True)
        # Dealing with te 90 et plus and 105 et plus
        df = df.reset_index()
        df = df.dropna(axis=0)
        # Row 106 holds the '105 et plus' total; relabel it as age 105.
        df.set_value(106, u"âge", 105)
        df = df.set_index(u"âge")
        # Drop the '90 et plus' subtotal row.
        df.drop(df.index[90], axis=0, inplace=True)
        df.index.name = u"âge"
        df = df.reset_index()
        if verbose:
            print "year : " + str(year)
            print df.to_string()
        if df_age_final is None:
            df_age_final = df
        else:
            # Merge on the shared 'âge' column, adding one column per year.
            df_age_final = df_age_final.merge(df)
    if verbose:
        print df_age_final.to_string()
        print df_age_final.dtypes
    from numpy import dtype
    df_age_final[u"âge"] = df_age_final[u"âge"].astype(dtype("int64"))
    store.put("insee", df_age_final)
def __init__(self, file, from_pickle=False):
    """Build a seating controller from an Excel settings workbook, or
    restore one from a previously pickled state.

    :param file: open file object (pickle) or path/buffer (Excel)
    :param from_pickle: when True, restore saved state instead of parsing
    """
    if from_pickle:
        data = pickle.load(file)
        # Restore the helpers' class-level state before rebinding our own
        # attributes from the pickled controller.
        Checker.clean_global_init(data["checker_meta"])
        Seat.counters = data["seats_meta"]
        self.__dict__.update(data["controller"].__dict__)
        return

    self.email_handle = list()
    self.mode = {"people": "None"}
    self.last_change = None
    self.people = pd.DataFrame()
    self.auds = dict()
    self.inds = list()
    self.teams = list()
    self.seed = 1
    found_main_settings = False
    excel_file = ExcelFile(file)
    # First pass: find the single sheet holding the global settings and
    # initialise the Checker from it.
    for name in excel_file.sheet_names:
        raw_frame = excel_file.parse(name, index_col=None, header=None)
        unresolved_dict = splitter(raw_frame, named=True)
        if "main_settings" in unresolved_dict.keys():
            # More than one settings sheet is an error.
            if found_main_settings:
                raise ControllerException("Две страницы с общими настройками!")
            found_main_settings = True
            Checker.raw_global_init(unresolved_dict)
            self.checker = Checker()
    if not found_main_settings:
        raise TypeError("Настройки не найдены, на странице с настройками нужен ключ main_settings")
    # Second pass: every non-settings sheet describes one auditory.
    for name in excel_file.sheet_names:
        raw_frame = excel_file.parse(name, index_col=None, header=None)
        unresolved_dict = splitter(raw_frame, named=True)
        if "main_settings" not in unresolved_dict.keys():
            tmp = Auditory(unresolved_dict, outer_name=name)
            # Duplicate auditory names are fatal.
            if tmp.inner_name in self.auds.keys():
                del tmp
                raise TypeError("Есть одинаковые аудитории")
            else:
                self.auds[tmp.inner_name] = tmp
    self._message_upd()
from pandas import DataFrame, ExcelFile
import pandas as pd
import json

# Script: load two workbooks, split the 'File' column of the second into
# components, and left-merge those components with its 'h' column.

# Path to excel file
# Your path will be different, please modify the path below.
location = r'c:/users/meinzerc/Desktop/table.xlsx'
# Create ExcelFile object
xls = ExcelFile(location)
# Parse the excel file
table = xls.parse('Sheet1')
# BUG FIX: the parsed frame is named `table`; `df` was never defined here.
table.head()
location = r'c:/users/meinzerc/Desktop/base.xlsx'
# Create ExcelFile object
xls = ExcelFile(location)
# Parse the excel file
base = xls.parse('Sheet2')
base.head()
base.columns = ['File', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
basecut = base[['File', 'h']]
h = pd.DataFrame(basecut.h)
# Split the File field on runs of non-breaking spaces / whitespace.
final = basecut.File.str.split('\\xa0+\s*\\xa0*')
abc = pd.DataFrame(final.tolist(), columns=['a', 'b', 'c', 'e', 'f'],
                   index=final.index)
# Re-attach 'h' by row index.
work = pd.merge(abc, h, how='left', left_index=True, right_index=True)
# Python version 2.7.5 from pandas import ExcelFile from markovchain import MarkovChain from orderstatemapper import OrderStateMapper from equalordermarkovmatrixcomparator import EqualOrderMarkovMatrixComparator from scipy.stats import chisquare from array import array xlsx = ExcelFile('dane.xls') data = xlsx.parse('strona', parse_cols=[1, 7], index_col=None, na_values=['NA']) #order = 2 #map(lambda x: MarkovChain(x, order).stdout() , data.groupby('grupa').kto.tolist()) model_2 = MarkovChain( data['kto'],2).markov_matrix #print(model_2.markov_matrix.keys()[0][0][0:]) model_1 = MarkovChain( data['kto'],1).markov_matrix model_3 = MarkovChain( data['kto'],3).markov_matrix mapper = OrderStateMapper(model_1, 1, model_2, 2) model_1_adjusted_to_2=mapper.get_lower_order_matrix_adjusted_to_the_higher_one() comparison_model= EqualOrderMarkovMatrixComparator(model_1_adjusted_to_2,model_2) comaprison = comparison_model.get_probabilities_for_transitions() expected,observed=comparison_model.get_probablilities_expected_and_observed()
def get_excel_data_frame(read_excel, skiprows=0, header=None, names=None):
    """Load the first sheet of an Excel workbook into a DataFrame.

    Args:
        read_excel: path or buffer acceptable to pandas.ExcelFile.
        skiprows: number of leading rows to skip.
        header: row number to use as column names (None for no header).
        names: optional explicit list of column names.

    Returns:
        pandas.DataFrame with the parsed sheet (no index column is set).
    """
    workbook = ExcelFile(read_excel)
    first_sheet = workbook.sheet_names[0]
    return workbook.parse(
        first_sheet,
        header=header,
        index_col=None,
        skiprows=skiprows,
        names=names,
    )
def import_data(self, transactions_file):
    """Parse sheet 'Sheet0' of the transactions workbook into ``self.data``.

    The fourth column (index 3) becomes the index; 'NA' cells become NaN.
    """
    workbook = ExcelFile(transactions_file)
    self.data = workbook.parse('Sheet0', index_col=3, na_values=['NA'])
def build_hdf_fr(): # population DIR = '../../data_fr/proj_pop_insee' store = HDFStore(os.path.join(DIR,'proj_pop.h5')) sex_dict = {0: 'populationH', 1: 'populationF'} for fil in os.listdir(DIR): if fil[:7] == 'projpop': filename = os.path.join(DIR, fil) xls = ExcelFile(filename) # sheets = xls.sheet_names pop = None for sex, sheet in sex_dict.items(): df = xls.parse(sheet, skiprows = [0,1,2,3], na_values=['NA'], index_col = 0) df = df.reset_index() del df[df.columns[0]] for i in arange(109,114): df = df.drop([i]) # Rename index df.index.names = ['age'] df.columns = df.columns.astype('int32') df = df.unstack() df.index.names[0] = 'year' df = df.reset_index() df['sex'] = sex if pop is None: pop = df else: pop = pop.append(df) pop['pop'] = pop[0] del pop[0] s = pop[pop['age']>=100] s = s.set_index(['age', 'sex', 'year']) s = s.sum(axis=0, level = ['sex', 'year']) pop = pop.set_index(['age', 'sex', 'year']) for t in s.index: pop.set_value( (100,) + t, 'pop', s.ix[t]['pop']) for a in range(101,109): pop = pop.drop(a, axis =0, level="age") print file[:-4] store[file[:-4]] = pop store.close() # profiles DIR = '../../data_fr' profile_file = 'profils.xls' store = HDFStore(os.path.join(DIR,'profiles.h5')) filename = os.path.join(DIR, profile_file) xls = ExcelFile(filename) sheets = xls.sheet_names profiles = None for sheet in sheets: df = xls.parse(sheet) df['age'] = df['age'].astype(int) df['sex'] = df['sex'].astype(int) df['year'] = 1996 df = df.set_index(['age', 'sex','year']) if profiles is None: profiles = df else: profiles = profiles.merge(df,right_index=True, left_index=True) store['profiles'] = profiles store.close() print 'DONE'
'3uL_HP_0_A4_1','3uL_HP_0_A6_1','3uL_HP_0_A7_1', '3uL_HP_0_A8_1','3uL_HP_0_A9_1','4uL_HP_0_A11_1','4uL_HP_0_A12_1', '4uL_HP_0_B1_1','4uL_HP_0_B10_1','4uL_HP_0_B12_1','4uL_HP_0_B3_1', '4uL_HP_0_B4_1','4uL_HP_0_B6_1','4uL_HP_0_B7_1','4uL_HP_0_B9_1', '5uL_HP_0_C1_1','5uL_HP_0_C10_1','5uL_HP_0_C2_1','5uL_HP_0_C3_1', '5uL_HP_0_C4_1','5uL_HP_0_C5_1','5uL_HP_0_C6_1','5uL_HP_0_C7_1', '5uL_HP_0_C8_1','5uL_HP_0_C9_1','6uL_HP_0_C11_1','6uL_HP_0_C12_1', '6uL_HP_0_D1_1','6uL_HP_0_D2_1','6uL_HP_0_D3_1','6uL_HP_0_D4_1', '6uL_HP_0_D5_1','6uL_HP_0_D6_1','6uL_HP_0_D7_1','6uL_HP_0_D8_1', '7uL_HP_0_D9_1','7uL_HP_0_E1_1','7uL_HP_0_E3_1','7uL_HP_0_E4_1', '7uL_HP_0_E6_1','7uL_HP_0_E7_1','7uL_HP_0_E9_1','7uL_HP_0_F1_1', '7uL_HP_0_F2_1','7uL_HP_0_F3_1', 'STD_0_B11_1','STD_0_B2_1','STD_0_B5_1','STD_0_B8_1','STD_0_E2_1', 'STD_0_E5_1','STD_0_E8_1'] data=ExcelFile('D:\\Database\\Origianl Intensity.xls') dict_merged={} dict_filtered={} dffinal=DataFrame(dict_merged) for sheetname in sheet: dict_merged[sheetname]=data.parse(sheetname,skiprows=[0,1]) drop_columns=['time', 'SN', 'Quality Fac.', 'Res.', 'Area', 'Rel. Intens.', 'FWHM', 'Chi^2', 'Bk. Peak'] dict_dropped={} ''' for sheetname in sheet: dict_dropped[sheetname]=pd.DataFrame(dict_merged[sheetname].drop(drop_columns,axis=1), columns=['m/z','Intens.'])
del df['Names'] # Import libraries from pandas import ExcelFile, DataFrame, concat, date_range import pandas as pd import matplotlib.pyplot as plt import numpy as np df.to_excel('Lesson3.xlsx', index=False) # Location of file Location = r'C:\Users\hdrojas\.xy\startups\Lesson3.xlsx' # Create ExcelFile object xlsx = ExcelFile(Location) # Parse a specific sheet df = xlsx.parse('sheet1',index_col='StatusDate') df.dtypes #list index df.index #convert to upper df.Names = df.Names.apply(lambda x: x.upper()) # Only grab where Status == 1 df = df[df['Status'] == 1] #- For all records in the State column where they are equal to NJ, replace them with NY. df.Names[df.Names == 'BOB'] = 'Chet' df.Names[df.Names == 'Chet'] = 'John'
def get_dataset(self, *args, **kwargs):
    """Return one parsed sheet of the resource's Excel file.

    Keyword Args:
        sheet: sheet name to parse; defaults to "Sheet1" when absent.
    """
    workbook = ExcelFile(self.resource.resource_file.path)
    sheet_name = kwargs.get('sheet', "Sheet1")
    return workbook.parse(sheet_name)
def readDataFile(self,filename,filetype,tab=None):
    """Load a data file into a table tab of the main window.

    Args:
        filename: path of the file to load.
        filetype: one of the module-level filetype constants; when it is not
            a known type it is inferred from the file extension.
        tab: optional existing tab to fill; when None new tab(s) are created.

    Any exception is logged to the main window rather than raised.
    """
    try:
        filepath, fileext = os.path.splitext(filename)
        filepath, filebasename = os.path.split(filename)

        # Infer the type from the extension when the caller's value is not
        # in the known set.
        if filetype not in filetypes:
            if fileext == '.xlsx':
                filetype = filetype_xlsx
            if fileext == '.csv':
                # FIX: was the redundant `filetype = filetype = filetype_excelcsv`
                filetype = filetype_excelcsv

        if filetype == filetype_xlsx:
            xl = ExcelFile(filename)
            # With no caller-supplied tab, every sheet gets its own new tab;
            # with one, only the FIRST parsable sheet fills it (then break).
            for sheet in xl.sheet_names:
                try:
                    df = xl.parse(sheet)
                    if tab is None:
                        tab = self.mainWindow.TableTabs.addTable()
                        tab.setDataFrame(df)
                        tab.setName(filebasename+" "+sheet)
                        tab = None
                    else:
                        tab.setDataFrame(df)
                        tab.setName(filebasename+" "+sheet)
                        break
                except IndexError:
                    # Sheet could not be parsed (e.g. empty) — skip it.
                    pass
        elif filetype == filetype_excelcsv:
            # Excel-flavoured CSV: semicolon separated, Windows-1252 encoded.
            df = read_csv(filename, sep=';',encoding='cp1252',dtype=str)
            if tab is None:
                tab = self.mainWindow.TableTabs.addTable()
            tab.setDataFrame(df)
            tab.setName(filebasename)
        elif filetype == filetype_excelunicode:
            # Unicode export: tab separated, UTF-16LE encoded.
            df = read_csv(filename, sep="\t",encoding='utf-16LE',dtype=str)
            if tab is None:
                tab = self.mainWindow.TableTabs.addTable()
            tab.setDataFrame(df)
            tab.setName(filebasename)
        elif filetype == filetype_facepager:
            # 'utf-8-sig' transparently strips a UTF-8 BOM if present, which
            # replaces the manual BOM sniffing this code once contained.
            df = read_csv(filename, sep=";",encoding='utf-8-sig',dtype=str)
            # Strip the first and last character of the first column header —
            # presumably leftover quote characters from the Facepager export;
            # TODO confirm against an actual export file.
            firstcolumn = df.columns.values[0]
            firstcolumn = firstcolumn[1:]
            firstcolumn = firstcolumn[:-1]
            df.columns = [firstcolumn] + df.columns.values[1:].tolist()
            if tab is None:
                tab = self.mainWindow.TableTabs.addTable()
            tab.setDataFrame(df)
            tab.setName(filebasename)

        # In the xlsx/no-tab path `tab` ends up None, so nothing is shown
        # here (each new tab was already handed to the tab widget).
        if tab is not None:
            tab.show()
    except Exception as e:
        self.mainWindow.logmessage(e)
def fill_pop_data():
    """Build the corrected 1996-2006 French population store from INSEE sheets.

    Reads one sheet per year from the ``pop_insee`` workbook, splits it by
    sex, harmonizes the INSEE convention change (ages recorded up to 105
    instead of 100 from year 2000 on) by folding everybody aged 100+ into the
    age-100 row, and writes the combined (age, sex, year)-indexed frame to
    'pop_1996_2006.h5'. Python 2 / legacy-pandas code.
    """
    h5_insee = ExcelFile(pop_insee)
    for year in range(1996, 2007):
        print year
        # Extract the sheet for this year (one sheet per year, named "1996"..).
        xls = h5_insee.parse(str(year), index_col=0)
        print xls.columns
        age_max = max(xls["age"])
        print " age_max = ", age_max
        # Split men and women, then add the sex column (0 = men, 1 = women).
        xls_men = xls.loc[:, ["men", "age", "year"]]
        xls_wom = xls.loc[:, ["women", "age", "year"]]
        xls_men["sex"] = 0
        xls_wom["sex"] = 1
        if year == 1996:
            # First year seeds the accumulator frames.
            print "initialisation", year
            xls_men.set_index(["age", "sex", "year"], inplace=True)
            xls_wom.set_index(["age", "sex", "year"], inplace=True)
            corrected_pop_men = xls_men
            corrected_pop_wom = xls_wom
            print corrected_pop_men.head().to_string()
        else:
            # Handle the INSEE notation change: from 2000 on, people are
            # recorded up to age 105 instead of 100.
            if age_max > 100:
                print " Age maximal > 100"
                print range(age_max.astype("int"), 99, -1)
                # Sum everybody aged 100 and over...
                tot_men = xls_men.men[xls_men.age >= 100].sum()
                tot_wom = xls_wom.women[xls_wom.age >= 100].sum()
                print tot_men, tot_wom
                # ...store that total in the age-100 row, then truncate the
                # frames at age 100 via the (age, sex, year) index slice.
                xls_men.loc[xls_men.age == 100, "men"] = tot_men
                xls_wom.loc[xls_wom.age == 100, "women"] = tot_wom
                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)
                xls_men = xls_men.loc[:(100, 0, year), :]
                xls_wom = xls_wom.loc[:(100, 1, year), :]
                # Combine with the accumulated years.
                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])
            if age_max == 100:
                # Already in the old convention: just index and append.
                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)
                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])
                print corrected_pop_men.head().to_string()
            if age_max < 100:
                raise Exception("the maximum recorded age is below 100")
    print len(corrected_pop_men), " longueur de corrected_pop"
    print " fin des boucles"
    print corrected_pop_men.columns
    # Normalize both sexes to a single 'pop' column, then stack them.
    corrected_pop_men.columns = ["pop"]
    corrected_pop_wom.columns = ["pop"]
    print corrected_pop_men.head(10).to_string()
    corrected_pop = concat([corrected_pop_men, corrected_pop_wom])
    print corrected_pop.head().to_string()
    print len(corrected_pop)
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country,
                                      "sources", "Carole_Bonnet",
                                      "pop_1996_2006.h5"))
    store_pop["population"] = corrected_pop
def test():
    """Reproduce C. Bonnet's generational-accounting results (calibration run).

    Loads the INSEE population projection plus the corrected 1996-2006
    population, loads profiles, runs the simulation with hypothesis set #1
    (r=3%, g=1%), prints the IPL and generational imbalance, exports the
    per-age-class accounts to Excel and plots them against Bonnet's figures.
    Python 2 / legacy-pandas code; depends on module-level Simulation,
    paths and constants defined elsewhere in the file.
    """
    print "Entering the simulation of C. Bonnet"
    simulation = Simulation()
    population_scenario = "projpop0760_FECbasESPbasMIGbas"
    simulation.load_population(population_filename, population_scenario)

    # Adding missing population data between 1996 and 2007 :
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country,
                                      "sources", "Carole_Bonnet",
                                      "pop_1996_2006.h5"))
    corrected_pop = store_pop["population"]
    print simulation.population.head().to_string()
    print corrected_pop.head().to_string()
    print " longueurs des inputs"
    print "prévisions insee", len(simulation.population), "population corrigée", len(corrected_pop)
    # Prepend the corrected historical years to the projected population.
    simulation.population = concat([corrected_pop, simulation.population])
    print " longueur après combinaison", len(simulation.population)

    # Loading profiles :
    simulation.load_profiles(profiles_filename)
    xls = ExcelFile(CBonnet_results)

    """
    Hypothesis set #1 : 
    actualization rate r = 3%
    growth rate g = 1%
    net_gov_wealth = -3217.7e+09 (unit : Franc Français (FRF) of 1996)
    non ventilated government spendings in 1996 : 1094e+09 FRF
    """
    # Setting parameters :
    year_length = 250
    simulation.year_length = year_length
    r = 0.03          # actualization (discount) rate
    g = 0.01          # growth rate
    n = 0.00          # population growth rate
    net_gov_wealth = -3217.7e09          # FRF of 1996
    year_gov_spending = (1094) * 1e09    # non-ventilated spendings, FRF 1996
    # avg_gov_spendings = 0
    # # List w/ the economic affairs
    # spending_list = [241861, 246856, 245483, 251110, 261752, 271019,
    #                  286330, 290499, 301556, 315994, 315979, 332317,
    #                  343392, 352239, 356353, 356858]
    # count = 0
    # for spent in spending_list:
    #     year_gov_spending = spent*1e+06*((1+g)/(1+r))**count*6.55957
    #     print year_gov_spending
    #     net_gov_spendings += year_gov_spending
    #     avg_gov_spendings += year_gov_spending
    #     count += 1
    # avg_gov_spendings /= (count)
    # print 'avg_gov_spendings = ', avg_gov_spendings

    # Loading simulation's parameters :
    simulation.set_population_projection(year_length=year_length, method="stable")
    simulation.set_tax_projection(method="per_capita", rate=g)
    simulation.set_growth_rate(g)
    simulation.set_discount_rate(r)
    simulation.set_population_growth_rate(n)
    simulation.create_cohorts()
    simulation.set_gov_wealth(net_gov_wealth)
    simulation.set_gov_spendings(year_gov_spending, default=True, compute=True)

    # Calculating net transfers :
    # Net_transfers = tax paid to the state minus money recieved from the state
    taxes_list = ["tva", "tipp", "cot", "irpp", "impot", "property"]
    payments_list = ["chomage", "retraite", "revsoc", "maladie", "educ"]
    simulation.cohorts.compute_net_transfers(name="net_transfers",
                                             taxes_list=taxes_list,
                                             payments_list=payments_list)

    """
    Reproducing the table 2 : Comptes générationnels par âge et sexe (Compte central)
    """
    # Generating generationnal accounts :
    year = 1996
    simulation.create_present_values(typ="net_transfers")
    print "PER CAPITA PV"
    print simulation.percapita_pv.xs(0, level="age").head(10)
    print simulation.percapita_pv.xs((0, year), level=["sex", "year"]).head(10)

    # Calculating the Intertemporal Public Liability
    ipl = simulation.compute_ipl(typ="net_transfers")
    print "------------------------------------"
    print "IPL =", ipl
    # NOTE(review): 8050.6e09 is presumably the 1996 French GDP in FRF —
    # confirm the source of this constant.
    print "share of the GDP : ", ipl / 8050.6e09 * 100, "%"
    print "------------------------------------"

    # Calculating the generational imbalance
    gen_imbalance = simulation.compute_gen_imbalance(typ="net_transfers")
    print "----------------------------------"
    print "[n_1/n_0=", gen_imbalance, "]"
    print "----------------------------------"

    # Creating age classes
    cohorts_age_class = simulation.create_age_class(typ="net_transfers", step=5)
    cohorts_age_class._types = [
        u"tva", u"tipp", u"cot", u"irpp", u"impot", u"property",
        u"chomage", u"retraite", u"revsoc", u"maladie", u"educ",
        u"net_transfers",
    ]
    age_class_pv_fe = cohorts_age_class.xs((1, year), level=["sex", "year"])
    age_class_pv_ma = cohorts_age_class.xs((0, year), level=["sex", "year"])
    print "AGE CLASS PV"
    print age_class_pv_fe.head()
    print age_class_pv_ma.head()
    age_class_pv = concat([age_class_pv_fe, age_class_pv_ma], axis=1)
    print age_class_pv
    # NOTE(review): "\calibration.xlsx" relies on '\c' not being a recognized
    # escape (so the backslash survives) — Windows-only path building;
    # os.path.join(xls_adress, "calibration.xlsx") would be the portable form.
    age_class_pv.to_excel(str(xls_adress) + "\calibration.xlsx", "compte_generation")

    # Plotting
    age_class_pv = cohorts_age_class.xs(year, level="year").unstack(level="sex")
    age_class_pv = age_class_pv["net_transfers"]
    age_class_pv.columns = ["men", "women"]
    # age_class_pv['total'] = age_class_pv_ma['net_transfers'] + age_class_pv_fe['net_transfers']
    # age_class_pv['total'] *= 1.0/2.0
    # Overlay Bonnet's published series for visual comparison.
    age_class_theory = xls.parse("Feuil1", index_col=0)
    age_class_pv["men_CBonnet"] = age_class_theory["men_Cbonnet"]
    age_class_pv["women_CBonnet"] = age_class_theory["women_Cbonnet"]
    age_class_pv.plot(style="--")
    plt.legend()
    plt.axhline(linewidth=2, color="black")
    plt.show()