Exemplo n.º 1
0
def get_file(path,filename,filetype):
    
    if filetype == 'csv':
        try:
            return(readcsv(path,filename))
        except:
            print('error in reading csv')
    else:
        if filetype == 'json':
            try:
                with open(f'{path}\\{filename}', 'r') as f:
                    return(pd.DataFrame(js.load(f)))
            except:
                print('error in reading json')
        else:
            if filetype == 'xlsb':
                try:
                    print('reading file...')
                    df = []
                    with open_xlsb(f'{path}\\{filename}') as wb:
                        with wb.get_sheet('Data') as sheet:
                            for row in sheet.rows():
                                df.append([item.v for item in row])
                    print('reading completed ...')
                    return(pd.DataFrame(df[1:], columns=df[0]))
                except:
                    print('error in reading xlsb')
            else:
                print('file format not supported yet in packages')
Exemplo n.º 2
0
    def start_parse(self, file_path, data=None):
        """
        Will first take all columns of the excel and will then read each row.
        Then it will create a dictionary for each row by zipping column names and row entries.
        Each row dict will be added to list, which in turn will be returned as a JSON.
        """
        with open_xlsb(file_path) as wb:

            cols = []
            data_frame = []
            with wb.get_sheet(data) as sheet:
                for row in sheet.rows():
                    l = [item.v for item in row]
                    if not cols:
                        cols = l
                    data_frame.append(l)

        data_frame = pd.DataFrame(data_frame[1:], columns=data_frame[0])
        data = data_frame.to_dict("split")["data"]

        parsable = []
        for d in data:
            for i in d:
                if str(i) == "nan" and math.isnan(i):
                    return None
            parsable.append(dict(zip(cols, d)))

        return json.dumps(parsable)
Exemplo n.º 3
0
    def xlsb_export_csv(self,sheet_number,outputname):
          """
		Enter the sheet number, and ouput file name in the format of .csv that you need to 
		read and create a data frame, you can use itertions to upload multiple sheets.
	  """
	  df =[]
          with open_xlsb(self.inputfilename) as wb:
              with wb.get_sheet(sheet_number) as sheet:
                  for row in sheet.rows():
                      df.append([item.v for item in row])
                  df = pd.DataFrame(df[1:], columns=df[0])
                  df.to_csv(outputname)
Exemplo n.º 4
0
    def read_xlsb(self,sheet_number):
         """
		Enter the sheet number that you need to read and create a data frame, you 
		can use itertions to upload multiple sheets.
	 """
	 df =[]
         with open_xlsb(self.inputfilename) as wb:
             with wb.get_sheet(sheet_number) as sheet:
                 for row in sheet.rows():
                     df.append([item.v for item in row])
                 df = pd.DataFrame(df[1:], columns=df[0])
                 return df
Exemplo n.º 5
0
def pandas_read_xlsb_file(filepath):
    """
    https://stackoverflow.com/questions/45019778/read-xlsb-file-in-pandas-python
    Accepts a filepath - pathlib.Path object
    Reads an xlsb file into a dataframe
    Returns the dataframe
    """
    dfs = []
    with open_xlsb(filepath) as wb:
        with wb.get_sheet(1) as sheet:
            for row in sheet.rows():
                dfs.append([item.v for item in row])
    df = pd.DataFrame(dfs[1:], columns=dfs[0])
    return df
Exemplo n.º 6
0
def extract(macro_module):
    df = []
    #read xlsb
    with open_xlsb('./test/TCMUR.xlsb') as wb:
        with wb.get_sheet(1) as sheet:
            for row in sheet.rows():
                df.append([item.v for item in row])
    #get week list
    wk_list = []
    for y in range(len(df[1])):
        if df[13][y] is not None and df[13][y] != 'REGION' and df[13][
                y] != 'COUNTRY' and df[13][y] != 'CENTER' and df[13][
                    y] != 'KPI':
            wk_list.append(df[13][y])

    #transfer list to dataframe
    df = (pd.DataFrame(df[14:], columns=df[13]))

    #drop row and column where are all NaN data
    df.dropna(axis=0, how='all', inplace=True)
    df.dropna(axis=1, how='all', inplace=True)

    #extract value of area
    dfOnlyValue = df.iloc[:, 3:]
    apac = df['REGION'].values.tolist().index('APAC Rate')
    df_Value_APAC = dfOnlyValue[:apac]

    #extract country
    apac_country = []
    for i in range(len(df['COUNTRY'])):
        if df['COUNTRY'][i] is not None and i < apac:
            apac_country.append(df['COUNTRY'][i])

    #deal with problem of string
    for j in range(len(apac_country)):
        if apac_country[j] == "CEETE B'IVRRE":
            apac_country[j] = 'CEETE B\"IVRRE'

    #items
    kpi = ['Rate', 'U Qty', 'C Qty', 'Target']

    #Region total rate
    apac_total = df.iloc[:, -1:][apac:apac + 1].values.tolist()[0][0]

    #insert APAC's Rate to mysql
    process.drawTCMUR(df_Value_APAC.values, wk_list, 'APAC', apac_country, kpi,
                      macro_module, apac_total)
Exemplo n.º 7
0
class xlsb :
    def __init__(self,inputfilename):
        self.inputfilename = inputfilename
   
    def read_xlsb(self,sheet_number):
         """
		Enter the sheet number that you need to read and create a data frame, you 
		can use itertions to upload multiple sheets.
	 """
	 df =[]
         with open_xlsb(self.inputfilename) as wb:
             with wb.get_sheet(sheet_number) as sheet:
                 for row in sheet.rows():
                     df.append([item.v for item in row])
                 df = pd.DataFrame(df[1:], columns=df[0])
                 return df
        
    def xlsb_export_csv(self,sheet_number,outputname):
          """
		Enter the sheet number, and ouput file name in the format of .csv that you need to 
		read and create a data frame, you can use itertions to upload multiple sheets.
	  """
	  df =[]
          with open_xlsb(self.inputfilename) as wb:
              with wb.get_sheet(sheet_number) as sheet:
                  for row in sheet.rows():
                      df.append([item.v for item in row])
                  df = pd.DataFrame(df[1:], columns=df[0])
                  df.to_csv(outputname)
                  
    def xlsb_export_xlsx(self,sheet_number,outputname,sheet_name="Sheet1"):
	 """
		Enter the sheet number,output file name and sheet name that you need to read and
		create a data frame, you can use itertions to export multiple sheets.
	 """        
	df =[]
        with open_xlsb(self.inputfilename) as wb:
              with wb.get_sheet(sheet_number) as sheet:
                  for row in sheet.rows():
                      df.append([item.v for item in row])
                  df = pd.DataFrame(df[1:], columns=df[0])
                  df.to_excel(outputname,  sheet_name=sheet_name)
def saveExcelRowsInArray():
    global fileName, sheetName, fileExist, rowArray, rowNumber, helpNeeded, sqlStatement, tableName, columnNameArray, sysCliColumn, noOfRowsToCopy

    with open_xlsb(fileName) as wb:
        with wb.get_sheet(sheetName) as sheet:
            print('Reading rows of Excel sheet')
            print(
                'This could take a while, depending on how many rows you are going to use in the SQL Statement'
            )
            print('')
            for row in sheet.rows(sparse=True):
                if (not rowNumber == 0):
                    #print("RowNumber != 0")
                    if (rowNumber <= int(noOfRowsToCopy)
                        ):  # (51) This will take the first 50 rows
                        ## Go Through columns and put them into an array inside another array
                        ## This way, the data will be a lot more readable
                        rowArray.append([item.v for item in row])
                    else:
                        break
                rowNumber = rowNumber + 1
Exemplo n.º 9
0
def get_data_hsbc():
    file_name = foldername + "/" + "hsbc" + file_date + ".xlsb"
    wb = open_xlsb(file_name)
    data = list()
    sheets = len(wb.sheets)
    for i in range(2, sheets):
        sheet = wb.get_sheet(i)
        i = 0
        for row in sheet.rows():
            cells = list(row)
            if i == 2:
                scheme_name = cells[2][2]
            if i >= 6:
                if cells[2][2] != None:
                    entry = {
                        "Name of scheme": scheme_name,
                        "Date": render_date(cells[1][2]),
                        "REGULAR_Base_TER_perc": render_value(cells[2][2]),
                        "REGULAR_Addnl_52_6a_b_exp_perc":
                        render_value(cells[3][2]),
                        "REGULAR_Addnl_52_6a_c_exp_perc":
                        render_value(cells[4][2]),
                        "REGULAR_GST_perc": render_value(cells[5][2]),
                        "REGULAR_Total_ter_perc": render_value(cells[6][2]),
                        "DIRECT_Base_TER_perc": render_value(cells[9][2]),
                        "DIRECT_Addnl_52_6a_b_exp_perc":
                        render_value(cells[10][2]),
                        "DIRECT_Addnl_52_6a_c_exp_perc":
                        render_value(cells[11][2]),
                        "DIRECT_GST_perc": render_value(cells[12][2]),
                        "DIRECT_Total_ter_perc": render_value(cells[13][2])
                    }

                    data.append(entry)
            i += 1
    return data
Exemplo n.º 10
0
import pandas as pd
import numpy as np

from pyxlsb import open_workbook as open_xlsb

df = []

Years = [
    '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
    '2009', '2010', '2011', '2012', '2013', '2014'
]
for year in Years:

    with open_xlsb('WIOT' + year + '_Nov16_ROW.xlsb') as wb:
        with wb.get_sheet(1) as sheet:
            for row in sheet.rows():
                df.append([item.v for item in row])

    df = pd.DataFrame(df[1:])

    matrix = df.values

    industries = list(df.iloc[2].unique())
    industries.remove(None)
    industries.remove('(millions of US$)')
    industries.remove('Final consumption expenditure by households')
    industries.remove(
        'Final consumption expenditure by non-profit organisations serving households (NPISH)'
    )
    industries.remove('Final consumption expenditure by government')
    industries.remove('Gross fixed capital formation')
Exemplo n.º 11
0
import numpy as np
import pandas as pd

from pyxlsb import open_workbook as open_xlsb
df = []

with open_xlsb('AVS Dashboard - 2019.xlsb') as wb:
    with wb.get_sheet("Raw Data") as sheet:
        for row in sheet.rows():
            df.append([item.v for item in row])

df1 = pd.DataFrame(df[1:], columns=df[0])


    


import os
import pandas as pd
from pyxlsb import open_workbook as open_xlsb
os.chdir(r'C:\Users\paulo.roberto\Downloads\Plan PNT')
path = (r'C:\Users\paulo.roberto\Downloads\Plan PNT')

df = pd.DataFrame()

for i in os.listdir(path):
    with open_xlsb(i).get_sheet('Plan PNT') as sheet:
        df1 = pd.DataFrame(sheet.v for v in sheet)
        df = df.append(df1)
        sheet.close()

df1.to_excel("teste.xlsx")
# In[1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# In[2]:

from pyxlsb import open_workbook as open_xlsb

# In[204]:

df = []

with open_xlsb('WIOT2000_Nov16_ROW.xlsb') as wb:
    with wb.get_sheet(1) as sheet:
        for row in sheet.rows():
            df.append([item.v for item in row])

df = pd.DataFrame(df[1:])

matrix = df.values

industries = list(df.iloc[2].unique())
industries.remove(None)
industries.remove('(millions of US$)')
industries.remove('Final consumption expenditure by households')
industries.remove(
    'Final consumption expenditure by non-profit organisations serving households (NPISH)'
)
Exemplo n.º 14
0
 def get_sheet_names(file_path):
     """
     Will return the list of sheet names this Excel is containing.
     """
     with open_xlsb(file_path) as wb:
         return wb.sheets
Exemplo n.º 15
0
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import pandas as pd
from pyxlsb import open_workbook as open_xlsb

df1 = []

with open_xlsb(
        'C:/Users/HP/Desktop/EXL EQ 2020/EXL_EQ_2020_Train_datasets.xlsb'
) as wb:
    with wb.get_sheet(1) as sheet:
        for row in sheet.rows():
            df1.append([item.v for item in row])

df1 = pd.DataFrame(df1[1:], columns=df1[0])

# In[2]:

import pandas as pd
from pyxlsb import open_workbook as open_xlsb

df2 = []

with open_xlsb('C:/Users/HP/Desktop/EXL EQ 2020/EXL_EQ_2020_Test_Datasets.xlsb'
               ) as wb:
    with wb.get_sheet(2) as sheet:
        for row in sheet.rows():
            df2.append([item.v for item in row])
Exemplo n.º 16
0

# In[261]:


from pyxlsb import open_workbook as open_xlsb


# In[262]:


# load the dataset.

df = []

with open_xlsb('WC_Data_Science_Case_Study.xlsb') as wb:
    with wb.get_sheet(1) as sheet:
        for row in sheet.rows():
            df.append([item.v for item in row])

df = pd.DataFrame(df[1:], columns=df[0])

df.head()


# In[263]:


# list of column names.
df.columns
"""
if calendar.day_name[date.today().weekday()] == "Sunday":
    wk = date.today().isocalendar()[1]
else:
    wk = date.today().isocalendar()[1] - 1
"""
wk = 52
fold = "wk" + str(wk)

# Importing raw data
os.chdir("C:/Users/" + username + "/Desktop/AVS Dashboard")
file_path = "AVS Dashboard - 2019.xlsb"

df = []

with open_xlsb(file_path) as wb:
    with wb.get_sheet("Raw Data") as sheet:
        for row in sheet.rows():
            df.append([item.v for item in row])

df = pd.DataFrame(df[1:], columns=df[0])

# Creating week folder on Desktop
source_dir = "C:/Users/" + username + "/Desktop"
os.chdir(source_dir)
if not os.path.exists(fold):
    os.makedirs(fold)
os.chdir(fold)

# Tasks pending from more than 50 days
df["Open Ageing"] = pd.to_numeric(df["Open Ageing"], errors="coerce")