Exemplo n.º 1
0
def diag_aggregates():
    """Build a diagnostics workbook comparing simulated vs. real aggregates.

    For each year sheet of the workbook referenced by the module-level
    name ``fname_all`` (not visible here — presumably a path to an .xlsx
    file), keep the measure name plus the two relative-difference columns,
    stack all years into a single frame and write it out to
    '<fname_all stem>_diag.xlsx'.

    NOTE(review): Python 2 print statement and legacy pandas API
    (``sortlevel``, ``DataFrame.append``) — this block predates pandas 1.x.
    """

    years = ['2006', '2007', '2008', '2009']

    df_final = None
    for yr in years:
        # NOTE(review): the workbook is re-opened on every iteration; it
        # could be opened once before the loop.
        xls = ExcelFile(fname_all)
        # NOTE(review): 'hindex_col' is not a documented parse() argument —
        # verify (index_col was probably intended).
        df = xls.parse(yr, hindex_col= True)

        # Full column layout of each sheet, kept for reference; only
        # `selected_cols` below is actually used.
        cols = [u"Mesure",
                u"Dépense \n(millions d'€)",
                u"Bénéficiaires \n(milliers)",
                u"Dépenses \nréelles \n(millions d'€)",
                u"Bénéficiaires \nréels \n(milliers)",
                u"Diff. relative \nDépenses",
                u"Diff. relative \nBénéficiaires"]
        selected_cols = [u"Mesure", u"Diff. relative \nDépenses", u"Diff. relative \nBénéficiaires"]
        df = df[selected_cols]
        df['year'] = yr
        # Preserve the original row order through the multi-index.
        df['num'] = range(len(df.index))
        df = df.set_index(['num', u'Mesure', 'year'])
        if df_final is None:
            df_final = df
        else:

            df_final = df_final.append(df, ignore_index=False)

#    DataFrame.groupby()
    # Sort on the 'num' level so measures line up across years.
    df_final = df_final.sortlevel(0)
    print str(fname_all)[:-5]+'_diag.xlsx'
    writer = ExcelWriter(str(fname_all)[:-5]+'_diag.xlsx')
    df_final.to_excel(writer, sheet_name="diagnostics", float_format="%.2f")
    writer.save()
Exemplo n.º 2
0
class EXCEL:
    """Thin wrapper around pandas.ExcelFile.

    Opens a workbook, auto-selects the sheet when there is exactly one,
    and can merge all sheets or save the current data back out.
    """

    def __init__(self, xls_filepath):
        # Path of the workbook being wrapped.
        self.filepath = xls_filepath
        self.xls_reader = ExcelFile(xls_filepath)
        self.sheet_names = self.xls_reader.sheet_names
        # A single-sheet workbook needs no explicit selection.
        if len(self.sheet_names) == 1:
            self.select_sheet(self.sheet_names[0])
        # Timestamp taken when the workbook was opened.
        self.time = datetime.datetime.now()

    def add(self):
        # Placeholder — intentionally not implemented.
        pass

    @property
    def data(self):
        """DataFrame of the currently selected (or merged) sheet.

        NOTE(review): undefined until select_sheet()/merge_sheet() has run,
        which for multi-sheet files does not happen automatically.
        """
        return self._data

    def select_sheet(self, sheet_name):
        """Parse *sheet_name* and make it the current data."""
        self._data = self.xls_reader.parse(
            sheet_name)  #self._data = pd.read_excel(xls_filepath)

    def merge_sheet(self):
        """Concatenate every sheet of the workbook into one DataFrame."""
        sheets = []
        for sheet_name in self.sheet_names:
            sheet = self.xls_reader.parse(sheet_name)
            sheets.append(sheet)
        self._data = pd.concat(sheets)

    def save(self, xls_filepath, sheet_name='Sheet5'):
        """Write the current data to *xls_filepath*, closing the reader first."""
        self.xls_reader.close()
        self.xls_writer = ExcelWriter(xls_filepath)
        self._data.to_excel(self.xls_writer, sheet_name)
        self.xls_writer.save()
Exemplo n.º 3
0
def read_excel(fname, header=None):
    """Read the first sheet of an Excel file into a dict.

    Args:
        fname: path of the Excel file.
        header: optional column name for files without a header row (the
            Finland files); when given, only the second column is parsed
            and the single resulting key is renamed to this value.
    Returns:
        dict mapping (normalized) column names to column dicts, with
        " - " in the names replaced by "_".
    """
    xls = ExcelFile(fname)
    # The Finland files carry no header row: parse only column 1.
    parse_cols = [1] if header else None

    df = xls.parse(xls.sheet_names[0], skiprows=1, parse_cols=parse_cols)

    # Normalize keys in a new dict.  The original popped keys out of the
    # dict while iterating over it, which raises RuntimeError ("dictionary
    # changed size during iteration").
    temp = {key.replace(" - ", "_"): value
            for key, value in df.to_dict().items()}

    # Stupid hack for Finland: rename the single parsed column to the
    # caller-supplied name.  next(iter(...)) replaces the Python 2-only
    # temp.keys()[0].
    if header:
        temp[header] = temp.pop(next(iter(temp)))

    return temp
Exemplo n.º 4
0
def network_UKGDS(filename,header=28):
    """Load an Excel file in UKGDS format and build the network graph.

    Returns a networkx Graph plus a dict of bus coordinates keyed by node
    index, suitable for plotting with the networkx module.
    """
    from numpy import array,where
    from pandas import ExcelFile
    from networkx import Graph

    workbook = ExcelFile(filename)
    buses = workbook.parse("Buses",header=header)
    branches = workbook.parse("Branches",header=header)

    # Node index -> (x, y) coordinate taken from the bus table.
    coords = {}
    for idx in range(len(buses["BNU"])):
        coords[idx] = array([buses["BXC"][idx], buses["BYC"][idx]])

    # Resolve each branch's from/to bus numbers to positional bus indices.
    edges = []
    for k in range(len(branches["CFB"])):
        src = where(buses["BNU"] == branches["CFB"][k])[0][0]
        dst = where(buses["BNU"] == branches["CTB"][k])[0][0]
        edges.append([src, dst])

    graph = Graph()
    for node in {endpoint for pair in edges for endpoint in pair}:
        graph.add_node(node)
    for src, dst in edges:
        graph.add_edge(src, dst)
    return graph,coords
Exemplo n.º 5
0
    def convert(self, file_bytes):
        """Parse the configured sheet out of raw workbook bytes.

        Returns (True, json_string) on success; otherwise whatever
        self._default_value() yields (bad bytes, missing sheet, or a
        reader failure).
        """
        workbook = None
        try:
            workbook = ExcelFile(BytesIO(file_bytes))
        except Exception as e:
            log.error("Error reading in excel bytes, {}".format(e))

        # Bail out early when the bytes were unreadable or the expected
        # sheet is absent.
        if workbook is None:
            return self._default_value()
        if self.sheet_reader.sheet_name not in workbook.sheet_names:
            return self._default_value()

        try:
            frame = workbook.parse(self.sheet_reader.sheet_name)
            ok, payload = self.sheet_reader.read(frame)
            if ok:
                return True, json.dumps(payload, sort_keys=True)
        except Exception as e:
            log.error("Error parsing file: {}".format(e))

        return self._default_value()
Exemplo n.º 6
0
def uploadfile_store(request):
    """Django view: render an uploaded .xls/.xlsx file as an HTML table.

    The first sheet is parsed with pandas and returned embedded in a small
    host page that also resets the upload widget.  Unsupported extensions
    get an alert stub (message in Russian); any processing error returns
    an empty stub.

    NOTE(review): Python 2 code (StringIO module).  Non-POST requests fall
    through and return None — verify that is intended.
    """

    if request.method == 'POST':

        # NOTE(review): the bare except at the bottom silently swallows
        # every failure in this block, including missing-upload errors.
        try:

            filename = request.FILES['fileupload'].name
            filedata = request.FILES['fileupload'].read()

            file_extension = os.path.splitext(filename)[-1]



            if file_extension == ".xls" or file_extension == ".xlsx":

                # Wrap the raw upload bytes so pandas can read them.
                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0],header=None, index_col=None, na_values="")
                df=df.fillna("")
                # Render the sheet as a styled, draggable HTML table.
                ht = df.to_html(header=True, index=True, float_format=lambda x: '%10.2f' % x, classes="table table-bordered table-striped draggable").encode('utf-8')




                return HttpResponse("""
                <html><head><script type="text/javascript">
                window.top.ClearUploadEisup();            
                </script>
                <style>
                table {
                        border-collapse: collapse;
                        margin-left: 30px;
                }
    
                table, th, td {
                        border: 1px solid black;
                        font-family: Verdana, Arial, Helvetica, sans-serif; 
                        font-size: 8pt;  
                }
                </style>
                </head>%s</html>
                """ % ht)


            else:



                # Unsupported extension: reset the widget and alert the user.
                return HttpResponse("""
                <html><head><script type="text/javascript">                
                    window.top.ClearUploadEisup();            
                    alert("Формат файла не поддерживается!");
                </script></head></html>
                """)


        except:
            return HttpResponse("""
            <html><head><script type="text/javascript">                
            </script></head></html>
            """)
Exemplo n.º 7
0
    def from_excel(self, excel_file, units):
        """Pulls timing info from excel file and stores in an array.

        Parameters
        ----------
            excel_file:
                the name of the file to import from, e.g. file.xls
                (only 'Sheet1' is read).
            units:
                the units the imported data is in

        Raises
        ------
            DataError: when the imported frames fail validation.

        NOTE(review): Python 2 print statement below; also note that
        validation runs even when the read failed with IOError, in which
        case self.data keeps its previous value.
        """
        try:
            df = ExcelFile(excel_file).parse('Sheet1') #dataframe
            rec = df.to_records()

            #can be converted to numpy array
            #by using rec.astype all the same type
            #then calling .view(that type) with the result 
            #supposedly this is faster than the below method

            dat_arr = np.array(rec.tolist()) #pirate

            #get rid of the 'index' column from pandas
            self.data = dat_arr[0:dat_arr.shape[0], 1:self.col_num + 1]
            self.units = units
        except IOError:
            print "Oops."
        try:
            self._validate_frames()
        except FrameError:
            raise DataError('Bad data', self.data, excel_file)
Exemplo n.º 8
0
def read_and_save(file_name, log_file):
    """Import bank-branch rows from an Excel file into BankDetail.

    Rows already present (matched on IFSC code) are updated in place;
    others are created.  On a missing expected column, the file name is
    written to *log_file* and the rest of the file is skipped.
    """
    print(f"Reading {file_name }")
    workbook = ExcelFile(file_name)
    sheet = workbook.parse(workbook.sheet_names[0])
    for _, row in sheet.iterrows():
        try:
            # Column names vary between source files.
            branch = row['OFFICE'] if 'OFFICE' in row.keys() else row['BRANCH']
            bank_name = (row['BANK NAME'] if 'BANK NAME' in row.keys()
                         else row['BANK'])

            # Upsert keyed on the IFSC code.
            existing = BankDetail.objects.filter(ifsc_code=row['IFSC'])
            if existing:
                existing.update(branch_name=branch,
                                bank_name=bank_name,
                                branch_address=row['ADDRESS'])
            else:
                BankDetail(ifsc_code=row['IFSC'],
                           branch_name=branch,
                           bank_name=bank_name,
                           branch_address=row['ADDRESS']).save()

        except KeyError as e:
            log_file.write(file_name)
            print(e)
            print(
                f"Error while reading file { file_name.split(os.sep)[-1] }\nSkipping."
            )
            break
Exemplo n.º 9
0
def parse_trans_bonx(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    """Parse transaction sheets in the 'bonx' bank-export layout.

    For every sheet: rows 0-3 carry account metadata as 'label:value'
    text in column 0, and the transaction table's header sits on row 6.
    Each sheet is parsed with all cells as str, columns renamed to the
    canonical names, holder name/account/card columns stamped on, and the
    frame appended to *tmp_trans_list_by_sheet*.

    Returns the total number of transaction rows appended.
    """
    # Source header -> canonical column name.
    col_map = {
        '交易机构': '交易网点',
        '交易类型': '交易方式',
        '借贷标识': '借贷标志',
        '对方行名': '对方开户行',
        '对方名称': '对方户名',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        # First rows hold the account header block; skip empty sheets.
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue
        # Metadata lines look like 'label:value'; keep the value part.
        _name = tmp_acc_strs.iloc[1, 0].split(':')[1]
        _account = tmp_acc_strs.iloc[2, 0].split(':')[1]
        _card = tmp_acc_strs.iloc[3, 0].split(':')[1]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=6,
                                           dtype=str)
        # Headers may carry stray whitespace in the export.
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        # Stamp every transaction row with the account-holder info.
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Exemplo n.º 10
0
 def __init__(self, xls_filepath):
     """Open *xls_filepath* and auto-select the sheet when there is only one."""
     # Path of the workbook being wrapped.
     self.filepath = xls_filepath
     self.xls_reader = ExcelFile(xls_filepath)
     self.sheet_names = self.xls_reader.sheet_names
     # A single-sheet workbook needs no explicit selection.
     if len(self.sheet_names) == 1:
         self.select_sheet(self.sheet_names[0])
     # Timestamp taken when the workbook was opened.
     self.time = datetime.datetime.now()
Exemplo n.º 11
0
def build_totals():
    """Collect the 'amounts' (and optional 'benef') sheets of a fixed list
    of workbooks, concatenate them, and store both frames (indexed by
    'var') in ../amounts.h5.
    """
    h5_name = "../amounts.h5"
    store = HDFStore(h5_name)
    files = [
        'logement_tous_regime', 'openfisca_pfam_tous_regimes',
        'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_RegimeGeneral'
    ]
    first = True
    for xlsfile in files:
        xls = ExcelFile(xlsfile + '.xlsx')
        df_a = xls.parse('amounts', na_values=['NA'])
        try:
            df_b = xls.parse('benef', na_values=['NA'])
        except Exception:  # some workbooks carry no 'benef' sheet
            df_b = DataFrame()

        if first:
            amounts_df = df_a
            benef_df = df_b
            first = False
        else:
            amounts_df = concat([amounts_df, df_a])
            benef_df = concat([benef_df, df_b])

    amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index(
        "var")
    # Python 3 print calls (were Python 2 print statements).
    print(amounts_df.to_string())
    print(benef_df.to_string())
    store['amounts'] = amounts_df
    store['benef'] = benef_df
    # Fix: was `store.close` (attribute access, never invoked), which
    # leaked the HDF5 file handle.
    store.close()
Exemplo n.º 12
0
def parse_trans_pab(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    """Parse transaction sheets in the 'pab' bank-export layout
    (presumably Ping An Bank — TODO confirm).

    For every sheet: the first five rows hold account metadata in fixed
    cells, the transaction table's header sits on row 6 and the last two
    rows are a footer.  Each sheet is parsed with all cells as str,
    columns renamed to the canonical names, holder metadata columns
    stamped on, thousands separators stripped from the two amount
    columns, and the frame appended to *tmp_trans_list_by_sheet*.

    Returns the total number of transaction rows appended.
    """
    # Source header -> canonical column name.
    col_map = {
        '借方发生额': '交易金额',
        '交易对方户名': '对方户名',
        '交易对方账号': '对方账号',
        '交易对方行名称': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        # Metadata block: fixed cell positions after dropping empty columns.
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=5)
        tmp_acc_strs.dropna(how='all', axis=1, inplace=True)
        _name = tmp_acc_strs.iloc[1, 3]
        _account = tmp_acc_strs.iloc[1, 1]
        _card_num = tmp_acc_strs.iloc[2, 1]
        _currency = tmp_acc_strs.iloc[4, 3]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=6,
                                           dtype=str,
                                           skipfooter=2)
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        # Stamp every transaction row with the account-holder info.
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card_num
        tmp_trans_sheet['币种'] = _currency
        # Strip thousands separators so the amounts parse as numbers later.
        tmp_trans_sheet['交易金额'] = tmp_trans_sheet['交易金额'].str.replace(',', '')
        tmp_trans_sheet['贷方发生额'] = tmp_trans_sheet['贷方发生额'].str.replace(
            ',', '')
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Exemplo n.º 13
0
    def build_lookup_table(data_model: FileDataModel, value_column,
                           label_column, workflow_spec_id, field_id):
        """ In some cases the lookup table can be very large.  This method will add all values to the database
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.

          Raises ApiError('invalid_enum') when either the value or the
          label column is missing from the spreadsheet; otherwise returns
          the committed LookupFileModel.
          """
        xls = ExcelFile(data_model.data)
        df = xls.parse(
            xls.sheet_names[0])  # Currently we only look at the fist sheet.
        # Normalize NaN cells to None so they store cleanly.
        df = pd.DataFrame(df).replace({np.nan: None})
        if value_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named % s" %
                (data_model.file_model.name, value_column))
        if label_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named % s" %
                (data_model.file_model.name, label_column))

        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                       field_id=field_id,
                                       file_data_model_id=data_model.id,
                                       is_ldap=False)

        db.session.add(lookup_model)
        # One LookupDataModel per spreadsheet row; the full row is kept as
        # an ordered dict in `data` for searching.
        for index, row in df.iterrows():
            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                          value=row[value_column],
                                          label=row[label_column],
                                          data=row.to_dict(OrderedDict))
            db.session.add(lookup_data)
        db.session.commit()
        return lookup_model
Exemplo n.º 14
0
def excel_to_corpus(excel_path, corpus_path):
    '''NB! Make sure to use .xls file extension for Excel files.

    Copy every sheet of *excel_path* (sheets must be named "0", "1", ...)
    into a PyCorpus at *corpus_path*, undoing lossy xls conversions:
    NaN -> None, 0 -> False, 1 -> True.
    '''
    corpus = PyCorpus(corpus_path)
    excel  = ExcelFile(excel_path)
    # as we do not know the number of sheets, we parse all of them
    # until we obtain a error
    idx = 0
    while True:
        try:
            df = excel.parse(str(idx))
            # recreate some information that was modified when exporting to xls
            # NOTE(review): the 0/1 -> False/True mapping also converts
            # genuinely numeric 0/1 cells — verify that is acceptable.
            new_df = dict()
            for col in df.columns:
                data = []
                for v in df[col]:
                    if type(v) == float and math.isnan(v):
                        data.append(None)
                    elif v == 0:
                        data.append(False)
                    elif v == 1:
                        data.append(True)
                    else:
                        data.append(v)
                new_df[col] = Series(data)
            corpus[str(idx)] = DataFrame(new_df)
        except xlrd.biffh.XLRDError:
            # Raised when sheet str(idx) does not exist: we are done.
            break
        idx += 1
    corpus.close()
def build_totals():
    """Collect the 'amounts' (and optional 'benef') sheets of a fixed list
    of workbooks, concatenate them, and store both frames (indexed by
    'var') in ../amounts.h5.
    """
    h5_name = "../amounts.h5"
    store = HDFStore(h5_name)

    files = ['logement_tous_regime', 'pfam_tous_regimes',
             'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_TousRegimes' ]

    first = True
    for xlsfile in files:
        xls = ExcelFile(xlsfile + '.xlsx')
        # Python 3 print call (was a Python 2 print statement).
        print(xls.path_or_buf)
        df_a = xls.parse('amounts', na_values=['NA'])
        try:
            df_b = xls.parse('benef', na_values=['NA'])
        except Exception:  # some workbooks carry no 'benef' sheet
            df_b = DataFrame()

        if first:
            amounts_df = df_a
            benef_df = df_b
            first = False
        else:
            amounts_df = concat([amounts_df, df_a])
            benef_df = concat([benef_df, df_b])

    amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index("var")
    print(amounts_df.to_string())
    print(benef_df.to_string())
    store['amounts'] = amounts_df
    store['benef'] = benef_df
    # Fix: was `store.close` (attribute access, never invoked), which
    # leaked the HDF5 file handle.
    store.close()
Exemplo n.º 16
0
    def action_import_pricelists(self):
        """Odoo action: import price-list lines from the wizard's uploaded
        base64-encoded spreadsheet into the active pricelists record.

        The first sheet must contain 'PID' and 'Price (Inc. Vat)' columns;
        raises except_orm on unknown PIDs or empty values.

        NOTE(review): str.decode('base64') is Python 2 only.
        """
        for item in self:
            active_id = item._context['active_id']
            pricelists_obj = item.env[item._context['active_model']]
            product_obj = item.env['product.product']
            # Decode the uploaded binary payload and hand it to pandas.
            data = StringIO(item.binary_data.decode('base64'))
            xls = ExcelFile(data)
            data = xls.parse(xls.sheet_names[0])
            pricelists_dict = data.to_dict()

            for running in range(0, len(pricelists_dict['PID'])):
                # PIDs are stored zero-padded to 7 characters.
                pid = str(pricelists_dict['PID'][running]).zfill(7)
                price_inc_vat = pricelists_dict['Price (Inc. Vat)'][running]
                product_id = product_obj.search([('default_code', '=', pid)
                                                 ]).id

                if not product_id:
                    raise except_orm(_('PID does not exist: %r') % (pid, ))
                if not pid or not price_inc_vat:
                    raise except_orm(_('Some PID or Price have empty text.'))

                # One price-list line per spreadsheet row.
                pricelists_obj.pricelists_line_ids.create({
                    'pricelists_id':
                    active_id,
                    'product_id':
                    product_id,
                    'price_inc_vat':
                    price_inc_vat,
                })

        return {'type': 'ir.actions.act_window_close'}
Exemplo n.º 17
0
def parse_trans_psbc(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    """Parse transaction sheets in the 'psbc' bank-export layout
    (presumably Postal Savings Bank of China — TODO confirm).

    For every sheet: rows 0-3 hold account metadata as colon-separated
    text in column 0, the transaction table's header sits on row 5, and
    the last three rows are a footer.  Each sheet is parsed with all
    cells as str, columns renamed to the canonical names, holder
    name/account/currency columns stamped on, and the frame appended to
    *tmp_trans_list_by_sheet*.

    Returns the total number of transaction rows appended.
    """
    # Source header -> canonical column name.
    col_map = {
        '交易渠道': '交易方式',
        '交易机构名称': '交易网点',
        '对方账号/卡号/汇票号': '对方账号',
        '对方开户机构': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        # First rows hold the account header block; skip empty sheets.
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue
        # Row 1 packs name and account into one colon-separated string.
        _tmp_str = tmp_acc_strs.iloc[1, 0].split(':')
        _name = _tmp_str[2]
        _account = _tmp_str[1].split()[0]
        _currency = tmp_acc_strs.iloc[3, 0].split(':')[1].split()[0]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=5,
                                           dtype=str,
                                           skipfooter=3)
        # Headers may carry stray whitespace in the export.
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        # Stamp every transaction row with the account-holder info.
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['币种'] = _currency
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Exemplo n.º 18
0
 def __init__(self, input_file):
     """
     Create a class to import and parse the excel spreadsheet that is used
     as an input file for V/UQ-predictivity.
     """
     # Path of the spreadsheet supplied by the caller.
     self.file_name = input_file
     # Import the excel file:
     self.xlfile = ExcelFile(self.file_name)  # to retrieve & work w/ input
Exemplo n.º 19
0
def dump_xlsx2dict(xlsx_file):
    """Read the first sheet of *xlsx_file* and return its rows as a list
    of dicts keyed by column name.

    The sheet must contain an '01_PatientName' column; its length defines
    the number of row dicts emitted.
    """
    xls = ExcelFile(xlsx_file)
    df = xls.parse(xls.sheet_names[0])
    # Renamed from `dict`, which shadowed the builtin.
    columns = df.to_dict()
    return [{key: value[i] for key, value in columns.items()}
            for i in range(len(columns['01_PatientName']))]
Exemplo n.º 20
0
def build_actualisation_group_names_h5():
    """Store the 'defs' sheet of actualisation_groups.xls under the key
    'names' in ../actualisation_groups.h5.
    """
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    # Fix: was a Python 2 print statement.
    print(df.to_string())
    store.close()
def build_actualisation_group_names_h5():
    """Store the 'defs' sheet of actualisation_groups.xls under the key
    'names' in ../actualisation_groups.h5.
    """
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    # Fix: was a Python 2 print statement.
    print(df.to_string())
    store.close()
Exemplo n.º 22
0
def xls_to_dict(filepath):
    """Parse the first sheet of *filepath* into a list of row dicts.

    Returns {} when the file cannot be opened.
    """
    try:
        xls = ExcelFile(filepath)
    except IOError:
        # Fix: was a Python 2 print statement.
        print('%s File Not found' % filepath)
        return {}
    df = xls.parse(xls.sheet_names[0])
    return df.to_dict(orient='records')
def get_loyer_inflator(year):
    """Return the rent (IRL) index for *year* Q1 relative to 2006 Q1."""
    xls = ExcelFile('../countries/france/data/sources/loyers.xlsx')
    data = xls.parse('data', na_values=['NA'])
    # Restrict both lookups to the first quarter.
    first_quarter = data['quarter'] == 1
    base = data[(data['year'] == 2006) & first_quarter]['irl']
    current = data[(data['year'] == year) & first_quarter]['irl']
    return float(current.values / base.values)
Exemplo n.º 24
0
def start(file_name, download_name="example.xlsx"):
    """Read *file_name* and return a download link for its first sheet."""
    with open(file_name, "rb") as handle:
        raw = handle.read()

    workbook = ExcelFile(io.BytesIO(raw), engine='openpyxl')
    first_sheet = workbook.parse(workbook.sheet_names[0])

    return create_download_link_excel(first_sheet, download_name)
Exemplo n.º 25
0
def get_loyer_inflator(year):
    """Return the rent (IRL) index for *year* Q1 relative to 2006 Q1."""

    xls = ExcelFile('../countries/france/data/sources/loyers.xlsx')
    df = xls.parse('data', na_values=['NA'])
    irl_2006 = df[ (df['year'] == 2006) & (df['quarter'] == 1)]['irl']
#    print irl_2006
    irl = df[ (df['year'] == year) & (df['quarter'] == 1)]['irl']
#    print irl
    # Ratio of the two Q1 values; assumes exactly one row matches each year.
    return float(irl.values/irl_2006.values)
Exemplo n.º 26
0
	def openDialog(self):
		'''Opens a saved .xls file.

		Prompts for a file, clears current state, then loads every sheet
		as a Table tab and refreshes the view.
		'''
		title = 'Open a saved project file...'
		fileName, f = QFileDialog.getOpenFileName(self, title, self.path)
		excelFile = ExcelFile(fileName)
		self.__clearAll()
		# Plain loops instead of the previous list comprehensions, which
		# were built only for their side effects and then discarded.
		for sheet in excelFile.sheet_names:
			self.markers.append(Table(sheet, excelFile.parse(sheet)))
		for marker in self.markers:
			self.tabs.addTab(marker, marker.name)
		self.__updateView()
Exemplo n.º 27
0
    def build_lookup_table(file_id,
                           file_name,
                           file_data,
                           value_column,
                           label_column,
                           workflow_spec_id=None,
                           task_spec_id=None,
                           field_id=None):
        """ In some cases the lookup table can be very large.  This method will add all values to the database
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.

          Raises ApiError('excel_error') for unreadable/legacy .xls input
          and ApiError('invalid_enum') when the value or label column is
          missing; otherwise returns the committed LookupFileModel.
          """
        try:
            xlsx = ExcelFile(file_data, engine='openpyxl')
        # Pandas--or at least openpyxl, cannot read old xls files.
        # The error comes back as zipfile.BadZipFile because xlsx files are zipped xml files
        except BadZipFile:
            raise ApiError(
                code='excel_error',
                message=
                f"Error opening excel file {file_name}. You may have an older .xls spreadsheet. (file_model_id: {file_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})"
            )
        df = xlsx.parse(
            xlsx.sheet_names[0])  # Currently we only look at the fist sheet.
        # Clean the frame: best dtypes, drop spreadsheet artifacts, and
        # replace missing values with empty strings.
        df = df.convert_dtypes()
        df = df.loc[:, ~df.columns.str.contains(
            '^Unnamed')]  # Drop unnamed columns.
        df = pd.DataFrame(df).dropna(how='all')  # Drop null rows
        df = pd.DataFrame(df).replace({NA: ''})

        if value_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named % s" %
                (file_name, value_column))
        if label_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named % s" %
                (file_name, label_column))

        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                       field_id=field_id,
                                       task_spec_id=task_spec_id,
                                       file_model_id=file_id,
                                       is_ldap=False)

        db.session.add(lookup_model)
        # One LookupDataModel per spreadsheet row; the full row is kept as
        # an ordered dict in `data` for searching.
        for index, row in df.iterrows():
            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                          value=row[value_column],
                                          label=row[label_column],
                                          data=row.to_dict(OrderedDict))
            db.session.add(lookup_data)
        db.session.commit()
        return lookup_model
Exemplo n.º 28
0
def loadfields():
    '''
    Get the variable names in the chosen excel sheet.

    GUI callback (tkinter): reads the file path, sheet and first row from
    the form widgets, loads the sheet into the module-global DFrame, and
    populates the column-chooser combo boxes with the sheet's columns.
    Relies on module-level widgets: ents, status, sheet_combo, frow,
    combos, combs, chk, b4.
    '''

    filename = ents[1][1].get()
    f = path.basename(filename)
    status.set("Status: loading data and column names of %s" %
               f.encode().decode())
    adds = ExcelFile(filename)
    sheet = sheet_combo.get()
    #   if first row is not entered, assume 1 and set the form to 1.
    if frow.get() == "":
        frow.insert(0, 1)
        first_row = 1
    else:
        first_row = int(frow.get())
    print("%s and %s onwards chosen." % (sheet, first_row))
    df = adds.parse(sheet, skiprows=first_row - 1)
    #print(df.columns.values)
    print("There are %s observations on this file." % len(df.index))
    # NOTE(review): the list literal below is a no-op statement (dead code).
    [
        'Business Name:', 'Street Number:', 'Street Name:', 'City/Borough:',
        'Zipcode:', 'Boro Code:'
    ]
    # Default column-name guesses per combo-box position.
    defaults = {
        0: 'trade',
        1: 'legal',
        2: 'originaladdress',
        3: 'streetnumber',
        4: 'streetname',
        5: 'Borough',
        6: 'pzip',
        7: 'boro',
        8: 'state',
        9: '',
        10: ''
    }
    # Enable each chooser pair and fill it with the sheet's columns
    # (plus an empty option), pre-selecting the default guess.
    for i in range(len(combos)):
        collist = list(df.columns.values)
        collist.append("")
        combs[i][1]['state'] = 'enabled'
        combs[i][2]['state'] = 'enabled'
        combs[i][1]['values'] = sorted(collist, key=keyfunction)
        combs[i][2]['values'] = sorted(collist, key=keyfunction)
        choose_default(i, 1, collist, defaults[i])
        choose_default(i, 2, collist, defaults[i])
    chk['state'] = 'enabled'
    b4['state'] = 'enabled'
    #    print(combs[0][0], df[combs[0][1].get()].head(10))
    status.set(
        "Status: Choose address fields, optionally edit output file, and press 'Geocode'"
    )
    global DFrame
    DFrame = df
    return df
Exemplo n.º 29
0
def drop_duplicates(final, keep):
    '''Drop's the Duplicate rows as some files have same repeated rows.

    Reads the 'Page 1' sheet of *final*, de-duplicates on the key columns,
    sorts, and rewrites the file in place.  *keep* is passed through to
    DataFrame.drop_duplicates ('first'/'last'/False).
    '''
    # Fix: ExcelFile takes no `index` argument — the stray kwarg raises
    # TypeError on current pandas and was removed.
    xls_file = ExcelFile(final)
    df = xls_file.parse('Page 1')
    print("\nDropping duplicates")
    deduped = df.drop_duplicates(subset=['Number', 'Expert_Assigned', 'Opened', 'Definition', 'Value', 'Created'],
                                 keep=keep)
    ordered = deduped.sort_values(['Number', 'Created', 'Definition', 'Expert_Assigned'],
                                  ascending=[True, True, False, True])
    ordered.to_excel(final, sheet_name='Page 1', index=False)
    return
Exemplo n.º 30
0
def get_custom_field_config(filename, sheetname):
    '''Early example of the import of a custom field config based upon a list of field names.

    Returns one dict per row with all values coerced to text; the sheet
    must have exactly three columns (name, required, description).
    '''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data.columns = ["name", "required", "description"]
    # Coerce the 'required' flags to booleans via the project helper.
    data["required"] = data["required"].apply(is_true)

    data = data.fillna('')
    # NOTE(review): `unicode` is Python 2 only.
    mydata = [{key: unicode(value) for key, value in point.items()} for point in data.T.to_dict().values()]
    return mydata
Exemplo n.º 31
0
def read_excel(filename):
    '''
    Read Excel File provided by filename.

    :param filename - path to an Excel file:
    :return: pandas dataframe
    '''
    workbook = ExcelFile(filename)
    first_sheet = workbook.sheet_names[0]
    return workbook.parse(first_sheet)
Exemplo n.º 32
0
def data_import(file) -> Dict[str, DataFrame]:
    """Read every sheet of *file* into a dict of DataFrames.

    Sheets whose name contains 'Flags' or 'EMS' are indexed by
    'Parameter'; all others by 'time' when present, falling back to the
    default index.
    """
    workbook = ExcelFile(file)
    sheets = dict()
    for name in workbook.sheet_names:
        if "Flags" in name or "EMS" in name:
            sheets[name] = workbook.parse(sheet_name=name, index_col="Parameter")
            continue
        try:
            sheets[name] = workbook.parse(sheet_name=name, index_col="time")
        except ValueError:
            sheets[name] = workbook.parse(sheet_name=name)
    return sheets
Exemplo n.º 33
0
def get_custom_field_config(filename, sheetname):
    '''Early example of the import of a custom field config based upon a list of field names.

    Returns one dict per row with all values coerced to text; the sheet
    must have exactly three columns (name, required, description).
    '''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data.columns = ["name", "required", "description"]
    # Coerce the 'required' flags to booleans via the project helper.
    data["required"] = data["required"].apply(is_true)

    data = data.fillna('')
    # NOTE(review): `unicode` is Python 2 only.
    mydata = [{key: unicode(value)
               for key, value in point.items()}
              for point in data.T.to_dict().values()]
    return mydata
Exemplo n.º 34
0
def uploadfile_page2(request):
    """Django view: attach an uploaded .xls/.xlsx indicator table to a
    reestr_proj record.

    The first sheet is rendered to HTML, stored in the record's `data`
    under 'excel', and a log comment is created.  Unsupported extensions
    return an alert stub (message in Russian).  All paths end by telling
    the parent frame to refresh.

    NOTE(review): Python 2 code (dict.has_key, StringIO module).
    """


    if request.method == 'POST':

        if request.POST.has_key("reestrproj") and request.FILES.has_key("file"):

            reestrproj = request.POST["reestrproj"]

            filename = request.FILES['file'].name
            filedata = request.FILES['file'].read()

            file_extension = os.path.splitext(filename)[-1]

            if file_extension != ".xls" and file_extension != ".xlsx":

                # Unsupported extension: reset the widget and alert the user.
                return HttpResponse("""
                <html><head><script type="text/javascript">
                    window.top.ClearUploadP2();
                    alert("Формат файла не поддерживается!");
                </script></head></html>
                """)

            else:

                rp = reestr_proj.objects.get(pk=int(reestrproj, 10))

                # Wrap the raw upload bytes so pandas can read them.
                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0], header=None)
                df=df.fillna("")
                ht = df.to_html(header=False,index=False, float_format=lambda x: '%10.2f' % x, classes="table table-bordered small").encode('utf-8')

                # Store the rendered table on the project record.
                data = rp.data
                data["excel"] = ht
                rp.data = data
                rp.save()

                # Audit-log entry for the upload.
                reestr_proj_comment.objects.create(
                    reestr_proj = rp,
                    user = request.user,
                    comment = u"Загружена таблица показателей",
                    log=True
                )



    return HttpResponse("""
    <html><head><script type="text/javascript">
        window.top.ClearUploadP2();
        window.top.GetTableExcel();
        window.top.GetListComments();
    </script></head></html>
    """)
Exemplo n.º 35
0
def get_sheet(filename, sheetname):
    '''Extracts a list of dicts from a worksheet of an Excel file along with the
    column names, data types and maximum widths'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data = data.fillna('')
    # Keep the original headers before normalizing them to key names.
    orig_cols = tuple(data.columns)
    replace = [get_key_from_field_name(column) for column in data.columns]
    data.columns = replace
    # Capture the dtypes before the text cast below erases them.
    types = copy(data.dtypes)
    for col in replace:
        # NOTE(review): `unicode` is Python 2 only.
        data[col] = data[col].values.astype(unicode)
    return (data.T.to_dict().values(), orig_cols, types, get_widths(data))
Exemplo n.º 36
0
def load_moc_and_engineers_info(excel_file: pd.ExcelFile):
    """Extract MoC and engineer metadata rows from the input sheet.

    Returns (moc_rows, engineer_rows, engineer_begin_index), where the two
    frames are the rows whose 'Name' starts with 'MoC' / 'Engineer'.
    """
    print("Loading MoC and engineers info")
    meta = excel_file.parse(INPUT_SHEET_NAME, header=4, usecols="A:C")
    # The second column is unlabeled in the sheet; give it a real name.
    meta.rename(columns={meta.columns[1]: "E-mail"}, inplace=True)

    names = meta['Name'].astype(str)
    moc_rows = meta[names.str.startswith('MoC')]
    engineer_rows = meta[names.str.startswith('Engineer')]

    first_column = excel_file.parse(INPUT_SHEET_NAME, usecols="A", header=None)
    begin_index = find_engineer_begin_index(first_column)
    print("Found engineer begin index at:", begin_index)
    return moc_rows, engineer_rows, begin_index
Exemplo n.º 37
0
def get_sheet(filename, sheetname):
    '''Extracts a list of dicts from a worksheet of an Excel file along with the
    column names, data types and maximum widths'''
    # NOTE(review): byte-for-byte duplicate of an identical helper earlier
    # in this file; consider sharing one definition.
    xls = ExcelFile(filename)
    # Blank cells become '' rather than NaN (na_values + fillna below).
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data = data.fillna('')
    orig_cols = tuple(data.columns)
    # Map the human-readable headers to normalised dict keys.
    replace = [get_key_from_field_name(column) for column in data.columns]
    data.columns = replace
    # Capture the dtypes before the text coercion below destroys them.
    types = copy(data.dtypes)
    for col in replace:
        # `unicode` is a Python 2 builtin; this helper predates Python 3.
        data[col] = data[col].values.astype(unicode)
    return (data.T.to_dict().values(), orig_cols, types, get_widths(data))
Exemplo n.º 38
0
def top_row(final):
    '''Rename the "Unnamed: N" header columns of sheet "Page 1" in the
    Excel file at path *final* to the required column names, rewriting
    the file in place.'''
    # BUG FIX: ``ExcelFile`` accepts no ``index`` keyword; passing
    # ``index=True`` raises TypeError on current pandas.
    xls_file1 = ExcelFile(final)
    df = xls_file1.parse('Page 1')
    print("\nRenaming")
    # pandas labels header-less columns "Unnamed: N"; map them to real names.
    df4 = df.rename(
        columns={"Unnamed: 0": 'Number', "Unnamed: 1": 'Priority', "Unnamed: 2": 'Opened', "Unnamed: 3": 'Definition',
                 "Unnamed: 4": 'Value', "Unnamed: 5": 'Expert_Assigned', "Unnamed: 6": 'Created',
                 "Unnamed: 7": 'End_time', "Unnamed: 8": 'Resolved', "Unnamed: 9": 'New_Resolved',
                 "Unnamed: 10": 'Closed', "Unnamed: 11": 'Main_file'})
    # Overwrite the source file with the renamed sheet.
    df4.to_excel(final, sheet_name='Page 1', index=False)
    return
def build_actualisation_group_amounts_h5():
    """Convert the 'amounts', 'benef' and 'corresp' sheets of
    actualisation_groups.xls into the ../actualisation_groups.h5 store."""
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df_a = xls.parse('amounts', na_values=['NA'])
    # 'case' identifies each row; promote it to the index.
    df_a = df_a.set_index(['case'], drop=True)
    df_b = xls.parse('benef', na_values=['NA'])
    df_c = xls.parse('corresp', na_values=['NA'])
    store['amounts'] = df_a
    store['benef'] = df_b
    store['corresp'] = df_c
    # BUG FIX: the original used Python 2 print statements, which are
    # syntax errors under Python 3.
    print(df_a.to_string())
    print(df_a.columns)
    store.close()
Exemplo n.º 40
0
 def studentsExcelToList(self, fileName, startIndex):
     """Yield, for each student row of the workbook's first sheet
     (starting at *startIndex*), a list with the columns of interest."""
     workbook = ExcelFile(fileName)
     rows = workbook.parse(workbook.sheet_names[0]).to_numpy()[startIndex:]
     # Column positions of the fields we keep, in output order.
     wanted = (1, 3, 5, 6, 7, 9, 10)
     for row in rows:
         yield [row[position] for position in wanted]
Exemplo n.º 41
0
	def createSamplesFromExcelKea(self):
		'''Ask the user for a Kea sample-batch spreadsheet and import its samples
		into the first marker's table.'''
		dialogTitle = 'Locate Kea sample batch spreadsheet...'
		fileName, _selectedFilter = QFileDialog.getOpenFileName(self, dialogTitle, self.path)
		workbook = ExcelFile(fileName)
		samples = workbook.parse('Data')
		samples = samples[['PlantID', 'Sample ID', 'Plate No', 'Position on Plate(s)']]
		samples.columns = ['Plant', 'Sample', 'Origin plate', 'Origin well']
		# Rows with neither a plate nor a well are not real samples.
		samples = samples.dropna(how='all', subset=['Origin plate', 'Origin well'])
		samples['From plate'] = samples['Origin plate']
		samples['From well'] = samples['Origin well']
		samples['Plate'] = samples['Origin plate']
		samples['Well'] = samples['Origin well']
		self.markers[0].table = self.markers[0].table.append(samples, ignore_index=True)
		self.__updateView()
def build_actualisation_group_vars_h5():
    """Store the 'data' sheet of actualisation_groups.xls in HDF5 and
    print the variables grouped by actualisation coefficient."""
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('data', na_values=['NA'])
    store['vars'] = df
    # BUG FIX: the original used Python 2 print statements (syntax errors
    # under Python 3).
    print(df.to_string())
    print(store)
    from numpy import unique
    coeff_list = sorted(unique(df['coeff'].dropna()))
    print(coeff_list)
    groups = {}
    for coeff in coeff_list:
        # All variables actualised with this coefficient.
        groups[coeff] = list(df[df['coeff'] == coeff]['var'])
    print(groups)
    store.close()
Exemplo n.º 43
0
 def load_auditory(self, file):
     """Load auditoriums from an Excel workbook into ``self.auds``.

     Only sheets whose parsed dict carries a "settings" key are loaded,
     and an auditorium whose inner name is already registered is skipped
     (duplicates are not loaded twice).
     """
     workbook = ExcelFile(file)
     for sheet_name in workbook.sheet_names:
         frame = workbook.parse(sheet_name, index_col=None, header=None)
         parsed = splitter(frame, named=True)
         if "settings" not in parsed:
             continue
         candidate = Auditory(parsed, outer_name=sheet_name)
         if candidate.inner_name not in self.auds:
             self.auds[candidate.inner_name] = candidate
Exemplo n.º 44
0
def list(request):
    """Upload view: save a posted document, then render the document list.

    If the newest document's file exists on disk, a monthly pivot table of
    payments is built from its "Sheet1" and passed to the template as HTML.
    """
    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            newdoc = Document(docfile = request.FILES['docfile'])
            newdoc.save()



            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('myproject.myapp.views.list'))

    else:
        form = DocumentForm() # A empty, unbound form


    # Load documents for the list page
    documents = Document.objects.all()[4:]

    a = Document.objects.last()
    # NOTE(review): filesystem path built by string concatenation with the
    # media URL — confirm this matches the project's MEDIA_ROOT layout.
    url = 'myproject' + a.docfile.url

    if os.path.isfile(url):
        vic = 'TRUE'

        # The selected sheet columns are renamed on the next line.
        v = ExcelFile(url).parse("Sheet1", parse_cols=[0, 18, 26, 25, 23])
        v.columns = ['Request','Product', 'Paid', 'PaidDate', 'Type']
        # it is right!.to_datetime,
        v.PaidDate = pd.to_datetime(v.PaidDate, format='%d.%m.%Y' )
        # NOTE(review): `rows=`/`cols=` and `resample(how=)` are pre-0.14
        # pandas APIs, removed in modern pandas — this view pins an old pandas.
        df = pd.pivot_table(v, values='Paid', rows='PaidDate', cols=['Type', 'Product'], aggfunc=[np.sum, np.count_nonzero])
        df = df.resample('M', how='sum')
        df = df.fillna(value=0)

        # Render list page with the documents and the form
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form, 'df': df.to_html(classes="table-condensed"),'url':url, 'vic': vic},
            context_instance=RequestContext(request)
        )
    else:
        vic = 'False'
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form, 'url': url, 'vic': vic},
            context_instance=RequestContext(request)
        )
Exemplo n.º 45
0
def build_from_insee(directory=None, verbose=False):
    """Build the 'insee' population table from the INSEE workbook
    sd2010_t6_fm.xls (sheets 2006-2009) and store it in the HDF5 file.

    :param directory: directory holding the HDF5 file; defaults to this
        module's directory.
    :param verbose: print intermediate frames for debugging.
    """
    if directory is None:
        directory = os.path.dirname(__file__)

    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    xls = ExcelFile(os.path.join(model.DATA_SOURCES_DIR, "sd2010_t6_fm.xls"))

    df_age_final = None

    for year in range(2006, 2010):
        sheet_name = str(year)

        df = xls.parse(sheet_name, header=0, index_col=0, skiprows=8, parse_cols=[1, 2], na_values=['NA'])

        df.index.name = u"âge"
        df.rename(columns={"Unnamed: 1": year}, inplace=True)

        # Dealing with the "90 et plus" and "105 et plus" rows
        df = df.reset_index()
        df = df.dropna(axis=0)
        df.set_value(106, u"âge", 105)
        df = df.set_index(u"âge")
        df.drop(df.index[90], axis=0, inplace=True)
        df.index.name = u"âge"
        df = df.reset_index()
        if verbose:
            # BUG FIX: the original used Python 2 print statements.
            print("year : " + str(year))
            print(df.to_string())

        if df_age_final is None:
            df_age_final = df
        else:
            df_age_final = df_age_final.merge(df)

    if verbose:
        print(df_age_final.to_string())
        print(df_age_final.dtypes)

    from numpy import dtype
    df_age_final[u"âge"] = df_age_final[u"âge"].astype(dtype("int64"))
    store.put("insee", df_age_final)
Exemplo n.º 46
0
 def __init__(self, file, from_pickle=False):
     """Build the controller either from a pickled snapshot or from an
     Excel workbook.

     When ``from_pickle`` is true, *file* is an open pickle stream whose
     saved state is restored wholesale.  Otherwise *file* is an Excel
     workbook: exactly one sheet must carry the ``main_settings`` block,
     and every other sheet is loaded as an auditorium.
     """
     if from_pickle:
         data = pickle.load(file)
         Checker.clean_global_init(data["checker_meta"])
         Seat.counters = data["seats_meta"]
         # Adopt the saved controller's attributes wholesale, then stop.
         self.__dict__.update(data["controller"].__dict__)
         return
     self.email_handle = list()
     self.mode = {"people": "None"}
     self.last_change = None
     self.people = pd.DataFrame()
     self.auds = dict()
     self.inds = list()
     self.teams = list()
     self.seed = 1
     found_main_settings = False
     excel_file = ExcelFile(file)
     # First pass: locate the single sheet holding the global settings.
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "main_settings" in unresolved_dict.keys():
             if found_main_settings:
                 # Russian: "Two pages with global settings!"
                 raise ControllerException("Две страницы с общими настройками!")
             found_main_settings = True
             Checker.raw_global_init(unresolved_dict)
             self.checker = Checker()
     if not found_main_settings:
         # Russian: "Settings not found; the settings page needs the
         # key main_settings".
         raise TypeError("Настройки не найдены, на странице с настройками нужен ключ main_settings")
     # Second pass: every non-settings sheet describes one auditorium.
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "main_settings" not in unresolved_dict.keys():
             tmp = Auditory(unresolved_dict, outer_name=name)
             if tmp.inner_name in self.auds.keys():
                 del tmp
                 # Russian: "There are identical auditoriums".
                 raise TypeError("Есть одинаковые аудитории")
             else:
                 self.auds[tmp.inner_name] = tmp
     self._message_upd()
from pandas import DataFrame, ExcelFile
import pandas as pd
import json


# Path to excel file
# Your path will be different, please modify the path below.
location = r'c:/users/meinzerc/Desktop/table.xlsx'

# Create ExcelFile object
xls = ExcelFile(location)

# Parse the excel file
table = xls.parse('Sheet1')
# BUG FIX: the parsed frame is bound to ``table``; the original called
# ``df.head()`` here, which raised NameError.
table.head()

location = r'c:/users/meinzerc/Desktop/base.xlsx'

# Create ExcelFile object
xls = ExcelFile(location)

# Parse the excel file
base = xls.parse('Sheet2')
base.head()

base.columns = ['File', 'b', 'c', 'd', 'e','f','g','h']
basecut= base[['File','h']]
h=pd.DataFrame(basecut.h)
# Split the 'File' strings on runs of non-breaking spaces.
final=basecut.File.str.split('\\xa0+\s*\\xa0*')
abc=pd.DataFrame(final.tolist(), columns = ['a','b','c','e','f'],index=final.index)
work=pd.merge(abc,h,how='left',left_index=True,right_index=True)
Exemplo n.º 48
0
# Python version 2.7.5
from pandas import ExcelFile
from markovchain import MarkovChain
from orderstatemapper import OrderStateMapper
from equalordermarkovmatrixcomparator import EqualOrderMarkovMatrixComparator
from scipy.stats import chisquare
from array import array

xlsx = ExcelFile('dane.xls')
# Columns 1 and 7 of sheet 'strona'; the 'kto' column feeds the chains below.
data = xlsx.parse('strona', parse_cols=[1, 7], index_col=None, na_values=['NA'])

#order = 2
#map(lambda x: MarkovChain(x, order).stdout() , data.groupby('grupa').kto.tolist())

# Fit Markov chains of orders 1, 2 and 3 on the same sequence.
model_2 = MarkovChain( data['kto'],2).markov_matrix
#print(model_2.markov_matrix.keys()[0][0][0:])

model_1 = MarkovChain( data['kto'],1).markov_matrix

model_3 = MarkovChain( data['kto'],3).markov_matrix


# Map the order-1 states onto the order-2 state space so the two
# transition matrices can be compared like-for-like.
mapper = OrderStateMapper(model_1, 1, model_2, 2)


model_1_adjusted_to_2=mapper.get_lower_order_matrix_adjusted_to_the_higher_one()

# NOTE(review): model_3 is fitted but never compared below — confirm intent.
comparison_model= EqualOrderMarkovMatrixComparator(model_1_adjusted_to_2,model_2)
comaprison = comparison_model.get_probabilities_for_transitions()
expected,observed=comparison_model.get_probablilities_expected_and_observed()
Exemplo n.º 49
0
def get_excel_data_frame(read_excel, skiprows=0, header=None, names=None):
    """Return the first worksheet of *read_excel* as a DataFrame,
    forwarding the usual pandas parsing options."""
    workbook = ExcelFile(read_excel)
    first_sheet = workbook.sheet_names[0]
    return workbook.parse(first_sheet, header=header, index_col=None,
                          skiprows=skiprows, names=names)
Exemplo n.º 50
0
 def import_data(self, transactions_file):
     """Parse 'Sheet0' of *transactions_file* into ``self.data``,
     using column 3 as the index and treating 'NA' cells as missing."""
     workbook = ExcelFile(transactions_file)
     self.data = workbook.parse('Sheet0', index_col=3, na_values=['NA'])
Exemplo n.º 51
0
Arquivo: build.py Projeto: Pyke75/ga
def build_hdf_fr():
    """Build the French HDF5 data files: one store with the INSEE
    population projections (one key per projection scenario file) and
    one store with the tax/benefit profiles."""
    # population
    DIR = '../../data_fr/proj_pop_insee'

    store = HDFStore(os.path.join(DIR, 'proj_pop.h5'))
    sex_dict = {0: 'populationH', 1: 'populationF'}

    for fil in os.listdir(DIR):
        if fil[:7] == 'projpop':
            filename = os.path.join(DIR, fil)
            xls = ExcelFile(filename)
            pop = None

            for sex, sheet in sex_dict.items():
                df = xls.parse(sheet, skiprows=[0, 1, 2, 3],
                               na_values=['NA'], index_col=0)
                df = df.reset_index()
                del df[df.columns[0]]
                # Drop the footer rows that follow the age table.
                for i in arange(109, 114):
                    df = df.drop([i])
                # Rename index
                df.index.names = ['age']
                df.columns = df.columns.astype('int32')
                df = df.unstack()
                df.index.names[0] = 'year'
                df = df.reset_index()
                df['sex'] = sex
                if pop is None:
                    pop = df
                else:
                    pop = pop.append(df)

            pop['pop'] = pop[0]
            del pop[0]

            # Collapse everybody aged 100+ into the single age-100 bucket.
            s = pop[pop['age'] >= 100]
            s = s.set_index(['age', 'sex', 'year'])
            s = s.sum(axis=0, level=['sex', 'year'])

            pop = pop.set_index(['age', 'sex', 'year'])

            for t in s.index:
                pop.set_value((100,) + t, 'pop', s.ix[t]['pop'])

            for a in range(101, 109):
                pop = pop.drop(a, axis=0, level="age")
            # BUG FIX: the loop variable is ``fil``; the original referenced
            # the builtin ``file`` here, so the store key was wrong (NameError
            # on Python 3).  Also converted Python 2 print statements.
            print(fil[:-4])
            store[fil[:-4]] = pop

    store.close()

    # profiles
    DIR = '../../data_fr'
    profile_file = 'profils.xls'
    store = HDFStore(os.path.join(DIR, 'profiles.h5'))
    filename = os.path.join(DIR, profile_file)
    xls = ExcelFile(filename)
    sheets = xls.sheet_names
    profiles = None
    for sheet in sheets:
        df = xls.parse(sheet)
        df['age'] = df['age'].astype(int)
        df['sex'] = df['sex'].astype(int)
        df['year'] = 1996
        df = df.set_index(['age', 'sex', 'year'])

        if profiles is None:
            profiles = df
        else:
            profiles = profiles.merge(df, right_index=True, left_index=True)

    store['profiles'] = profiles

    store.close()
    print('DONE')
       '3uL_HP_0_A4_1','3uL_HP_0_A6_1','3uL_HP_0_A7_1',
       '3uL_HP_0_A8_1','3uL_HP_0_A9_1','4uL_HP_0_A11_1','4uL_HP_0_A12_1',
       '4uL_HP_0_B1_1','4uL_HP_0_B10_1','4uL_HP_0_B12_1','4uL_HP_0_B3_1',
       '4uL_HP_0_B4_1','4uL_HP_0_B6_1','4uL_HP_0_B7_1','4uL_HP_0_B9_1',
       '5uL_HP_0_C1_1','5uL_HP_0_C10_1','5uL_HP_0_C2_1','5uL_HP_0_C3_1',
       '5uL_HP_0_C4_1','5uL_HP_0_C5_1','5uL_HP_0_C6_1','5uL_HP_0_C7_1',
       '5uL_HP_0_C8_1','5uL_HP_0_C9_1','6uL_HP_0_C11_1','6uL_HP_0_C12_1',
       '6uL_HP_0_D1_1','6uL_HP_0_D2_1','6uL_HP_0_D3_1','6uL_HP_0_D4_1',
       '6uL_HP_0_D5_1','6uL_HP_0_D6_1','6uL_HP_0_D7_1','6uL_HP_0_D8_1',
       '7uL_HP_0_D9_1','7uL_HP_0_E1_1','7uL_HP_0_E3_1','7uL_HP_0_E4_1',
       '7uL_HP_0_E6_1','7uL_HP_0_E7_1','7uL_HP_0_E9_1','7uL_HP_0_F1_1',
       '7uL_HP_0_F2_1','7uL_HP_0_F3_1',
       'STD_0_B11_1','STD_0_B2_1','STD_0_B5_1','STD_0_B8_1','STD_0_E2_1',
       'STD_0_E5_1','STD_0_E8_1']

data=ExcelFile('D:\\Database\\Origianl Intensity.xls')
dict_merged={}
dict_filtered={}
dffinal=DataFrame(dict_merged)

for sheetname in sheet:
   dict_merged[sheetname]=data.parse(sheetname,skiprows=[0,1])

drop_columns=['time', 'SN', 'Quality Fac.', 'Res.', 'Area', 'Rel. Intens.', 'FWHM', 'Chi^2', 'Bk. Peak']
dict_dropped={}

'''
for sheetname in sheet:
   dict_dropped[sheetname]=pd.DataFrame(dict_merged[sheetname].drop(drop_columns,axis=1),
                columns=['m/z','Intens.'])
del df['Names']


# Import libraries
from pandas import ExcelFile, DataFrame, concat, date_range
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): relies on a ``df`` defined earlier in the original tutorial
# (this chunk starts mid-script) — confirm against the full source.
df.to_excel('Lesson3.xlsx', index=False)

# Location of file
Location = r'C:\Users\hdrojas\.xy\startups\Lesson3.xlsx'

# Create ExcelFile object
xlsx = ExcelFile(Location)

# Parse a specific sheet
df = xlsx.parse('sheet1',index_col='StatusDate')
df.dtypes
#list index
df.index

#convert to upper
df.Names = df.Names.apply(lambda x: x.upper())
# Only grab where Status == 1
df = df[df['Status'] == 1]

#- For all records in the State column where they are equal to NJ, replace them with NY.
# NOTE(review): chained indexed assignment below writes through a view and can
# silently fail on modern pandas (SettingWithCopy); ``.loc`` would be safer.
df.Names[df.Names == 'BOB'] = 'Chet'
df.Names[df.Names == 'Chet'] = 'John'
Exemplo n.º 54
0
 def get_dataset(self, *args, **kwargs):
     """Parse and return one sheet of the resource's Excel file.

     The sheet named by ``kwargs['sheet']`` is used when given,
     otherwise "Sheet1".
     """
     workbook = ExcelFile(self.resource.resource_file.path)
     sheet = kwargs['sheet'] if 'sheet' in kwargs else "Sheet1"
     return workbook.parse(sheet)
Exemplo n.º 55
0
    def readDataFile(self,filename,filetype,tab=None):
        """Read a data file (xlsx or one of several CSV dialects) into one
        or more table tabs of the main window.

        :param filename: path of the file to load.
        :param filetype: one of the module's filetype constants; guessed
            from the file extension when not recognised.
        :param tab: existing tab to fill; when None a new tab is created
            (one per sheet for xlsx files).
        """
        try:
            filepath, fileext = os.path.splitext(filename)
            filepath, filebasename = os.path.split(filename)

            # Fall back to guessing the type from the file extension.
            if not filetype in filetypes:
                if fileext == '.xlsx':
                    filetype = filetype_xlsx
                if fileext == '.csv':
                    # NOTE(review): double assignment is redundant but harmless.
                    filetype = filetype = filetype_excelcsv


            if filetype == filetype_xlsx:
                xl = ExcelFile(filename)
                # Each sheet becomes its own tab unless a target tab was given.
                for sheet in xl.sheet_names:
                    try:
                        df = xl.parse(sheet)

                        if tab is None:
                            tab = self.mainWindow.TableTabs.addTable()
                            tab.setDataFrame(df)
                            tab.setName(filebasename+" "+sheet)
                            tab = None
                        else:
                            tab.setDataFrame(df)
                            tab.setName(filebasename+" "+sheet)
                            # Only the first sheet goes into the supplied tab.
                            break

                    except IndexError:
                        pass

            elif filetype == filetype_excelcsv:
                # Excel-style CSV: semicolon separated, Windows-1252 encoded.
                df = read_csv(filename, sep=';',encoding='cp1252',dtype=str)
                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            elif filetype == filetype_excelunicode:
                # Excel "Unicode text" export: tab separated, UTF-16LE.
                df = read_csv(filename, sep="\t",encoding='utf-16LE',dtype=str)
                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            elif filetype == filetype_facepager:

# Automatically detect and remove BOM?
#                 infile = open(filename, 'rb')
#                 raw = infile.read(2)
#                 for enc,boms in \
#                         ('utf-8',(codecs.BOM_UTF8,)),\
#                         ('utf-16',(codecs.BOM_UTF16_LE,codecs.BOM_UTF16_BE)),\
#                         ('utf-32',(codecs.BOM_UTF32_LE,codecs.BOM_UTF32_BE)):
#                     if any(raw.startswith(bom) for bom in boms):
#                         encoding = enc
#
#                         break

                # utf-8-sig transparently strips a UTF-8 BOM if present.
                df = read_csv(filename, sep=";",encoding='utf-8-sig',dtype=str)

                # NOTE(review): trims the first and last character of the first
                # header — presumably a stray quote/BOM pair; confirm.
                firstcolumn = df.columns.values[0]
                firstcolumn = firstcolumn[1:]
                firstcolumn = firstcolumn[:-1]
                df.columns = [firstcolumn] + df.columns.values[1:].tolist()


                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            if tab is not None:
                tab.show()

        except Exception as e:
            self.mainWindow.logmessage(e)
Exemplo n.º 56
0
def fill_pop_data():
    """Assemble the 1996-2006 INSEE population sheets into a single
    (age, sex, year)-indexed frame and store it as pop_1996_2006.h5.

    From 2000 on the INSEE sheets record ages up to 105; those rows are
    collapsed into the age-100 bucket so every year tops out at 100.
    (Python 2 print statements converted to the print function.)
    """
    h5_insee = ExcelFile(pop_insee)

    for year in range(1996, 2007):
        print(year)

        # Extract the sheet for this year.
        xls = h5_insee.parse(str(year), index_col=0)
        print(xls.columns)
        age_max = max(xls["age"])
        print("    age_max = ", age_max)

        # Split men and women, then tag each half with a sex code.
        xls_men = xls.loc[:, ["men", "age", "year"]]
        xls_wom = xls.loc[:, ["women", "age", "year"]]

        xls_men["sex"] = 0
        xls_wom["sex"] = 1

        if year == 1996:
            print("initialisation", year)
            xls_men.set_index(["age", "sex", "year"], inplace=True)
            xls_wom.set_index(["age", "sex", "year"], inplace=True)

            corrected_pop_men = xls_men
            corrected_pop_wom = xls_wom
            print(corrected_pop_men.head().to_string())

        else:
            # INSEE changed its notation: from 2000 on, people are recorded
            # up to age 105 instead of 100.

            if age_max > 100:
                print("    Age maximal > 100")
                print(range(age_max.astype("int"), 99, -1))

                # Sum everybody aged 100 or more.
                tot_men = xls_men.men[xls_men.age >= 100].sum()
                tot_wom = xls_wom.women[xls_wom.age >= 100].sum()
                print(tot_men, tot_wom)

                # Replace the centenarian row with the computed total,
                # then truncate the frames at age 100.
                xls_men.loc[xls_men.age == 100, "men"] = tot_men
                xls_wom.loc[xls_wom.age == 100, "women"] = tot_wom

                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)

                xls_men = xls_men.loc[:(100, 0, year), :]
                xls_wom = xls_wom.loc[:(100, 1, year), :]

                # Combine with the years already processed.
                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])

            if age_max == 100:
                # Set the index, then combine.
                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)

                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])

                print(corrected_pop_men.head().to_string())

            if age_max < 100:
                raise Exception("the maximum recorded age is below 100")

        print(len(corrected_pop_men), "    longueur de corrected_pop")

    print("    fin des boucles")
    print(corrected_pop_men.columns)
    corrected_pop_men.columns = ["pop"]
    corrected_pop_wom.columns = ["pop"]

    print(corrected_pop_men.head(10).to_string())

    corrected_pop = concat([corrected_pop_men, corrected_pop_wom])
    print(corrected_pop.head().to_string())
    print(len(corrected_pop))
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country, "sources", "Carole_Bonnet", "pop_1996_2006.h5"))
    store_pop["population"] = corrected_pop
Exemplo n.º 57
0
def test():
    """Run the C. Bonnet generational-accounting simulation and reproduce
    its published tables (Python 2 print statements converted to the
    print function; logic unchanged)."""
    print("Entering the simulation of C. Bonnet")

    simulation = Simulation()
    population_scenario = "projpop0760_FECbasESPbasMIGbas"
    simulation.load_population(population_filename, population_scenario)

    # Adding missing population data between 1996 and 2007 :
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country, "sources", "Carole_Bonnet", "pop_1996_2006.h5"))
    corrected_pop = store_pop["population"]
    print(simulation.population.head().to_string())
    print(corrected_pop.head().to_string())
    print("    longueurs des inputs")
    print("prévisions insee", len(simulation.population), "population corrigée", len(corrected_pop))

    simulation.population = concat([corrected_pop, simulation.population])
    print("    longueur après combinaison", len(simulation.population))

    # Loading profiles :
    simulation.load_profiles(profiles_filename)
    xls = ExcelFile(CBonnet_results)

    """
    Hypothesis set #1 : 
    actualization rate r = 3%
    growth rate g = 1%
    net_gov_wealth = -3217.7e+09 (unit : Franc Français (FRF) of 1996)
    non ventilated government spendings in 1996 : 1094e+09 FRF
    """

    # Setting parameters :
    year_length = 250
    simulation.year_length = year_length
    r = 0.03
    g = 0.01
    n = 0.00
    net_gov_wealth = -3217.7e09
    year_gov_spending = (1094) * 1e09

    #     avg_gov_spendings = 0
    #     # List w/ the economic affairs
    #     spending_list = [241861, 246856, 245483, 251110, 261752, 271019,
    #                      286330,    290499,    301556,    315994,    315979,    332317,
    #                      343392,    352239,    356353,    356858]
    #     count = 0
    #     for spent in spending_list:
    #         year_gov_spending = spent*1e+06*((1+g)/(1+r))**count*6.55957
    #         print year_gov_spending
    #         net_gov_spendings += year_gov_spending
    #         avg_gov_spendings += year_gov_spending
    #         count += 1

    #     avg_gov_spendings /= (count)
    #     print 'avg_gov_spendings = ', avg_gov_spendings

    # Loading simulation's parameters :
    simulation.set_population_projection(year_length=year_length, method="stable")
    simulation.set_tax_projection(method="per_capita", rate=g)
    simulation.set_growth_rate(g)
    simulation.set_discount_rate(r)
    simulation.set_population_growth_rate(n)
    simulation.create_cohorts()
    simulation.set_gov_wealth(net_gov_wealth)
    simulation.set_gov_spendings(year_gov_spending, default=True, compute=True)

    # Calculating net transfers :
    # Net_transfers = tax paid to the state minus money recieved from the state
    taxes_list = ["tva", "tipp", "cot", "irpp", "impot", "property"]
    payments_list = ["chomage", "retraite", "revsoc", "maladie", "educ"]
    simulation.cohorts.compute_net_transfers(name="net_transfers", taxes_list=taxes_list, payments_list=payments_list)

    """
    Reproducing the table 2 : Comptes générationnels par âge et sexe (Compte central)
    """
    # Generating generationnal accounts :
    year = 1996
    simulation.create_present_values(typ="net_transfers")
    print("PER CAPITA PV")
    print(simulation.percapita_pv.xs(0, level="age").head(10))
    print(simulation.percapita_pv.xs((0, year), level=["sex", "year"]).head(10))

    # Calculating the Intertemporal Public Liability
    ipl = simulation.compute_ipl(typ="net_transfers")
    print("------------------------------------")
    print("IPL =", ipl)
    print("share of the GDP : ", ipl / 8050.6e09 * 100, "%")
    print("------------------------------------")

    # Calculating the generational imbalance
    gen_imbalance = simulation.compute_gen_imbalance(typ="net_transfers")
    print("----------------------------------")
    print("[n_1/n_0=", gen_imbalance, "]")
    print("----------------------------------")

    # Creating age classes
    cohorts_age_class = simulation.create_age_class(typ="net_transfers", step=5)
    cohorts_age_class._types = [
        u"tva",
        u"tipp",
        u"cot",
        u"irpp",
        u"impot",
        u"property",
        u"chomage",
        u"retraite",
        u"revsoc",
        u"maladie",
        u"educ",
        u"net_transfers",
    ]
    age_class_pv_fe = cohorts_age_class.xs((1, year), level=["sex", "year"])
    age_class_pv_ma = cohorts_age_class.xs((0, year), level=["sex", "year"])

    print("AGE CLASS PV")
    print(age_class_pv_fe.head())
    print(age_class_pv_ma.head())

    age_class_pv = concat([age_class_pv_fe, age_class_pv_ma], axis=1)
    print(age_class_pv)
    # BUG FIX: "\c" was an invalid escape sequence (DeprecationWarning);
    # "\\c" produces the same backslash-c bytes explicitly.
    age_class_pv.to_excel(str(xls_adress) + "\\calibration.xlsx", "compte_generation")

    # Plotting
    age_class_pv = cohorts_age_class.xs(year, level="year").unstack(level="sex")
    age_class_pv = age_class_pv["net_transfers"]
    age_class_pv.columns = ["men", "women"]
    #     age_class_pv['total'] = age_class_pv_ma['net_transfers'] + age_class_pv_fe['net_transfers']
    #     age_class_pv['total'] *= 1.0/2.0
    age_class_theory = xls.parse("Feuil1", index_col=0)

    age_class_pv["men_CBonnet"] = age_class_theory["men_Cbonnet"]
    age_class_pv["women_CBonnet"] = age_class_theory["women_Cbonnet"]
    age_class_pv.plot(style="--")
    plt.legend()
    plt.axhline(linewidth=2, color="black")
    plt.show()