def load_data(first_path, second_path):
    dbf1 = Dbf5(first_path)
    dbf2 = Dbf5(second_path)
    df_ndvi_lst = dbf1.to_dataframe()
    df_ndvi_ndvi = dbf2.to_dataframe()
    df_ndvi_ndvi = df_ndvi_ndvi[['grid_code', 'FID_pixelc']]
    return df_ndvi_lst, df_ndvi_ndvi
def __init__(self, bar, liq):
    self.eng_string = 'sqlite:///barcodes.db'
    self.liq = Dbf5(liq, codec='latin-1')
    self.bar = Dbf5(bar, codec='latin-1')
    self.liq.mem(chunksize=1000)
    self.bar.mem(chunksize=1000)
    self.eng = None
    self.conn = None
    # self.barcodes = self.db['BARCODES']
    # self.liqcode = self.db['LIQCODE']
    self.tables = {'BARCODES': self.bar, 'LIQCODE': self.liq}
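# A hedged sketch, not part of the original class: one plausible way the tables
# dict built in __init__ could be loaded into the SQLite database named by
# eng_string. The method name load_tables and the SQLAlchemy round trip are
# assumptions, not the class's actual loading code.
def load_tables(self):
    from sqlalchemy import create_engine
    self.eng = create_engine(self.eng_string)
    self.conn = self.eng.connect()
    for name, dbf in self.tables.items():
        # materialize each DBF as a DataFrame and write it to a table of the same name
        dbf.to_dataframe().to_sql(name, self.eng, if_exists='replace', index=False)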
def read_dbf(shp_dir_or_dbf_path):
    if os.path.isdir(shp_dir_or_dbf_path):
        dbf_path = find_shp_path(shp_dir_or_dbf_path) + ".dbf"
    else:
        dbf_path = shp_dir_or_dbf_path
    dbf = Dbf5(dbf_path)
    return dbf.to_dataframe()
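# Hedged usage sketch for read_dbf above; the paths are hypothetical, and
# find_shp_path is assumed (from elsewhere in the project) to return the
# shapefile path without its extension.
df_from_dir = read_dbf("data/provinces")                 # directory holding the shapefile
df_from_file = read_dbf("data/provinces/provinces.dbf")  # explicit .dbf path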
def CalcRadiationSurfaces(Observers, DataFactorsCentroids, DataradiationLocation, locationtemp1, locationtemp2):
    # local variables
    CQSegments_centroid = locationtemp2 + '\\' + 'CQSegmentCentro'
    Outjoin = locationtemp2 + '\\' + 'Join'
    CQSegments = locationtemp2 + '\\' + 'CQSegment'
    OutTable = 'CentroidsIDobserver.dbf'

    # Create a join of the Observers features and the CQ segment centroids to assign
    # names and IDs of observers (field TARGET_FID) to the centroids of the building
    # lines, then create a table to import as a dataframe
    arcpy.SpatialJoin_analysis(CQSegments_centroid, Observers, Outjoin, "JOIN_ONE_TO_ONE",
                               "KEEP_ALL", match_option="CLOSEST", search_radius="10 METERS")
    arcpy.JoinField_management(Outjoin, 'OBJECTID', CQSegments, 'OBJECTID')  # add the lengths of the lines to the file
    arcpy.TableToTable_conversion(Outjoin, locationtemp1, OutTable)

    # ORIG_FID represents the points in the segments of the simplified shape of the building
    # ORIG_FID_1 is the observer ID
    Centroids_ID_observers0 = Dbf5(locationtemp1 + '\\' + OutTable).to_dataframe()
    Centroids_ID_observers = Centroids_ID_observers0[['Name', 'height_ag', 'ORIG_FID',
                                                      'ORIG_FID_1', 'Shape_Leng']].copy()
    Centroids_ID_observers.rename(columns={'ORIG_FID_1': 'ID'}, inplace=True)

    # Join Centroids_ID_observers with the centroid data from the second chapter
    # to get the shaded-surface values.
    Datacentroids = pd.read_csv(DataFactorsCentroids)
    DataCentroidsFull = pd.merge(Centroids_ID_observers, Datacentroids,
                                 left_on='ORIG_FID', right_on='ORIG_FID')

    # Read the radiation table again and merge its values with Centroids_ID_observers
    # on the field 'ID'.
    Radiationtable = pd.read_csv(DataradiationLocation, index_col='Unnamed: 0')
    DataRadiation = pd.merge(left=DataCentroidsFull, right=Radiationtable,
                             left_on='ID', right_on='ID')

    Data_radiation_path = locationtemp1 + '\\' + 'tempradaition.csv'
    DataRadiation.to_csv(Data_radiation_path, index=False)
    return Data_radiation_path
def get_data():
    df_registos = pd.read_csv("data/data_old.csv", sep=";")
    df = geopandas.read_file("data/concelhos.shp")
    df_registos = df_registos[df_registos["ano"] == 2017]
    df_registos["incidencia"] = (df_registos["nr_crimes"] /
                                 df_registos["populacao_residente"]) * 100000
    dbf = Dbf5("data/concelhos.dbf")
    df_meta = dbf.to_dataframe()
    df_meta["CCA_2"] = df_meta["CCA_2"].astype(np.float64)
    municipalities = len(df)
    features = [{
        "type": "Feature",
        "geometry": mapping(df.iloc[i]["geometry"].simplify(tolerance=0.01)),
        "id": df_registos[df_registos["codigo_municipio"] ==
                          df_meta.iloc[i]["CCA_2"]]["municipio"]
    } for i in range(municipalities)]
    df_data = df_registos.drop(["ano"], axis=1)
    geojson = FeatureCollection(features)
    return df_data, geojson
def csv_of_blocks(d, in_file):
    print('creating csv of blocks')
    projected_file_path = get_proj_file_name(os.path.join(d.out_dir, 'census'), in_file)
    print(projected_file_path)
    blocks_dbf = projected_file_path.replace('.shp', '.dbf')
    # read in the projected blocks dbf
    print('reading in file')
    dbf = Dbf5(blocks_dbf)
    df = dbf.to_dataframe()
    # create dict to hold dtypes for columns
    convert_dict = {}
    # convert these to int64
    columns_list = ['STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE10', 'GEOID10']
    for c in columns_list:
        convert_dict[c] = 'int64'
    # convert these to float64
    columns_list = ['INTPTLAT10', 'INTPTLON10']
    for c in columns_list:
        convert_dict[c] = 'float64'
    # use conversion dict to set dtype of columns
    df = df.astype(convert_dict)
    # save df to csv
    out_csv = projected_file_path.replace('.shp', '.csv')
    df.to_csv(out_csv, index=False)
def parsear_dbf(path_dbf):
    """Parse the dbf file and return a DataFrame."""
    dbf = Dbf5(path_dbf, codec='utf-8')
    df = dbf.to_dataframe()
    return df
def read_dbf(path, encoding='utf-8'):
    """
    Read a .dbf file, temporarily write it out as a csv, and return a
    pandas.core.frame.DataFrame object.

    :param path: The complete path to the .dbf to read
    :type path: str
    :param encoding: The codec to use when decoding text-based records
    :type encoding: str
    :return: a pandas.DataFrame corresponding to the .dbf file
    :rtype: pandas.core.frame.DataFrame

    .. warnings:: not all encodings are handled by this function
    """
    dbf_instance = Dbf5(path, codec=encoding)
    csv_name = path[:-4] + '_from_dbf' + '.csv'
    # clear the folder of the temporary csv if necessary
    if os.path.isfile(csv_name):
        os.remove(csv_name)
    dbf_instance.to_csv(csv_name)
    df = pd.read_csv(csv_name, sep=',', encoding='Latin-1', na_values='None')
    os.remove(csv_name)
    return df
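# Side note on the function above: simpledbf can also build the DataFrame
# directly, which skips the temporary CSV round trip. A minimal alternative
# sketch, assuming the default to_dataframe() behaviour suits the file at hand:
def read_dbf_direct(path, encoding='utf-8'):
    """Read a .dbf straight into a DataFrame without an intermediate csv."""
    from simpledbf import Dbf5
    return Dbf5(path, codec=encoding).to_dataframe()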
def read_class(tabular_data, gt_array_file):
    """
    tabular_data: the path of the DBF file
    gt_array_file: the path of the output .npy file (.npy = NumPy array file)

    Give an $id column (I call it FID) to your whole DBF file so you can call
    it in order when matching the feature class with the respective reflectance
    (in extract_values). I added the $id column in QGIS, so there is no code
    line for that here.
    """
    dbf = Dbf5(tabular_data)  # tabular_data = '/location/test.dbf'
    # convert DBF into a pandas DataFrame
    df = dbf.to_dataframe()
    # get Feature ID (FID) and Class ID (Id)
    class_id = df[['FID', 'Id']]  # [] == __getitem__ syntax
    # convert from pandas DataFrame into a NumPy array
    a = class_id.values
    # sort based on FID (usually already sorted, but better to make sure here)
    a = a[a[:, 0].argsort(kind='mergesort')]
    # take out only the Class ID
    b = a[:, 1]
    # convert into unsigned integer datatype
    b = b.astype(np.uint8)
    # make sure the Class ID array is flat
    gt_array = b.ravel()
    # save into .npy
    np.save(gt_array_file, gt_array)  # gt_array_file = '/location/array.npy'
    return gt_array
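# Hedged usage sketch for read_class above; both paths are hypothetical.
# The DBF is expected to carry the manually added FID column plus the class Id.
gt = read_class('/location/test.dbf', '/location/array.npy')
print(gt.shape, gt.dtype)  # flat uint8 array of class IDs, ordered by FID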
def dohuc(huc):
    """Do what we need to do for this huc"""
    cursor = PGCONN.cursor()
    zdbf = Dbf5("zst%s.dbf" % (huc, ), codec='utf-8')
    zst = zdbf.to_dataframe()
    zst.columns = ['value', 'count', 'area', 'max', 'fpath', 'gridorder']
    zst.sort_values(['fpath', 'gridorder'], inplace=True, ascending=True)
    # print(zst)
    df = read_postgis("""
        SELECT fpath, fid, st_transform(geom, 4326) as geo, huc_12
        from flowpaths where huc_12 = %s and scenario = 0
    """, PGCONN, params=(huc, ), geom_col='geo', index_col='fpath')
    for col in ['F0_lon', 'F0_lat', 'F0_elev']:
        df[col] = None
    for gorder in range(1, 7):
        df['G%s_elev' % (gorder, )] = None
        df['G%s_len' % (gorder, )] = None
        df['G%s_contribarea' % (gorder, )] = None
    for ofe in range(1, 18):
        df['ofe%s_pos' % (ofe, )] = None
    for fpath, row in df.iterrows():
        # 1) lat/lon of the 'F0' first point
        (df.at[fpath, 'F0_lon'],
         df.at[fpath, 'F0_lat']) = np.asarray(row['geo'].xy)[:, 0]
        # 3) elevation of F0 and G1 through G6 nodes
        for gorder in range(1, 7):
            # Contributing area
            df.at[fpath, 'G%s_contribarea' % (gorder, )] = find_ca(zst, fpath, gorder)
            cursor.execute("""
                select max(elevation), min(elevation), max(length)
                from flowpaths p JOIN flowpath_points t on (p.fid = t.flowpath)
                where p.scenario = %s and huc_12 = %s and fpath = %s
            """, (SCEN2CODE[gorder], huc, fpath))
            row2 = cursor.fetchone()
            df.at[fpath, 'F0_elev'] = row2[0]  # overwritten each time
            df.at[fpath, 'G%s_elev' % (gorder, )] = row2[1]
            # 4) horizontal distance from F0 to G1-G6
            df.at[fpath, 'G%s_len' % (gorder, )] = row2[2]
        # 5) OFE positions along this path
        slpfn = "/i/%s/slp/%s/%s/%s_%s.slp" % (SCEN2CODE[6], huc[:8], huc[8:], huc, fpath)
        lines = open(slpfn).readlines()
        ofes = int(lines[5])
        pos = 0
        for ofe, ln in enumerate(range(7, 7 + ofes * 2, 2)):
            pos += float(lines[ln].split()[1])
            df.at[fpath, "ofe%s_pos" % (ofe + 1, )] = pos
    del df['geo']
    del df['fid']
    # 6) Generic properties, perhaps can ignore?
    return df
def read_dbf(dbfile):
    """Read a dbase file."""
    dbfile = str(dbfile)
    from simpledbf import Dbf5
    dbf = Dbf5(dbfile)
    pl = dbf.to_dataframe()
    # remove stray NUL characters from column names
    pl.columns = [a.split('\x00')[0] for a in pl.columns]
    return pl
def convert_dbf_to_csv(file_name):
    """Convert DBF files to CSV files."""
    csv_file = RAW_DIR / f'{file_name}.csv'
    dbf_file = RAW_DIR / f'{file_name}.DBF'
    if not os.path.exists(csv_file):
        log(f'Converting {dbf_file} to {csv_file}')
        dbf = Dbf5(dbf_file)
        df = dbf.to_dataframe()
        df.to_csv(csv_file, index=False)
def get_cols_to_keep(file_path):
    dbf = Dbf5(file_path, codec='latin')
    df = dbf.to_dataframe()
    second = df.head().filter(regex="DIAGSEC").columns.tolist()
    to_keep = ['ANO_CMPT', 'MUNIC_RES', 'DT_INTER', 'DIAG_PRINC', 'DIAG_SECUN',
               'IDADE', 'COD_IDADE', 'SEXO', 'MORTE', 'DT_SAIDA'] + second
    return to_keep
def read_class(tabular_data, gt_array, gt_array_file):
    dbf = Dbf5(tabular_data)  # tabular_data = '/location/test.dbf'
    df = dbf.to_dataframe()
    class_id = df['Id']  # [] == __getitem__ syntax
    array = class_id.values
    gt_array = array.ravel()
    # save into .npy
    np.save(gt_array_file, gt_array)  # gt_array_file = '/location/array.npy'
def get_cols_to_keep(file_path):
    """
    Read a single one of these tiny dbf files to get a list of all the
    column names we need to keep.
    """
    dbf = Dbf5(file_path, codec='latin')
    df = dbf.to_dataframe()
    second = df.head().filter(regex="DIAGSEC").columns.tolist()
    to_keep = ['ANO_CMPT', 'MUNIC_RES', 'DT_INTER', 'DIAG_PRINC', 'DIAG_SECUN',
               'IDADE', 'COD_IDADE', 'SEXO', 'MORTE', 'DT_SAIDA'] + second
    return to_keep
def calculate_sunny_hours_of_day(day, sunrise, temporary_folder):
    """
    :param day: day of the year, starting at 1
    :type day: int
    :param sunrise: what is this? seems to be a list of sunrise times, but for the
        ecocampus case I get a list of ints like 22 and 23... that can't be right, right?
    :type sunrise: list[int]
    :param temporary_folder: path to the temporary folder with the radiation results per day
    :return: copy of the day's radiation table with the T columns renamed to hours of the year
    """
    radiation_sunnyhours = np.round(
        Dbf5(os.path.join(temporary_folder, 'Day_%(day)i.dbf' % locals())).to_dataframe(), 2)

    # Obtain the number of points modeled to do the iterations
    radiation_sunnyhours['ID'] = 0
    radiation_sunnyhours['ID'] = range(1, radiation_sunnyhours.ID.count() + 1)

    # Table with empty values with the same range as the points.
    Table = pd.DataFrame.copy(radiation_sunnyhours)
    listtimes = ['T%i' % i for i in range(1, 25)]
    for x in listtimes:
        Table[x] = 0
    Table.drop('T0', axis=1, inplace=True)

    # counter of columns in the initial table
    Counter = radiation_sunnyhours.count(1)[0]
    values = Counter - 1

    # Calculation of sunrise time
    Sunrise_time = sunrise[day - 1]

    # Shift each sunny hour into its clock-time column
    for x in range(values):
        Hour = int(Sunrise_time) + int(x)
        Table['T' + str(Hour)] = radiation_sunnyhours['T' + str(x)]

    # Rename every T column so this day's hours land in the 1..8760 hour range.
    if day <= 1:
        name = 1
    else:
        name = int(day - 1) * 24 + 1
    Table.rename(columns={'T%i' % i: 'T%i' % (name + i - 1) for i in range(1, 25)},
                 inplace=True)
    return Table
def dbf_to_csv(inputfile, outputfile='output.csv', replace=False):
    existe = os.path.exists(outputfile)
    if (existe and replace) or (not existe):
        Dbf5(inputfile, codec='latin-1').to_dataframe().to_csv(outputfile, sep=',', index=False)
        print('File', inputfile, 'successfully converted to', outputfile)
    else:
        print(f'The file {outputfile} already exists. To replace it, use the option replace=True')
def upload():
    uploaded_files = request.files.getlist("file[]")
    filenames = []
    elShp = ""
    # TODO: a check is missing that the .shp, .shx, .prj and .dbf all arrived
    for f in uploaded_files:
        if f and allowed_file(f.filename):
            filename = secure_filename(f.filename)
            f.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            if f.filename.endswith(".shp"):
                elShp = f.filename
            filenames.append(filename)
    reader = shapefile.Reader(os.path.join(app.config['UPLOAD_FOLDER'], elShp))
    fields = reader.fields[1:]
    field_names = [field[0] for field in fields]
    buff = []
    for sr in reader.shapeRecords():
        atr = dict(zip(field_names, sr.record))
        geom = sr.shape.__geo_interface__
        buff.append(dict(type="Feature", geometry=geom, properties=atr))
    el_hash = hash_from_shp(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "dbf"))
    if os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], el_hash)):
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "dbf"))
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "prj"))
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "shp"))
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "shx"))
        if url is not None:
            return redirect(url + "/pc_glyph/%s" % el_hash)
        else:
            return redirect("/pc_glyph/%s" % el_hash)
    else:
        os.makedirs(os.path.join(app.config['UPLOAD_FOLDER'], el_hash))
        # write the GeoJSON file
        with open(os.path.join(os.path.join(app.config['UPLOAD_FOLDER'], el_hash), "layer.json"), "w") as geojson:
            geojson.write(dumps({"type": "FeatureCollection", "features": buff}, indent=0))
        # write the csv file
        dbf = Dbf5(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "dbf"))
        df = dbf.to_dataframe()
        df.to_csv(os.path.join(os.path.join(app.config['UPLOAD_FOLDER'], el_hash), "data.csv"),
                  encoding="utf8", index=False)
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "dbf"))
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "prj"))
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "shp"))
        os.remove(os.path.join(app.config['UPLOAD_FOLDER'], elShp[:-3] + "shx"))
        if url is not None:
            return redirect(url + "/pc_glyph/%s" % el_hash)
        else:
            return redirect("/pc_glyph/%s" % el_hash)
def dbf2df(dbf_path, index=None, cols=False, incl_index=False):
    dbf = Dbf5(dbf_path)
    df = dbf.to_dataframe()
    if cols:
        if incl_index:
            cols.append(index)
        df = df[cols].copy()
    if index:
        df.set_index(index, inplace=True)
    return df
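# Hedged usage sketch for dbf2df above; the file and field names are hypothetical.
# Keep two attribute columns and index the frame by the feature identifier
# (with incl_index=True the index column is appended to cols before selection).
df = dbf2df('blocks.dbf', index='GEOID10', cols=['POP10', 'HOUSING10'], incl_index=True)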
def get_data(directory, year, raw_data_path):
    base_url = 'https://www2.census.gov/geo/tiger/TIGER' + year

    # Create the working directory if it doesn't exist.
    if not os.path.isdir(os.path.join(raw_data_path, year)):
        os.mkdir(os.path.join(raw_data_path, year))
        print('Created directory ' + os.path.join(raw_data_path, year))
    if not os.path.isdir(os.path.join(raw_data_path, year, directory)):
        os.mkdir(os.path.join(raw_data_path, year, directory))
        print('Created directory ' + os.path.join(raw_data_path, year, directory))
    working_directory = os.path.join(raw_data_path, year, directory)

    file_name = 'tl_' + year + '_us_' + directory.lower() + '.csv'
    if os.path.exists(os.path.join(working_directory, file_name)):
        print('Loading data from ' + os.path.join(working_directory, file_name))
        return pd.read_csv(os.path.join(working_directory, file_name))

    file_name = file_name[:-4] + '.zip'
    # Join the URL with '/' explicitly; os.path.join would insert '\\' on Windows.
    download_url = base_url + '/' + directory + '/' + file_name
    zip_file_path = os.path.join(working_directory, file_name)

    # Download
    print('Downloading data from ' + download_url)
    response = requests.get(download_url)
    with open(zip_file_path, 'wb') as f:
        f.write(response.content)

    # Extract zip file
    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(working_directory)
    zip_ref.close()

    # Clean up: convert the extracted .dbf to .csv and remove the .dbf
    files_in_working_directory = os.listdir(working_directory)
    for file in files_in_working_directory:
        file_path = os.path.join(working_directory, file)
        if file_path.endswith('.dbf'):
            dbf = Dbf5(file_path)
            df = dbf.to_dataframe()
            df.to_csv(file_path[:-4] + '.csv', index=False)
            os.remove(file_path)
    return df
def __convert_dbc(self, db):
    if db.endswith('.csv'):
        pass
    elif db.endswith('.dbf'):
        pass
    else:
        # Decompress the .dbc into a .dbf with the external blast tool,
        # then convert the .dbf to csv, trying two codecs.
        after_ = db[:-3] + 'dbf'
        system(f'{self.__blast} {db} {after_}')
        remove(path.expanduser(db))
        # ReadDbf({after_}, convert='convert', tmp=None)
        try:
            dbf = Dbf5(after_, codec="iso-8859-1")
            dbf.to_csv(after_.replace(".dbf", ".csv"))
        except UnicodeDecodeError:
            dbf = Dbf5(after_, codec="utf-8")
            dbf.to_csv(after_.replace(".dbf", ".csv"))
        remove(after_)
        after_ = None
        db = None
def dbf_to_csv(dbf_table_pth):
    # Take a .dbf path and produce a .csv of the same name and location,
    # only the extension changes
    csv_fn = dbf_table_pth[:-4] + ".csv"
    exists = os.path.isfile(dbf_table_pth)
    if exists:
        table = Dbf5(dbf_table_pth, codec='ISO-8859-1')
        table.to_csv(csv_fn)
        return csv_fn  # return the name of the .csv
    else:
        print("The file does not exist")
        return False
def calculate_radiation_for_surfaces(observers_path, data_factor_centroids_csv, sunny_hours_of_year,
                                     temporary_folder, path_arcgis_db):
    arcpy.env.workspace = path_arcgis_db
    arcpy.env.overwriteOutput = True
    arcpy.CheckOutExtension("spatial")

    # local variables
    CQSegments_centroid = os.path.join(path_arcgis_db, 'CQSegmentCentro')
    Outjoin = os.path.join(path_arcgis_db, 'Outjoin')
    CQSegments = os.path.join(path_arcgis_db, 'CQSegment')
    OutTable = 'CentroidsIDobserver.dbf'

    # Create a join of the observer features and the CQ segment centroids to assign
    # names and IDs of observers (field TARGET_FID) to the centroids of the building
    # lines, then create a table to import as a dataframe
    arcpy.SpatialJoin_analysis(CQSegments_centroid, observers_path, Outjoin, "JOIN_ONE_TO_ONE",
                               "KEEP_ALL", match_option="CLOSEST", search_radius="10 METERS")
    arcpy.JoinField_management(Outjoin, 'OBJECTID', CQSegments, 'OBJECTID')  # add the lengths of the lines to the file
    arcpy.TableToTable_conversion(Outjoin, temporary_folder, OutTable)

    # ORIG_FID represents the points in the segments of the simplified shape of the building
    # ORIG_FID_1 is the observer ID
    Centroids_ID_observers0_dbf5 = Dbf5(os.path.join(temporary_folder, OutTable)).to_dataframe()
    Centroids_ID_observers_dbf5 = Centroids_ID_observers0_dbf5[
        ['Name', 'height_ag', 'ORIG_FID', 'ORIG_FID_1', 'Shape_Leng']].copy()
    Centroids_ID_observers_dbf5.rename(columns={'ORIG_FID_1': 'ID'}, inplace=True)

    # Join Centroids_ID_observers with the centroid data from the second chapter
    # to get the shaded-surface values.
    Datacentroids = pd.read_csv(data_factor_centroids_csv)
    DataCentroidsFull = pd.merge(Centroids_ID_observers_dbf5, Datacentroids,
                                 left_on='ORIG_FID', right_on='ORIG_FID')

    # Merge the radiation table with Centroids_ID_observers on the field 'ID'.
    DataRadiation = pd.merge(left=DataCentroidsFull, right=sunny_hours_of_year,
                             left_on='ID', right_on='ID')
    return DataRadiation
def xls2csv(self):
    # Convert the Excel files to CSV
    name0 = [r"{}/行业数据{}.xls".format(PATH1, TODAY),
             r"{}/HYSJ{}.csv".format(PATH3, TODAY)]
    data = pd.read_excel(name0[0], converters={u'证券代码': str, u"CODE": str})
    data.to_csv(name0[1], encoding='gbk')
    name1 = [r"{}/StockIndustry{}.xls".format(PATH2, TODAY),
             r"{}/STOCKINDUSTRY{}.csv".format(PATH3, TODAY)]
    name2 = [r"{}/消费服务备选库.xls".format(PATH2),
             r"{}/XFFWBXK{}.csv".format(PATH3, TODAY)]
    name3 = [r"{}/信用风险债券池.xls".format(PATH2),
             r"{}/XYFXZQC{}.csv".format(PATH3, TODAY)]
    name4 = [r"{}/信用债二级库备选库.xls".format(PATH2),
             r"{}/XYZEJKBXK{}.csv".format(PATH3, TODAY)]
    name6 = [r"{}/债券禁选池原因{}.xls".format(PATH1, TODAY),
             r"{}/ZQJXCYY{}.csv".format(PATH3, TODAY)]
    if os.path.exists(name6[1]):  # check whether the file already exists
        os.remove(name6[1])  # delete it on every run, otherwise it keeps growing
    data6 = Dbf5(name6[0], codec='gbk')
    data6.to_csv(name6[1])
    name5 = [r"{}/BB-库.xls".format(PATH2),
             r"{}/BBK{}.csv".format(PATH3, TODAY)]
    self.name5 = name5
    data5 = pd.read_excel(name5[0], converters={u'证券代码': str, u"CODE": str})
    data5.to_csv(name5[1], encoding='gbk')
    names = [name1, name2, name3, name4]
    for name in names:
        self.csv_name_xls.append(name[1])
        data = pd.read_excel(name[0], converters={u'证券代码': str, u"CODE": str})
        data.to_csv(name[1], encoding='gbk')
def calc_radiation_day(day, sunrise, route):
    radiation_sunnyhours = np.round(
        Dbf5(route + '\\' + 'Day_' + str(day) + '.dbf').to_dataframe(), 2)

    # Obtain the number of points modeled to do the iterations
    radiation_sunnyhours['ID'] = 0
    counter = radiation_sunnyhours.ID.count()
    value = counter + 1
    radiation_sunnyhours['ID'] = range(1, value)

    # Table with empty values with the same range as the points.
    Table = pd.DataFrame.copy(radiation_sunnyhours)
    listtimes = ['T%i' % i for i in range(1, 25)]
    for x in listtimes:
        Table[x] = 0
    Table.drop('T0', axis=1, inplace=True)

    # counter of columns in the initial table
    Counter = radiation_sunnyhours.count(1)[0]
    values = Counter - 1

    # Account for daylight saving time in Switzerland, as the radiation data
    # in ArcGIS is calculated for 2013.
    if 90 <= day < 300:
        D = 1
    else:
        D = 0

    # Calculation of sunrise time
    Sunrise_time = sunrise[day - 1]

    # Shift each sunny hour into its clock-time column
    for x in range(values):
        Hour = int(Sunrise_time) + int(D) + int(x)
        Table['T' + str(Hour)] = radiation_sunnyhours['T' + str(x)]

    # Rename every T column so this day's hours land in the 1..8760 hour range.
    if day <= 1:
        name = 1
    else:
        name = int(day - 1) * 24 + 1
    Table.rename(columns={'T%i' % i: 'T%i' % (name + i - 1) for i in range(1, 25)},
                 inplace=True)
    return Table
def read_dbf(dbf_file, exclude_columns=None):
    # type: (str, list) -> pd.DataFrame
    """
    Read a dbf into a pandas data frame.

    :param dbf_file: Path to the dbf
    :param exclude_columns: List of strings containing column names that shall be excluded. Case sensitive!
    :return: Pandas data frame containing the dbf data
    """
    dbf = Dbf5(dbf_file)
    df = dbf.to_dataframe()
    if exclude_columns:
        df = df.loc[:, df.columns.difference(exclude_columns)]
    return df
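# Hedged usage sketch for read_dbf above; the file and column names are hypothetical.
# Note that Index.difference sorts its result, so the remaining columns come back
# in alphabetical order rather than in the original DBF order.
df = read_dbf('parcels.dbf', exclude_columns=['Shape_Leng', 'Shape_Area'])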
def add_dbf_indicator_by_id(area_level, context_shp_name, context_id_field, context_indic_field):
    indicators = get_or_create_indicators_df(area_level)
    indicators.drop(context_indic_field, axis=1, inplace=True)
    dbf = Dbf5(get_context_shp_path(context_shp_name) + ".dbf")
    context_df = dbf.to_dataframe()
    context_df.drop_duplicates(context_id_field, inplace=True)
    context_df.set_index(context_id_field, inplace=True)
    indicators = indicators.join(context_df[context_indic_field])
    indicators.to_csv(pf.get_indic(area_level), encoding="utf-8")
    return indicators
def load_current_schedule(env):
    sched_dbf_path = os.path.split(env.settings['PRODUCT_DATA_PATH'])[0]
    sched_dbf_path = os.path.join(sched_dbf_path, "EXPORT.DBF")
    print("Loading Current Schedule from {0}".format(sched_dbf_path))
    dbf = Dbf5(sched_dbf_path)
    df = dbf.to_dataframe()

    # Build schedule
    sched = []
    b_id = 0
    booked_inv = 0.0
    off_grade = 0.0
    actual_prod = 0.0
    sched_start_row = df.iloc[0, :]
    start_split = sched_start_row["START_DATE"].split("-")
    if len(sched_start_row["START_TIME"]) > 3:
        start_hour = int(sched_start_row["START_TIME"][:2])
    else:
        start_hour = int(sched_start_row["START_TIME"][0])
    start_min = int(sched_start_row["START_TIME"][-2:])
    sched_start = datetime(int(start_split[0]), int(start_split[1]), int(start_split[2]),
                           start_hour, start_min)
    sched_end_dt = sched_start
    idx = 0
    # Cut the current schedule to only include fixed planning horizon elements
    while sched_end_dt < sched_start + timedelta(hours=24.0 * env.fixed_planning_horizon):
        row = df.iloc[idx, :]
        gmid = int(row["GMID"])
        prod_rate = row["PROD_RATE"]
        prod_qty = row["QUANTITY"]
        prod_time = prod_qty / prod_rate
        start_split = row["START_DATE"].split("-")
        if len(row["START_TIME"]) > 3:
            start_hour = int(row["START_TIME"][:2])
        else:
            start_hour = int(row["START_TIME"][0])
        start_min = int(row["START_TIME"][-2:])
        datetime_start = datetime(int(start_split[0]), int(start_split[1]), int(start_split[2]),
                                  start_hour, start_min)
        prod_start = datetime_start - sched_start
        prod_start = prod_start.total_seconds() / (60 * 60)
        prod_end = int(prod_start + prod_time)
        cure_time = 24
        cure_end = prod_end + cure_time
        inv_index = env.gmids.index(gmid) + 1
        sched_row = [b_id, gmid, prod_rate, prod_qty, prod_time, prod_start, prod_end,
                     cure_time, cure_end, booked_inv, inv_index, off_grade, actual_prod]
        b_id += 1
        sched.append(sched_row)
        idx += 1
        sched_end_dt = datetime_start
    schedule = np.stack(sched)
    return schedule
def collect_parameters(cls, file):
    parameters = []
    parameters.append(ParserXML.get_xpath_node(file, '@path'))
    parameters.append(ParserXML.get_xpath_node(file, '@path_form'))
    parameters.append(ParserXML.get_xpath_node(file, '@suffix'))
    parameters.append(ParserXML.get_xpath_collection(file, 'columns/column/@col'))
    parameters.append(ParserXML.get_xpath_collection(file, 'columns/column/@df_col'))
    parameters.append(ParserXML.get_xpath_node(file, 'merge_parameters/@parameters'))
    parameters.append(ParserBase.collect_filters(file))
    parameters.append(ParserBase.get_with_default(ParserXML.get_xpath_node(file, '@sep'), ','))
    # note: '@enconding' is the attribute's actual spelling in the XML schema
    parameters.append(ParserBase.get_with_default(ParserXML.get_xpath_node(file, '@enconding'), 'utf-8'))
    parameters.append(ParserBase.get_with_default(ParserXML.get_xpath_node(file, '@thousands'), ','))
    parameters.append(ParserBase.get_with_default(ParserXML.get_xpath_node(file, '@decimal'), '.'))
    # Convert the DBF at parameters[0] (path) to CSV at parameters[1] (path_form),
    # using parameters[8] (encoding) as the codec.
    Dbf5(parameters[0], parameters[8]).to_csv(parameters[1])
    return parameters
def analyze_dbf(path):
    print('\n===== .dbf analysis =====')
    dbf = Dbf5(path, codec='UTF-8')
    dbf_records = dbf.numrec
    print('The number of records in the .dbf:', dbf_records)
    # Ideally, the dbf data should be converted to a dataframe and printed.
    # Sometimes that is not possible due to Unicode/decode errors. Check.
    df = dbf.to_dataframe()
    print('Data of the .dbf file:\n', df.head())
    print(df.columns)
    print('Unique country names:')
    print(df.CNTRY_NAME.unique())