def descargar():
    """Download the 3dsdb XML database and convert it to a cleaned CSV.

    Fetches http://3dsdb.com/xml.php, converts each <release> record into a
    semicolon-delimited row of 3dsdb.csv, then strips characters that are
    unwanted in file names. Shows a Tk info box on success; on failure the
    error is reported through mostrarerror().
    """
    try:
        archivoDescargar = "http://3dsdb.com/xml.php"
        inputs = urllib2.urlopen(archivoDescargar)
        output = "3dsdb.csv"
        converter = xml2csv(inputs, output, encoding="utf-8")
        converter.convert(tag="release", delimiter=";")
        # Remove special characters and tidy the names.
        # Use context managers so the CSV handle is always closed.
        with open('3dsdb.csv') as f:
            nuevo_string = f.read()
        # NOTE(review): '333;' looks like the tail of a mangled HTML entity
        # (possibly '&#333;', "o with macron") -- confirm against the feed.
        nuevo_string = re.sub('333;', 'o', nuevo_string)
        nuevo_string = re.sub('&', 'and', nuevo_string)
        nuevo_string = re.sub(':', ' -', nuevo_string)
        nuevo_string = re.sub('"', '', nuevo_string)
        # Two-digit revision markers must be stripped before one-digit ones.
        nuevo_string = re.sub('Rev[0-9][0-9]', '', nuevo_string)
        nuevo_string = re.sub('Rev[0-9]', '', nuevo_string)
        nuevo_string = re.sub(r'[\*|:<>?/#().]', '', nuevo_string)
        nuevo_string = re.sub(' ', ' ', nuevo_string)
        with open('3dsdb.csv', 'w') as f:
            f.write(nuevo_string)
        tkMessageBox.showinfo('Info', "The database has been successfully downloaded", icon='info')
    except Exception:
        # Was a bare "except:", which also swallowed SystemExit and
        # KeyboardInterrupt; Exception keeps the best-effort behavior
        # without hiding interpreter-exit signals.
        mostrarerror("Failed to download database")
    # NOTE(review): assumed to run on both success and failure paths -- the
    # collapsed original does not show its indentation; confirm.
    compdatabase()
def run_xml2csv(): print """xml2csv by Kailash Nadh (http://nadh.in) --help for help """ # parse arguments parser = argparse.ArgumentParser(description='Convert an xml file to csv format.') parser.add_argument('--input', dest='input_file', required=True, help='input xml filename') parser.add_argument('--output', dest='output_file', required=True, help='output csv filename') parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item') parser.add_argument('--delimiter', dest='delimiter', default=', ', help='delimiter character. (default=, comma-space)') parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore') parser.add_argument('--header', dest='header', action='store_false', default=True, help='print csv header (default=True)') parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)') parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process') parser.add_argument('--buffer_size', type=int, dest='buffer_size', default='1000', help='number of records to keep in buffer before writing to disk (default=1000)') args = parser.parse_args() converter = xml2csv(args.input_file, args.output_file, args.encoding) num = converter.convert(tag=args.tag, delimiter=args.delimiter, ignore=args.ignore, header=args.header, limit=args.limit, buffer_size=args.buffer_size) print "\n\nWrote", num, "records to", args.output_file
def run_xml2csv():
    """Command-line entry point (Python 3 variant): convert XML to CSV.

    Required flags: --input, --output, --tag. Optional flags control the
    delimiter, ignored tags, header emission, quoting, encoding, record
    limit and write-buffer size. Prints the number of records written.
    """
    print("""xml2csv
    --help for help
    """)

    # parse arguments
    parser = argparse.ArgumentParser(description='Convert an xml file to csv format.')
    parser.add_argument('--input', dest='input_file', required=True, help='input xml filename')
    parser.add_argument('--output', dest='output_file', required=True, help='output csv filename')
    parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item')
    parser.add_argument('--delimiter', dest='delimiter', default=',', help='delimiter character. (default=,)')
    parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore')
    parser.add_argument('--noheader', dest='noheader', action='store_true', help='exclude csv header (default=False)')
    parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)')
    parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process')
    # Fix: default must be an int. argparse does not apply `type` to default
    # values, so the old default='1000' handed the *string* '1000' to the
    # converter whenever --buffer_size was omitted.
    parser.add_argument('--buffer_size', type=int, dest='buffer_size', default=1000, help='number of records to keep in buffer before writing to disk (default=1000)')
    parser.add_argument('--noquotes', dest='noquotes', action='store_true', help='no quotes around values')

    args = parser.parse_args()

    converter = xml2csv(args.input_file, args.output_file, args.encoding)
    num = converter.convert(tag=args.tag, delimiter=args.delimiter,
                            ignore=args.ignore, noheader=args.noheader,
                            limit=args.limit, buffer_size=args.buffer_size,
                            quotes=not args.noquotes)

    print("\n\nWrote", num, "records to", args.output_file)
def run_on_all_xml(path_name, destination_path=None): from xmlutils.xml2csv import xml2csv path_name = os.path.abspath(path_name) #------open files: if os.path.exists(path_name) == None: print "ilegal path!" return # get all the xml from the folder tasks_list = [] # a folder was sepcified if os.path.isdir(path_name): for filename in os.listdir(path_name): # create all the files to convert tasks_list.append(os.path.join(path_name, filename)) #Perform the specified command on all specified tasks for task in tasks_list: if task[-4:] == '.xml': task_full_path = os.path.join(path_name, task) task_to_create = task_full_path[:-4] + '.csv' #if needs a name change: if task[-6:-4] == 'gz': task_to_create = change_xml_name(path_name, task) #print task_to_create # if a destination was mentioned if destination_path != None: task_csv = task[-4:] + '.csv' task_to_create = os.path.join(destination_path, task_csv) #actual converting from xml to csv: converter = xml2csv(task_full_path, task_to_create, encoding="utf-8") task_to_create = task_to_create.split('-') #if it's shupersal then the chiled is called 'Item', else it is called Product: if task_to_create[1] == '7290027600007': converter.convert(tag="Item") else: converter.convert(tag="Product") #remove xml file: os.remove(task_full_path) elif filename[-5:] == '.json': '''from xmlutils.xml2json import xml2json
def parse_xml_to_csv(self, fName, tag="Row"):
    """Convert an XML file to a CSV file alongside it.

    Currently soft-linked to the FK offers through the default ``tag``;
    other XML formats can be handled by passing a different record tag.

    Parameters
    ----------
    fName: XML filename (extension ``.XML`` is swapped for ``.csv``)
    tag: XML record tag to extract

    Returns
    -------
    self, for call chaining.
    """
    csv_name = fName.replace('.XML', '.csv')
    xml2csv(fName, csv_name, encoding="utf-8").convert(tag=tag)
    return self
def run_xml2csv(): print """xml2csv by Kailash Nadh (http://nadh.in) --help for help """ # parse arguments parser = argparse.ArgumentParser(description="Convert an xml file to csv format.") parser.add_argument("--input", dest="input_file", required=True, help="input xml filename") parser.add_argument("--output", dest="output_file", required=True, help="output csv filename") parser.add_argument("--tag", dest="tag", required=True, help="the record tag. eg: item") parser.add_argument("--delimiter", dest="delimiter", default=",", help="delimiter character. (default=,)") parser.add_argument("--ignore", dest="ignore", default="", nargs="+", help="list of tags to ignore") parser.add_argument("--noheader", dest="noheader", action="store_true", help="exclude csv header (default=False)") parser.add_argument("--encoding", dest="encoding", default="utf-8", help="character encoding (default=utf-8)") parser.add_argument("--limit", type=int, dest="limit", default=-1, help="maximum number of records to process") parser.add_argument( "--buffer_size", type=int, dest="buffer_size", default="1000", help="number of records to keep in buffer before writing to disk (default=1000)", ) args = parser.parse_args() converter = xml2csv(args.input_file, args.output_file, args.encoding) num = converter.convert( tag=args.tag, delimiter=args.delimiter, ignore=args.ignore, noheader=args.noheader, limit=args.limit, buffer_size=args.buffer_size, ) print "\n\nWrote", num, "records to", args.output_file
def descargar():
    """Download the 3dsdb XML database and convert it to a cleaned CSV.

    Fetches http://3dsdb.com/xml.php, converts each <release> record into a
    semicolon-delimited row of 3dsdb.csv, then strips characters that are
    unwanted in file names. Shows a Tk info box on success; on failure the
    error is reported through mostrarerror().
    """
    try:
        archivoDescargar = "http://3dsdb.com/xml.php"
        inputs = urllib2.urlopen(archivoDescargar)
        output = "3dsdb.csv"
        converter = xml2csv(inputs, output, encoding="utf-8")
        converter.convert(tag="release", delimiter=";")
        # Remove special characters and tidy the names.
        # Use context managers so the CSV handle is always closed.
        with open('3dsdb.csv') as f:
            nuevo_string = f.read()
        # NOTE(review): '333;' looks like the tail of a mangled HTML entity
        # (possibly '&#333;', "o with macron") -- confirm against the feed.
        nuevo_string = re.sub('333;', 'o', nuevo_string)
        nuevo_string = re.sub('&', 'and', nuevo_string)
        nuevo_string = re.sub(':', ' -', nuevo_string)
        nuevo_string = re.sub('"', '', nuevo_string)
        # Two-digit revision markers must be stripped before one-digit ones.
        nuevo_string = re.sub('Rev[0-9][0-9]', '', nuevo_string)
        nuevo_string = re.sub('Rev[0-9]', '', nuevo_string)
        nuevo_string = re.sub(r'[\*|:<>?/#().]', '', nuevo_string)
        nuevo_string = re.sub(' ', ' ', nuevo_string)
        with open('3dsdb.csv', 'w') as f:
            f.write(nuevo_string)
        tkMessageBox.showinfo('Info', "The database has been successfully downloaded", icon='info')
    except Exception:
        # Was a bare "except:", which also swallowed SystemExit and
        # KeyboardInterrupt; Exception keeps the best-effort behavior
        # without hiding interpreter-exit signals.
        mostrarerror("Failed to download database")
    # NOTE(review): assumed to run on both success and failure paths -- the
    # collapsed original does not show its indentation; confirm.
    compdatabase()
for input_file in os.listdir('.'): if input_file.endswith(".xml"): numberOfFilesProcessed += 1 NoHeader = False if numberOfFilesProcessed == 0: NoHeader = True else: NoHeader = False print("Converting " + input_file + " " + str(numberOfFilesProcessed) + " files out of " + str(numberOfFiles) + " completed") tree = et.parse(input_file) root = tree.getroot() if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}datasets'): converter = xml2csv(input_file,"./converted_versions/dataset/" + input_file.split('.xml')[0] + ".csv") converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}dataset",noheader=NoHeader) if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}grants'): converter = xml2csv(input_file,"./converted_versions/grant/" + input_file.split('.xml')[0] + ".csv") converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}grant",noheader=NoHeader) if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}publications'): converter = xml2csv(input_file,"./converted_versions/publication/" + input_file.split('.xml')[0] + ".csv") converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}publication",noheader=NoHeader,recordType="publication") if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}researchers'): converter = xml2csv(input_file,"./converted_versions/researcher/" + input_file.split('.xml')[0] + ".csv") converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}researcher",noheader=NoHeader,recordType="researcher") if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}relations'):
#!/usr/bin/env python
"""Convert every input/*.xml <Skill> file to CSV, append the source file's
base name as an extra column on each row, and concatenate all rows (minus
per-file headers) into output/out.csv."""
import os
from xmlutils.xml2csv import xml2csv
import csv

for file in os.listdir("input"):
    if file.endswith(".xml"):
        input_url = os.path.join("input", file)
        output_url = os.path.join("output", file).replace(".xml", ".csv")
        output_temp_url = "output/temp.csv"
        converter = xml2csv(input_url, output_temp_url, encoding="utf-8")
        converter.convert(tag="Skill")
        # Re-write the converted CSV with the file's base name appended to
        # every row so the origin of each record is preserved.
        with open(output_temp_url, 'r') as csvinput:
            with open(output_url, 'w') as csvoutput:
                writer = csv.writer(csvoutput)
                for row in csv.reader(csvinput):
                    writer.writerow(row + [file.split(".")[0]])
        # Portable removal; was os.system("rm -rf output/temp.csv"), which
        # shelled out and only worked on Unix.
        os.remove(output_temp_url)
        # Append to the combined output; context managers replace the
        # original unclosed open() handles, and next(f) replaces the
        # Python-2-only f.next() while skipping the per-file header line.
        with open("output/out.csv", "a") as final_out:
            with open(output_url) as f:
                next(f)
                for line in f:
                    final_out.write(line)
def run_xml2csv(): print """xml2csv by Kailash Nadh (http://nadh.in) --help for help """ # parse arguments parser = argparse.ArgumentParser( description='Convert an xml file to csv format.') parser.add_argument('--input', dest='input_file', required=True, help='input xml filename') parser.add_argument('--output', dest='output_file', required=True, help='output csv filename') parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item') parser.add_argument('--delimiter', dest='delimiter', default=',', help='delimiter character. (default=,)') parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore') parser.add_argument('--noheader', dest='noheader', action='store_true', help='exclude csv header (default=False)') parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)') parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process') parser.add_argument( '--buffer_size', type=int, dest='buffer_size', default='1000', help= 'number of records to keep in buffer before writing to disk (default=1000)' ) args = parser.parse_args() converter = xml2csv(args.input_file, args.output_file, args.encoding) num = converter.convert(tag=args.tag, delimiter=args.delimiter, ignore=args.ignore, noheader=args.noheader, limit=args.limit, buffer_size=args.buffer_size) print "\n\nWrote", num, "records to", args.output_file
def convertCSV():
    """Convert each temporary XML export (plus the main input file fileIn)
    to its corresponding CSV file, extracting <event> records.
    """
    # The original reused the list names as the zip() loop variables
    # ("for array, array2 in zip(array, array2)"), shadowing the lists
    # themselves; distinct names make the pairing explicit.
    xml_files = ["tempadult.xml", "tempkids.xml", "tempteen.xml",
                 "tempstory.xml", "tempbook.xml", "tempfriend.xml", fileIn]
    csv_files = ["tempadults.csv", "tempkids.csv", "tempteen.csv",
                 "tempstory.csv", "tempbook.csv", "tempfriend.csv",
                 "First_Pull.csv"]
    for xml_name, csv_name in zip(xml_files, csv_files):
        converter = xml2csv(xml_name, csv_name, encoding="utf-8")
        converter.convert(tag="event")
from xmlutils.xml2csv import xml2csv

# Convert Users.xml into output.csv, one CSV row per <tag> record.
source_xml = "Users.xml"
target_csv = "output.csv"
converter = xml2csv(source_xml, target_csv, encoding="utf-8")
converter.convert(tag="tag")
def desinventar_clean_transform(input, output):
    """
    Map a DesInventar database export (.xml) into IBF-system CSV format.

    Extracts the <fichas> record section, converts it to CSV via xml2csv,
    renames/merges columns to IBF names and writes the result to *output*.

    Parameters
    ----------
    input : str
        name of input file (.xml)
    output : str
        name of output file (.csv)

    Returns
    -------
    pandas.DataFrame
        The transformed table (also written to *output*).
    """
    # Read the DesInventar dump and keep only the <fichas>...</fichas>
    # record section (the per-event records).
    with open(input, 'r', encoding="utf8") as file:
        data = file.read()
    events = re.search('(?:<fichas>)[\s,\S]+(?:<\/fichas>)', data).group(0)
    with open('raw_data/xml_temp.xml', 'w', encoding='utf8') as file:
        file.write(events)

    # Convert the extracted XML (one <TR> per record) to CSV, naming the
    # CSV after the input file's base name, then drop the temp XML.
    converter = xml2csv("raw_data/xml_temp.xml", "raw_data/{}.csv".format(
        input.split('/')[-1].split('.')[0]), encoding="utf8")
    converter.convert(tag="TR")
    os.remove("raw_data/xml_temp.xml")

    # Read the converted DesInventar data back as a DataFrame.
    df = pd.read_csv("raw_data/{}.csv".format(
        input.split('/')[-1].split('.')[0]))

    # Map DesInventar (Spanish) column names to IBF names. Every column
    # mapped to 'x' is unused and dropped further below.
    dict_columns = {
        'serial': 'x',
        'level0': 'adm1_pcode',
        'level1': 'adm2_pcode',
        'level2': 'adm3_pcode',
        'name0': 'adm1_name',
        'name1': 'adm2_name',
        'name2': 'adm3_name',
        'evento': 'disaster_type',
        'lugar': 'location',
        'fechano': 'year',
        'fechames': 'month',
        'fechadia': 'day',
        'muertos': 'people_dead',
        'heridos': 'people_injured',
        'desaparece': 'missing',
        'afectados': 'people_affected',
        'vivdest': 'house_destroyed',
        'vivafec': 'house_damaged',
        'fuentes': 'data_source_other',
        'valorloc': 'x',
        'valorus': 'x',
        'fechapor': 'x',
        'fechafec': 'date_recorded',
        'hay_muertos': 'x',
        'hay_heridos': 'x',
        'hay_deasparece': 'x',
        'hay_afectados': 'x',
        'hay_vivdest': 'x',
        'hay_vivafec': 'x',
        'hay_otros': 'x',
        'otros': 'x',
        'socorro': 'x',
        'salud': 'hospital_health_center',
        'educacion': 'school',
        'agropecuario': 'agriculture',
        'industrias': 'industry',
        'acueducto': 'aqueduct',
        'alcantarillado': 'sewerage_latrine',
        'energia': 'energy',
        'comunicaciones': 'communication',
        'causa': 'x',
        'descausa': 'x',
        'transporte': 'road',
        'magnitud2': 'x',
        'nhospitales': 'x',
        'nescuelas': 'x',
        'nhectareas': 'lost_crops_ha',
        'cabezas': 'livestock_lost',
        'kmvias': 'x',
        'duracion': 'x',
        'damnificados': 'x',
        'evacuados': 'evacuated',
        'hay_damnificados': 'x',
        'hay_evacuados': 'x',
        'hay_reubicados': 'x',
        'reubicados': 'people_displaced',
        'clave': 'x',
        'glide': 'disaster_id',
        'defaultab': 'x',
        'approved': 'x',
        'latitude': 'x',
        'longitude': 'x',
        'uu_id': 'x',
        'di_comments': 'comments'
    }
    df = df.rename(columns=dict_columns)
    df['disaster_type'] = df['disaster_type'].str.lower()

    # Convert count-like variables to int where possible (errors='ignore'
    # leaves columns with non-numeric values untouched).
    var_to_int = [
        'adm1_pcode', 'adm2_pcode', 'adm3_pcode', 'evacuated',
        'people_affected', 'people_dead', 'missing'
    ]
    df[var_to_int] = df[var_to_int].astype(int, errors='ignore')

    # Merge related variables: evacuated counts into people_affected,
    # missing counts into people_dead (row-wise via sum_cols), then drop
    # the merged/unused columns.
    df['people_affected'] = df.apply(
        lambda x: sum_cols(x, 'people_affected', 'evacuated'), axis=1)
    df['people_dead'] = df.apply(
        lambda x: sum_cols(x, 'people_dead', 'missing'), axis=1)
    df = df.drop(columns=['x', 'evacuated', 'missing'])
    df['data_source'] = 'DesInventar'
    df['data_source_url'] = 'https://www.desinventar.net'
    # Build a single event date from the year/month/day parts; invalid
    # combinations become NaT rather than raising (errors='coerce').
    df['date_event'] = pd.to_datetime(df[['year', 'month', 'day']],
                                      errors='coerce')
    df = df.drop(columns=['year', 'month', 'day'])
    df.to_csv(output)
    return df
def xmltocsv(filename):
    """Convert <filename>.xml into <filename>.csv, extracting G_1 records."""
    log('Start : xmltocsv')
    log('filename :' + filename)
    source_path = filename + ".xml"
    target_path = filename + ".csv"
    xml2csv(source_path, target_path, encoding="utf-8").convert(tag="G_1")
def _df_preview(df):
    """Return the first/last five rows of *df* as a JSON-safe preview dict.

    json.loads replaces the original eval() on the to_json() output:
    eval() fails on JSON literals such as null/true/false (NameError) and
    would execute arbitrary expressions, while json.loads is safe and
    handles all JSON values correctly.
    """
    import json
    head = json.loads(df.head(5).to_json(orient='records'))
    tail = json.loads(df.tail(5).to_json(orient='records'))
    return {"head": head, "tail": tail}


async def check(file: UploadFile = File(...)):
    """Preview an uploaded tabular file.

    Dispatches on the filename extension, converts the upload to CSV where
    needed (writing intermediate files in the working directory), and
    returns the first and last five rows as JSON records. Unknown
    extensions yield an error dict; '.tf' is accepted but not yet handled.
    """
    if file.filename.endswith('.csv'):
        return _df_preview(pd.read_csv(file.file))
    # if file == txt
    elif file.filename.endswith('.txt'):
        read_file = pd.read_csv(file.file)
        read_file.to_csv('txt_to_csv.csv', index=None)
        return _df_preview(pd.read_csv('txt_to_csv.csv'))
    # if file == pdf
    elif file.filename.endswith('.pdf'):
        # NOTE(review): hard-coded API key -- move to configuration.
        c = pdftables_api.Client('upf6leimlx9u')
        c.csv(file.file, 'pdf_to_csv.csv')
        return _df_preview(pd.read_csv('pdf_to_csv.csv'))
    # if file == xls
    elif file.filename.endswith('.xls'):
        data_xls = pd.read_excel(file.file, 'Sheet1', index_col=None)
        data_xls.to_csv('xls_to_csv.csv', encoding='utf-8')
        return _df_preview(pd.read_csv('xls_to_csv.csv'))
    # if file == tsv
    elif file.filename.endswith('.tsv'):
        csv_file = pd.read_table(file.file, sep='\t')
        csv_file.to_csv('tsv_to_csv.csv', index=False)
        return _df_preview(pd.read_csv('tsv_to_csv.csv'))
    # if file == xml
    elif file.filename.endswith('.xml'):
        converter = xml2csv(file.file, "xml_to_csv.csv", encoding="utf-8")
        converter.convert(tag="tag_value_defined_by_user")
        return _df_preview(pd.read_csv('xml_to_csv.csv'))
    # if file == tf_record
    elif file.filename.endswith('.tf'):
        pass
    else:
        # Message typo fixed ("vaild" -> "valid").
        return {"error": "Enter a valid file format"}
"""Convert a Research Graph XML dump (argv[1]) to publication CSV records."""
import xml.etree.ElementTree as et
import sys
import re
from xmlutils.xml2csv import xml2csv

# Renamed from `input`, which shadowed the builtin of the same name.
input_path = str(sys.argv[1])
output_for_publication = 'publication.csv'
output_for_researcher = 'researcher.csv'
output_for_relationship = 'relationship.csv'
# output_file = str(sys.argv[2])

# NOTE(review): this writes to the absolute path /publication/publication.csv
# rather than to output_for_publication defined above -- confirm which
# destination is intended before changing it.
converter = xml2csv(input_path, "/publication/publication.csv")
converter.convert(
    tag="{http://researchgraph.org/schema/v2.0/xml/nodes}publication")