Example #1
def descargar():
    # download the xml and convert it to csv
    try:
        archivoDescargar = "http://3dsdb.com/xml.php"
        inputs = urllib2.urlopen(archivoDescargar)
        output = "3dsdb.csv"
        converter = xml2csv(inputs, output, encoding="utf-8")
        converter.convert(tag="release", delimiter=";")

        # remove special characters and clean up the names
        original_string = open('3dsdb.csv').read()
        nuevo_string = re.sub('&#333;', 'o', original_string)
        nuevo_string = re.sub('&', 'and', nuevo_string)
        nuevo_string = re.sub(':', ' -', nuevo_string)
        nuevo_string = re.sub('"', '', nuevo_string)
        nuevo_string = re.sub('Rev[0-9][0-9]', '', nuevo_string)
        nuevo_string = re.sub('Rev[0-9]', '', nuevo_string)
        nuevo_string = re.sub(r'[\*|:<>?/#().]', '', nuevo_string)
        nuevo_string = re.sub('  ', ' ', nuevo_string)
        open('3dsdb.csv', 'w').write(nuevo_string)
        tkMessageBox.showinfo('Info',
                              "The database has been successfully downloaded",
                              icon='info')
    except:
        mostrarerror("Failed to download database")
        compdatabase()
Example #2
def run_xml2csv():
	print """xml2csv by Kailash Nadh (http://nadh.in)
	--help for help

	"""

	# parse arguments
	parser = argparse.ArgumentParser(description='Convert an xml file to csv format.')
	parser.add_argument('--input', dest='input_file', required=True, help='input xml filename')
	parser.add_argument('--output', dest='output_file', required=True, help='output csv filename')
	parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item')
	parser.add_argument('--delimiter', dest='delimiter', default=', ', help='delimiter character. (default=, comma-space)')
	parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore')
	parser.add_argument('--header', dest='header', action='store_false', default=True, help='print csv header (default=True)')
	parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)')
	parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process')
	parser.add_argument('--buffer_size', type=int, dest='buffer_size', default='1000',
						help='number of records to keep in buffer before writing to disk (default=1000)')

	args = parser.parse_args()

	converter = xml2csv(args.input_file, args.output_file, args.encoding)
	num = converter.convert(tag=args.tag, delimiter=args.delimiter, ignore=args.ignore,
							header=args.header, limit=args.limit, buffer_size=args.buffer_size)

	print "\n\nWrote", num, "records to", args.output_file
Example #3
def run_xml2csv():
    print("""xml2csv
    --help for help

    """)

    # parse arguments
    parser = argparse.ArgumentParser(description='Convert an xml file to csv format.')
    parser.add_argument('--input', dest='input_file', required=True, help='input xml filename')
    parser.add_argument('--output', dest='output_file', required=True, help='output csv filename')
    parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item')
    parser.add_argument('--delimiter', dest='delimiter', default=',', help='delimiter character. (default=,)')
    parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore')
    parser.add_argument('--noheader', dest='noheader', action='store_true', help='exclude csv header (default=False)')
    parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)')
    parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process')
    parser.add_argument('--buffer_size', type=int, dest='buffer_size', default='1000',
                        help='number of records to keep in buffer before writing to disk (default=1000)')
    parser.add_argument('--noquotes', dest='noquotes', action='store_true', help='no quotes around values')

    args = parser.parse_args()

    converter = xml2csv(args.input_file, args.output_file, args.encoding)
    num = converter.convert(tag=args.tag, delimiter=args.delimiter, ignore=args.ignore,
                            noheader=args.noheader, limit=args.limit, buffer_size=args.buffer_size,
                            quotes=not args.noquotes)

    print("\n\nWrote", num, "records to", args.output_file)
def run_on_all_xml(path_name, destination_path=None):

    from xmlutils.xml2csv import xml2csv
    path_name = os.path.abspath(path_name)
    #------open files:
    if not os.path.exists(path_name):
        print "illegal path!"
        return

    # get all the xml from the folder
    tasks_list = []
    # a folder was specified
    if os.path.isdir(path_name):
        for filename in os.listdir(path_name):
            # create all the files to convert
            tasks_list.append(os.path.join(path_name, filename))

    #Perform the specified command on all specified tasks
    for task in tasks_list:

        if task[-4:] == '.xml':

            task_full_path = os.path.join(path_name, task)
            task_to_create = task_full_path[:-4] + '.csv'

            #if needs a name change:
            if task[-6:-4] == 'gz':
                task_to_create = change_xml_name(path_name, task)
                #print task_to_create

            # if a destination directory was given, build the output path there
            if destination_path is not None:
                task_csv = os.path.basename(task)[:-4] + '.csv'
                task_to_create = os.path.join(destination_path, task_csv)

            #actual converting from xml to csv:
            converter = xml2csv(task_full_path,
                                task_to_create,
                                encoding="utf-8")
            task_to_create = task_to_create.split('-')

            # if it's Shupersal, the record tag is called 'Item'; otherwise it is called 'Product':
            if task_to_create[1] == '7290027600007':
                converter.convert(tag="Item")
            else:
                converter.convert(tag="Product")
            #remove xml file:
            os.remove(task_full_path)

        elif task[-5:] == '.json':
            '''from xmlutils.xml2json import xml2json
Example #5
    def parse_xml_to_csv(self, fName, tag="Row"):
        """ This will convert an XML file to a CSV file.
        It is currently soft linked to work with the FK offers through
        the tag specification.

        However, it could likely be adapted to other XML formats
        if they are specified.

        Parameters
        ----------
        fName: XML filename
        tag: XML tag information

        Returns
        -------
        self
        """

        output_name = fName.replace('.XML', '.csv')
        converter = xml2csv(fName, output_name, encoding="utf-8")
        converter.convert(tag=tag)

        return self
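
The method is a thin wrapper around xml2csv; the same conversion can be sketched standalone as below ("offers.XML" is a placeholder input file, not from the source).

from xmlutils.xml2csv import xml2csv

# Same conversion as parse_xml_to_csv above; "offers.XML" is a placeholder input.
fName = "offers.XML"
converter = xml2csv(fName, fName.replace('.XML', '.csv'), encoding="utf-8")
converter.convert(tag="Row")  # "Row" matches the method's default record tag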
Example #6
def run_xml2csv():
    print """xml2csv by Kailash Nadh (http://nadh.in)
	--help for help

	"""

    # parse arguments
    parser = argparse.ArgumentParser(description="Convert an xml file to csv format.")
    parser.add_argument("--input", dest="input_file", required=True, help="input xml filename")
    parser.add_argument("--output", dest="output_file", required=True, help="output csv filename")
    parser.add_argument("--tag", dest="tag", required=True, help="the record tag. eg: item")
    parser.add_argument("--delimiter", dest="delimiter", default=",", help="delimiter character. (default=,)")
    parser.add_argument("--ignore", dest="ignore", default="", nargs="+", help="list of tags to ignore")
    parser.add_argument("--noheader", dest="noheader", action="store_true", help="exclude csv header (default=False)")
    parser.add_argument("--encoding", dest="encoding", default="utf-8", help="character encoding (default=utf-8)")
    parser.add_argument("--limit", type=int, dest="limit", default=-1, help="maximum number of records to process")
    parser.add_argument(
        "--buffer_size",
        type=int,
        dest="buffer_size",
        default="1000",
        help="number of records to keep in buffer before writing to disk (default=1000)",
    )

    args = parser.parse_args()

    converter = xml2csv(args.input_file, args.output_file, args.encoding)
    num = converter.convert(
        tag=args.tag,
        delimiter=args.delimiter,
        ignore=args.ignore,
        noheader=args.noheader,
        limit=args.limit,
        buffer_size=args.buffer_size,
    )

    print "\n\nWrote", num, "records to", args.output_file
Example #8
for input_file in os.listdir('.'):
	if input_file.endswith(".xml"):
		numberOfFilesProcessed += 1

		# include the CSV header only for the first file processed
		if numberOfFilesProcessed == 1:
			NoHeader = False
		else:
			NoHeader = True
		
		print("Converting " + input_file + " " + str(numberOfFilesProcessed) + " files out of " + str(numberOfFiles) + " completed")
		tree = et.parse(input_file)
		root = tree.getroot()
		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}datasets'): 
			converter = xml2csv(input_file,"./converted_versions/dataset/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}dataset",noheader=NoHeader)

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}grants'): 
			converter = xml2csv(input_file,"./converted_versions/grant/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}grant",noheader=NoHeader)

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}publications'): 
			converter = xml2csv(input_file,"./converted_versions/publication/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}publication",noheader=NoHeader,recordType="publication")

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}researchers'): 	
			converter = xml2csv(input_file,"./converted_versions/researcher/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}researcher",noheader=NoHeader,recordType="researcher")

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}relations'): 
Example #9
#!/usr/bin/env python
import os
from xmlutils.xml2csv import xml2csv
import csv

for file in os.listdir("input"):
    if file.endswith(".xml"):
        input_url = os.path.join("input", file)
        output_url = os.path.join("output", file).replace(".xml", ".csv")
        output_temp_url = "output/temp.csv"
        #print(os.path.join("input", file))
        #print output_url
        converter = xml2csv(input_url, output_temp_url, encoding="utf-8")
        converter.convert(tag="Skill")
        with open(output_temp_url, 'r') as csvinput:
            with open(output_url, 'w') as csvoutput:
                writer = csv.writer(csvoutput)
                for row in csv.reader(csvinput):
                    writer.writerow(row + [file.split(".")[0]])
        os.remove(output_temp_url)
        final_out = open("output/out.csv", "a")
        f = open(output_url)
        next(f)  # skip the CSV header row
        for line in f:
            final_out.write(line)
        f.close()
        final_out.close()
def run_xml2csv():
    print """xml2csv by Kailash Nadh (http://nadh.in)
	--help for help

	"""

    # parse arguments
    parser = argparse.ArgumentParser(
        description='Convert an xml file to csv format.')
    parser.add_argument('--input',
                        dest='input_file',
                        required=True,
                        help='input xml filename')
    parser.add_argument('--output',
                        dest='output_file',
                        required=True,
                        help='output csv filename')
    parser.add_argument('--tag',
                        dest='tag',
                        required=True,
                        help='the record tag. eg: item')
    parser.add_argument('--delimiter',
                        dest='delimiter',
                        default=',',
                        help='delimiter character. (default=,)')
    parser.add_argument('--ignore',
                        dest='ignore',
                        default='',
                        nargs='+',
                        help='list of tags to ignore')
    parser.add_argument('--noheader',
                        dest='noheader',
                        action='store_true',
                        help='exclude csv header (default=False)')
    parser.add_argument('--encoding',
                        dest='encoding',
                        default='utf-8',
                        help='character encoding (default=utf-8)')
    parser.add_argument('--limit',
                        type=int,
                        dest='limit',
                        default=-1,
                        help='maximum number of records to process')
    parser.add_argument(
        '--buffer_size',
        type=int,
        dest='buffer_size',
        default='1000',
        help=
        'number of records to keep in buffer before writing to disk (default=1000)'
    )

    args = parser.parse_args()

    converter = xml2csv(args.input_file, args.output_file, args.encoding)
    num = converter.convert(tag=args.tag,
                            delimiter=args.delimiter,
                            ignore=args.ignore,
                            noheader=args.noheader,
                            limit=args.limit,
                            buffer_size=args.buffer_size)

    print "\n\nWrote", num, "records to", args.output_file
Example #11
def convertCSV():
	xml_files = ["tempadult.xml", "tempkids.xml", "tempteen.xml", "tempstory.xml", "tempbook.xml", "tempfriend.xml", fileIn]
	csv_files = ["tempadults.csv", "tempkids.csv", "tempteen.csv", "tempstory.csv", "tempbook.csv", "tempfriend.csv", "First_Pull.csv"]
	for xml_file, csv_file in zip(xml_files, csv_files):
		converter = xml2csv(xml_file, csv_file, encoding="utf-8")
		converter.convert(tag="event")
Example #12
from xmlutils.xml2csv import xml2csv

converter = xml2csv("Users.xml", "output.csv", encoding="utf-8")
converter.convert(tag="tag")
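
The call above passes only the required tag. The argparse wrappers earlier on this page forward several optional keyword arguments as well; the sketch below combines them with placeholder values. Which keywords are accepted (for example noheader versus header) depends on the installed xmlutils version, so treat the exact signature as an assumption.

from xmlutils.xml2csv import xml2csv

# Sketch only: keyword availability (noheader vs header, quotes, ...) varies by xmlutils version.
converter = xml2csv("Users.xml", "output.csv", encoding="utf-8")
converter.convert(
    tag="tag",          # record element treated as one CSV row
    delimiter=";",      # field separator in the output
    ignore=[],          # child tags to skip
    noheader=False,     # write the header row
    limit=-1,           # -1 = convert every record
    buffer_size=1000,   # records buffered before each disk write
)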
Example #13
def desinventar_clean_transform(input, output):
    """
    desinventar_clean_transform

    Simple script that maps desinventar databases (.xml) into IBF-system format

    Parameters
    ----------
    input : str
        name of input file (.xml)
    output : str
        name of output file (.csv)
    """

    # read DesInventar data and filter
    with open(input, 'r', encoding="utf8") as file:
        data = file.read()
    events = re.search(r'<fichas>[\s\S]+</fichas>', data).group(0)
    with open('raw_data/xml_temp.xml', 'w', encoding='utf8') as file:
        file.write(events)

    # fix encoding and save as csv
    converter = xml2csv("raw_data/xml_temp.xml",
                        "raw_data/{}.csv".format(
                            input.split('/')[-1].split('.')[0]),
                        encoding="utf8")
    converter.convert(tag="TR")
    os.remove("raw_data/xml_temp.xml")

    # read DesInventar data as csv
    df = pd.read_csv("raw_data/{}.csv".format(
        input.split('/')[-1].split('.')[0]))

    # change some column names
    dict_columns = {
        'serial': 'x',
        'level0': 'adm1_pcode',
        'level1': 'adm2_pcode',
        'level2': 'adm3_pcode',
        'name0': 'adm1_name',
        'name1': 'adm2_name',
        'name2': 'adm3_name',
        'evento': 'disaster_type',
        'lugar': 'location',
        'fechano': 'year',
        'fechames': 'month',
        'fechadia': 'day',
        'muertos': 'people_dead',
        'heridos': 'people_injured',
        'desaparece': 'missing',
        'afectados': 'people_affected',
        'vivdest': 'house_destroyed',
        'vivafec': 'house_damaged',
        'fuentes': 'data_source_other',
        'valorloc': 'x',
        'valorus': 'x',
        'fechapor': 'x',
        'fechafec': 'date_recorded',
        'hay_muertos': 'x',
        'hay_heridos': 'x',
        'hay_deasparece': 'x',
        'hay_afectados': 'x',
        'hay_vivdest': 'x',
        'hay_vivafec': 'x',
        'hay_otros': 'x',
        'otros': 'x',
        'socorro': 'x',
        'salud': 'hospital_health_center',
        'educacion': 'school',
        'agropecuario': 'agriculture',
        'industrias': 'industry',
        'acueducto': 'aqueduct',
        'alcantarillado': 'sewerage_latrine',
        'energia': 'energy',
        'comunicaciones': 'communication',
        'causa': 'x',
        'descausa': 'x',
        'transporte': 'road',
        'magnitud2': 'x',
        'nhospitales': 'x',
        'nescuelas': 'x',
        'nhectareas': 'lost_crops_ha',
        'cabezas': 'livestock_lost',
        'kmvias': 'x',
        'duracion': 'x',
        'damnificados': 'x',
        'evacuados': 'evacuated',
        'hay_damnificados': 'x',
        'hay_evacuados': 'x',
        'hay_reubicados': 'x',
        'reubicados': 'people_displaced',
        'clave': 'x',
        'glide': 'disaster_id',
        'defaultab': 'x',
        'approved': 'x',
        'latitude': 'x',
        'longitude': 'x',
        'uu_id': 'x',
        'di_comments': 'comments'
    }
    df = df.rename(columns=dict_columns)
    df['disaster_type'] = df['disaster_type'].str.lower()

    # convert some variables to int
    var_to_int = [
        'adm1_pcode', 'adm2_pcode', 'adm3_pcode', 'evacuated',
        'people_affected', 'people_dead', 'missing'
    ]
    df[var_to_int] = df[var_to_int].astype(int, errors='ignore')

    # merge some variables
    df['people_affected'] = df.apply(
        lambda x: sum_cols(x, 'people_affected', 'evacuated'), axis=1)
    df['people_dead'] = df.apply(
        lambda x: sum_cols(x, 'people_dead', 'missing'), axis=1)
    df = df.drop(columns=['x', 'evacuated', 'missing'])

    df['data_source'] = 'DesInventar'
    df['data_source_url'] = 'https://www.desinventar.net'
    df['date_event'] = pd.to_datetime(df[['year', 'month', 'day']],
                                      errors='coerce')
    df = df.drop(columns=['year', 'month', 'day'])
    df.to_csv(output)

    return df
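
A hypothetical invocation of the function above; the file names are placeholders, and a raw_data/ directory is assumed to exist because the function writes its temporary and intermediate CSVs there.

# Placeholder paths; the function stores intermediate files under raw_data/.
df = desinventar_clean_transform('raw_data/desinventar_export.xml', 'ibf_format.csv')
print(df[['date_event', 'disaster_type', 'people_affected']].head())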
Example #14
def xmltocsv(filename):
    log('Start : xmltocsv')
    log('filename :' + filename)
    converter = xml2csv(filename + ".xml", filename + ".csv", encoding="utf-8")
    converter.convert(tag="G_1")
Example #15
async def check(file: UploadFile = File(...)):
 
    if file.filename.endswith('.csv'):
        df = pd.read_csv(file.file)
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }

    
    # if file == txt
    elif file.filename.endswith('.txt'):
        read_file = pd.read_csv(file.file)
        read_file.to_csv('txt_to_csv.csv',index=None)
        df = pd.read_csv('txt_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == pdf
    elif file.filename.endswith('.pdf'):
        c = pdftables_api.Client('upf6leimlx9u')
        c.csv(file.file, 'pdf_to_csv.csv')
        df = pd.read_csv('pdf_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == xls
    elif file.filename.endswith('.xls'):
        data_xls = pd.read_excel(file.file, 'Sheet1', index_col=None)
        data_xls.to_csv('xls_to_csv.csv', encoding='utf-8')
        df = pd.read_csv('xls_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == tsv
    elif file.filename.endswith('.tsv'):
        csv_file = pd.read_table(file.file,sep='\t')
        csv_file.to_csv('tsv_to_csv.csv',index=False)
        df = pd.read_csv('tsv_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == xml
    elif file.filename.endswith('.xml'):
        converter = xml2csv(file.file, "xml_to_csv.csv", encoding="utf-8")
        converter.convert(tag="tag_value_defined_by_user")
        df = pd.read_csv('xml_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }

    # if file == tf_record
    elif file.filename.endswith('.tf'):
        pass
        # content = txt_to_csv(file.read())
        # cli.set("uid",content)
        # return {"status":"done"}
    else:
        return {"error": "Enter a vaild file format"}
Example #16
import xml.etree.ElementTree as et
import sys
import re
from xmlutils.xml2csv import xml2csv

input_file = sys.argv[1]

output_for_publication = 'publication.csv'
output_for_researcher = 'researcher.csv'
output_for_relationship = 'relationship.csv'
#output_file = str(sys.argv[2])

converter = xml2csv(input_file, "/publication/publication.csv")
converter.convert(
    tag="{http://researchgraph.org/schema/v2.0/xml/nodes}publication")

# tree = et.parse(input_file)

# root = tree.getroot()

# cols = []

# row = []

# dict={}

# for col in root[0][0]:
# 	cols.append(re.sub('\{.*?\}','',col.tag))

# for r in root[0][0]:
# 	print(r.text)