Example #1
def descargar():
    # download the xml and convert it to csv
    try:
        archivoDescargar = "http://3dsdb.com/xml.php"
        inputs = urllib2.urlopen(archivoDescargar)
        output = "3dsdb.csv"
        converter = xml2csv(inputs, output, encoding="utf-8")
        converter.convert(tag="release", delimiter=";")

        # remove special characters and clean up the names
        original_string = open('3dsdb.csv').read()
        nuevo_string = re.sub('&#333;', 'o', original_string)
        nuevo_string = re.sub('&', 'and', nuevo_string)
        nuevo_string = re.sub(':', ' -', nuevo_string)
        nuevo_string = re.sub('"', '', nuevo_string)
        nuevo_string = re.sub('Rev[0-9][0-9]', '', nuevo_string)
        nuevo_string = re.sub('Rev[0-9]', '', nuevo_string)
        nuevo_string = re.sub(r'[\*|:<>?/#().]', '', nuevo_string)
        nuevo_string = re.sub('  ', ' ', nuevo_string)
        open('3dsdb.csv', 'w').write(nuevo_string)
        tkMessageBox.showinfo('Info',
                              "The database has been successfully downloaded",
                              icon='info')
    except:
        mostrarerror("Failed to download database")
        compdatabase()
Example #2
def run_xml2csv():
	print """xml2csv by Kailash Nadh (http://nadh.in)
	--help for help

	"""

	# parse arguments
	parser = argparse.ArgumentParser(description='Convert an xml file to csv format.')
	parser.add_argument('--input', dest='input_file', required=True, help='input xml filename')
	parser.add_argument('--output', dest='output_file', required=True, help='output csv filename')
	parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item')
	parser.add_argument('--delimiter', dest='delimiter', default=', ', help='delimiter character. (default=, comma-space)')
	parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore')
	parser.add_argument('--header', dest='header', action='store_false', default=True, help='print csv header (default=True)')
	parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)')
	parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process')
	parser.add_argument('--buffer_size', type=int, dest='buffer_size', default='1000',
						help='number of records to keep in buffer before writing to disk (default=1000)')

	args = parser.parse_args()

	converter = xml2csv(args.input_file, args.output_file, args.encoding)
	num = converter.convert(tag=args.tag, delimiter=args.delimiter, ignore=args.ignore,
							header=args.header, limit=args.limit, buffer_size=args.buffer_size)

	print "\n\nWrote", num, "records to", args.output_file
Example #3
def run_xml2csv():
    print("""xml2csv
    --help for help

    """)

    # parse arguments
    parser = argparse.ArgumentParser(description='Convert an xml file to csv format.')
    parser.add_argument('--input', dest='input_file', required=True, help='input xml filename')
    parser.add_argument('--output', dest='output_file', required=True, help='output csv filename')
    parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item')
    parser.add_argument('--delimiter', dest='delimiter', default=',', help='delimiter character. (default=,)')
    parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore')
    parser.add_argument('--noheader', dest='noheader', action='store_true', help='exclude csv header (default=False)')
    parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)')
    parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process')
    parser.add_argument('--buffer_size', type=int, dest='buffer_size', default='1000',
                        help='number of records to keep in buffer before writing to disk (default=1000)')
    parser.add_argument('--noquotes', dest='noquotes', action='store_true', help='no quotes around values')

    args = parser.parse_args()

    converter = xml2csv(args.input_file, args.output_file, args.encoding)
    num = converter.convert(tag=args.tag, delimiter=args.delimiter, ignore=args.ignore,
                            noheader=args.noheader, limit=args.limit, buffer_size=args.buffer_size,
                            quotes=not args.noquotes)

    print("\n\nWrote", num, "records to", args.output_file)
def run_on_all_xml(path_name, destination_path=None):

    from xmlutils.xml2csv import xml2csv
    path_name = os.path.abspath(path_name)
    #------open files:
    if not os.path.exists(path_name):
        print "illegal path!"
        return

    # get all the xml from the folder
    tasks_list = []
    # a folder was specified
    if os.path.isdir(path_name):
        for filename in os.listdir(path_name):
            # create all the files to convert
            tasks_list.append(os.path.join(path_name, filename))

    #Perform the specified command on all specified tasks
    for task in tasks_list:

        if task[-4:] == '.xml':

            task_full_path = os.path.join(path_name, task)
            task_to_create = task_full_path[:-4] + '.csv'

            #if needs a name change:
            if task[-6:-4] == 'gz':
                task_to_create = change_xml_name(path_name, task)
                #print task_to_create

            # if a destination directory was given, build the output path there
            if destination_path is not None:
                task_csv = os.path.basename(task)[:-4] + '.csv'
                task_to_create = os.path.join(destination_path, task_csv)

            #actual converting from xml to csv:
            converter = xml2csv(task_full_path,
                                task_to_create,
                                encoding="utf-8")
            task_to_create = task_to_create.split('-')

            # if it's Shupersal, the record tag is called 'Item'; otherwise it is called 'Product':
            if task_to_create[1] == '7290027600007':
                converter.convert(tag="Item")
            else:
                converter.convert(tag="Product")
            #remove xml file:
            os.remove(task_full_path)

        elif task[-5:] == '.json':
            '''from xmlutils.xml2json import xml2json
Example #5
    def parse_xml_to_csv(self, fName, tag="Row"):
        """ This will convert an XML file to a CSV file.
        It is currently soft linked to work with the FK offers through
        the tag specification.

        However, it could likely be adapted to other XML formats
        if they are specified.

        Parameters
        ----------
        fName: XML filename
        tag: XML tag information

        Returns
        -------
        self
        """

        output_name = fName.replace('.XML', '.csv')
        converter = xml2csv(fName, output_name, encoding="utf-8")
        converter.convert(tag=tag)

        return self
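
The method is a thin wrapper around xml2csv; the same conversion can be sketched standalone as below ("offers.XML" is a placeholder input file, not from the source).

from xmlutils.xml2csv import xml2csv

# Same conversion as parse_xml_to_csv above; "offers.XML" is a placeholder input.
fName = "offers.XML"
converter = xml2csv(fName, fName.replace('.XML', '.csv'), encoding="utf-8")
converter.convert(tag="Row")  # "Row" matches the method's default record tag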
Example #6
def run_xml2csv():
    print """xml2csv by Kailash Nadh (http://nadh.in)
	--help for help

	"""

    # parse arguments
    parser = argparse.ArgumentParser(description="Convert an xml file to csv format.")
    parser.add_argument("--input", dest="input_file", required=True, help="input xml filename")
    parser.add_argument("--output", dest="output_file", required=True, help="output csv filename")
    parser.add_argument("--tag", dest="tag", required=True, help="the record tag. eg: item")
    parser.add_argument("--delimiter", dest="delimiter", default=",", help="delimiter character. (default=,)")
    parser.add_argument("--ignore", dest="ignore", default="", nargs="+", help="list of tags to ignore")
    parser.add_argument("--noheader", dest="noheader", action="store_true", help="exclude csv header (default=False)")
    parser.add_argument("--encoding", dest="encoding", default="utf-8", help="character encoding (default=utf-8)")
    parser.add_argument("--limit", type=int, dest="limit", default=-1, help="maximum number of records to process")
    parser.add_argument(
        "--buffer_size",
        type=int,
        dest="buffer_size",
        default="1000",
        help="number of records to keep in buffer before writing to disk (default=1000)",
    )

    args = parser.parse_args()

    converter = xml2csv(args.input_file, args.output_file, args.encoding)
    num = converter.convert(
        tag=args.tag,
        delimiter=args.delimiter,
        ignore=args.ignore,
        noheader=args.noheader,
        limit=args.limit,
        buffer_size=args.buffer_size,
    )

    print "\n\nWrote", num, "records to", args.output_file
Example #8
for input_file in os.listdir('.'):
	if input_file.endswith(".xml"):
		numberOfFilesProcessed += 1

		# include the CSV header only for the first file processed
		if numberOfFilesProcessed == 1:
			NoHeader = False
		else:
			NoHeader = True
		
		print("Converting " + input_file + " " + str(numberOfFilesProcessed) + " files out of " + str(numberOfFiles) + " completed")
		tree = et.parse(input_file)
		root = tree.getroot()
		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}datasets'): 
			converter = xml2csv(input_file,"./converted_versions/dataset/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}dataset",noheader=NoHeader)

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}grants'): 
			converter = xml2csv(input_file,"./converted_versions/grant/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}grant",noheader=NoHeader)

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}publications'): 
			converter = xml2csv(input_file,"./converted_versions/publication/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}publication",noheader=NoHeader,recordType="publication")

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}researchers'): 	
			converter = xml2csv(input_file,"./converted_versions/researcher/" + input_file.split('.xml')[0] + ".csv")
			converter.convert(tag="{http://researchgraph.org/schema/v2.0/xml/nodes}researcher",noheader=NoHeader,recordType="researcher")

		if root.findall('{http://researchgraph.org/schema/v2.0/xml/nodes}relations'): 
Example #9
#!/usr/bin/env python
import os
from xmlutils.xml2csv import xml2csv
import csv

for file in os.listdir("input"):
    if file.endswith(".xml"):
        input_url = os.path.join("input", file)
        output_url = os.path.join("output", file).replace(".xml", ".csv")
        output_temp_url = "output/temp.csv"
        #print(os.path.join("input", file))
        #print output_url
        converter = xml2csv(input_url, output_temp_url, encoding="utf-8")
        converter.convert(tag="Skill")
        with open(output_temp_url, 'r') as csvinput:
            with open(output_url, 'w') as csvoutput:
                writer = csv.writer(csvoutput)
                for row in csv.reader(csvinput):
                    writer.writerow(row + [file.split(".")[0]])
        os.remove(output_temp_url)
        final_out = open("output/out.csv", "a")
        f = open(output_url)
        next(f)  # skip the CSV header row
        for line in f:
            final_out.write(line)
        f.close()
        final_out.close()
def run_xml2csv():
    print """xml2csv by Kailash Nadh (http://nadh.in)
	--help for help

	"""

    # parse arguments
    parser = argparse.ArgumentParser(
        description='Convert an xml file to csv format.')
    parser.add_argument('--input',
                        dest='input_file',
                        required=True,
                        help='input xml filename')
    parser.add_argument('--output',
                        dest='output_file',
                        required=True,
                        help='output csv filename')
    parser.add_argument('--tag',
                        dest='tag',
                        required=True,
                        help='the record tag. eg: item')
    parser.add_argument('--delimiter',
                        dest='delimiter',
                        default=',',
                        help='delimiter character. (default=,)')
    parser.add_argument('--ignore',
                        dest='ignore',
                        default='',
                        nargs='+',
                        help='list of tags to ignore')
    parser.add_argument('--noheader',
                        dest='noheader',
                        action='store_true',
                        help='exclude csv header (default=False)')
    parser.add_argument('--encoding',
                        dest='encoding',
                        default='utf-8',
                        help='character encoding (default=utf-8)')
    parser.add_argument('--limit',
                        type=int,
                        dest='limit',
                        default=-1,
                        help='maximum number of records to process')
    parser.add_argument(
        '--buffer_size',
        type=int,
        dest='buffer_size',
        default='1000',
        help=
        'number of records to keep in buffer before writing to disk (default=1000)'
    )

    args = parser.parse_args()

    converter = xml2csv(args.input_file, args.output_file, args.encoding)
    num = converter.convert(tag=args.tag,
                            delimiter=args.delimiter,
                            ignore=args.ignore,
                            noheader=args.noheader,
                            limit=args.limit,
                            buffer_size=args.buffer_size)

    print "\n\nWrote", num, "records to", args.output_file
Example #11
def convertCSV():
	xml_files = ["tempadult.xml", "tempkids.xml", "tempteen.xml", "tempstory.xml", "tempbook.xml", "tempfriend.xml", fileIn]
	csv_files = ["tempadults.csv", "tempkids.csv", "tempteen.csv", "tempstory.csv", "tempbook.csv", "tempfriend.csv", "First_Pull.csv"]
	for xml_file, csv_file in zip(xml_files, csv_files):
		converter = xml2csv(xml_file, csv_file, encoding="utf-8")
		converter.convert(tag="event")
Example #12
from xmlutils.xml2csv import xml2csv

converter = xml2csv("Users.xml", "output.csv", encoding="utf-8")
converter.convert(tag="tag")
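
The call above passes only the required tag. The argparse wrappers earlier on this page forward several optional keyword arguments as well; the sketch below combines them with placeholder values. Which keywords are accepted (for example noheader versus header) depends on the installed xmlutils version, so treat the exact signature as an assumption.

from xmlutils.xml2csv import xml2csv

# Sketch only: keyword availability (noheader vs header, quotes, ...) varies by xmlutils version.
converter = xml2csv("Users.xml", "output.csv", encoding="utf-8")
converter.convert(
    tag="tag",          # record element treated as one CSV row
    delimiter=";",      # field separator in the output
    ignore=[],          # child tags to skip
    noheader=False,     # write the header row
    limit=-1,           # -1 = convert every record
    buffer_size=1000,   # records buffered before each disk write
)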
Example #13
def desinventar_clean_transform(input, output):
    """
    desinventar_clean_transform

    Simple script that maps desinventar databases (.xml) into IBF-system format

    Parameters
    ----------
    input : str
        name of input file (.xml)
    output : str
        name of output file (.csv)
    """

    # read DesInventar data and filter
    with open(input, 'r', encoding="utf8") as file:
        data = file.read()
    events = re.search(r'<fichas>[\s\S]+</fichas>', data).group(0)
    with open('raw_data/xml_temp.xml', 'w', encoding='utf8') as file:
        file.write(events)

    # fix encoding and save as csv
    converter = xml2csv("raw_data/xml_temp.xml",
                        "raw_data/{}.csv".format(
                            input.split('/')[-1].split('.')[0]),
                        encoding="utf8")
    converter.convert(tag="TR")
    os.remove("raw_data/xml_temp.xml")

    # read DesInventar data as csv
    df = pd.read_csv("raw_data/{}.csv".format(
        input.split('/')[-1].split('.')[0]))

    # change some column names
    dict_columns = {
        'serial': 'x',
        'level0': 'adm1_pcode',
        'level1': 'adm2_pcode',
        'level2': 'adm3_pcode',
        'name0': 'adm1_name',
        'name1': 'adm2_name',
        'name2': 'adm3_name',
        'evento': 'disaster_type',
        'lugar': 'location',
        'fechano': 'year',
        'fechames': 'month',
        'fechadia': 'day',
        'muertos': 'people_dead',
        'heridos': 'people_injured',
        'desaparece': 'missing',
        'afectados': 'people_affected',
        'vivdest': 'house_destroyed',
        'vivafec': 'house_damaged',
        'fuentes': 'data_source_other',
        'valorloc': 'x',
        'valorus': 'x',
        'fechapor': 'x',
        'fechafec': 'date_recorded',
        'hay_muertos': 'x',
        'hay_heridos': 'x',
        'hay_deasparece': 'x',
        'hay_afectados': 'x',
        'hay_vivdest': 'x',
        'hay_vivafec': 'x',
        'hay_otros': 'x',
        'otros': 'x',
        'socorro': 'x',
        'salud': 'hospital_health_center',
        'educacion': 'school',
        'agropecuario': 'agriculture',
        'industrias': 'industry',
        'acueducto': 'aqueduct',
        'alcantarillado': 'sewerage_latrine',
        'energia': 'energy',
        'comunicaciones': 'communication',
        'causa': 'x',
        'descausa': 'x',
        'transporte': 'road',
        'magnitud2': 'x',
        'nhospitales': 'x',
        'nescuelas': 'x',
        'nhectareas': 'lost_crops_ha',
        'cabezas': 'livestock_lost',
        'kmvias': 'x',
        'duracion': 'x',
        'damnificados': 'x',
        'evacuados': 'evacuated',
        'hay_damnificados': 'x',
        'hay_evacuados': 'x',
        'hay_reubicados': 'x',
        'reubicados': 'people_displaced',
        'clave': 'x',
        'glide': 'disaster_id',
        'defaultab': 'x',
        'approved': 'x',
        'latitude': 'x',
        'longitude': 'x',
        'uu_id': 'x',
        'di_comments': 'comments'
    }
    df = df.rename(columns=dict_columns)
    df['disaster_type'] = df['disaster_type'].str.lower()

    # convert some variables to int
    var_to_int = [
        'adm1_pcode', 'adm2_pcode', 'adm3_pcode', 'evacuated',
        'people_affected', 'people_dead', 'missing'
    ]
    df[var_to_int] = df[var_to_int].astype(int, errors='ignore')

    # merge some variables
    df['people_affected'] = df.apply(
        lambda x: sum_cols(x, 'people_affected', 'evacuated'), axis=1)
    df['people_dead'] = df.apply(
        lambda x: sum_cols(x, 'people_dead', 'missing'), axis=1)
    df = df.drop(columns=['x', 'evacuated', 'missing'])

    df['data_source'] = 'DesInventar'
    df['data_source_url'] = 'https://www.desinventar.net'
    df['date_event'] = pd.to_datetime(df[['year', 'month', 'day']],
                                      errors='coerce')
    df = df.drop(columns=['year', 'month', 'day'])
    df.to_csv(output)

    return df
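
A hypothetical invocation of the function above; the file names are placeholders, and a raw_data/ directory is assumed to exist because the function writes its temporary and intermediate CSVs there.

# Placeholder paths; the function stores intermediate files under raw_data/.
df = desinventar_clean_transform('raw_data/desinventar_export.xml', 'ibf_format.csv')
print(df[['date_event', 'disaster_type', 'people_affected']].head())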
Example #14
def xmltocsv(filename):
    log('Start : xmltocsv')
    log('filename :' + filename)
    converter = xml2csv(filename + ".xml", filename + ".csv", encoding="utf-8")
    converter.convert(tag="G_1")
Example #15
async def check(file: UploadFile = File(...)):
 
    if file.filename.endswith('.csv'):
        df = pd.read_csv(file.file)
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }

    
    # if file == txt
    elif file.filename.endswith('.txt'):
        read_file = pd.read_csv(file.file)
        read_file.to_csv('txt_to_csv.csv',index=None)
        df = pd.read_csv('txt_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == pdf
    elif file.filename.endswith('.pdf'):
        c = pdftables_api.Client('upf6leimlx9u')
        c.csv(file.file, 'pdf_to_csv.csv')
        df = pd.read_csv('pdf_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == xls
    elif file.filename.endswith('.xls'):
        data_xls = pd.read_excel(file.file, 'Sheet1', index_col=None)
        data_xls.to_csv('xls_to_csv.csv', encoding='utf-8')
        df = pd.read_csv('xls_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == tsv
    elif file.filename.endswith('.tsv'):
        csv_file = pd.read_table(file.file,sep='\t')
        csv_file.to_csv('tsv_to_csv.csv',index=False)
        df = pd.read_csv('tsv_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }
    
    # if file == xml
    elif file.filename.endswith('.xml'):
        converter = xml2csv(file.file, "xml_to_csv.csv", encoding="utf-8")
        converter.convert(tag="tag_value_defined_by_user")
        df = pd.read_csv('xml_to_csv.csv')
        df_head = df.head(5)
        df_tail = df.tail(5)
        # cli.set("userId",str(df))
        head = df_head.to_json(orient='records')
        tail = df_tail.to_json(orient='records')
        head = eval(head)
        tail = eval(tail)
        return {
            "head":head,
            "tail":tail
        }

    # if file == tf_record
    elif file.filename.endswith('.tf'):
        pass
        # content = txt_to_csv(file.read())
        # cli.set("uid",content)
        # return {"status":"done"}
    else:
        return {"error": "Enter a vaild file format"}
Example #16
import xml.etree.ElementTree as et
import sys
import re
from xmlutils.xml2csv import xml2csv

input_file = sys.argv[1]

output_for_publication = 'publication.csv'
output_for_researcher = 'researcher.csv'
output_for_relationship = 'relationship.csv'
#output_file = str(sys.argv[2])

converter = xml2csv(input_file, "/publication/publication.csv")
converter.convert(
    tag="{http://researchgraph.org/schema/v2.0/xml/nodes}publication")

# tree = et.parse(input_file)

# root = tree.getroot()

# cols = []

# row = []

# dict={}

# for col in root[0][0]:
# 	cols.append(re.sub('\{.*?\}','',col.tag))

# for r in root[0][0]:
# 	print(r.text)