Example #1
 def test_json_from_url(self):
     data = Json2xml.fromurl('https://coderwall.com/vinitcool76.json').data
     data_object = Json2xml(data)
     xml_output = data_object.json2xml()
     dict_from_xml = xmltodict.parse(xml_output)
     # the output is valid XML, so xmltodict can parse it and expose the
     # elements under the root <all> tag
     self.assertIsInstance(dict_from_xml['all'], OrderedDict)
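The test snippets in Examples #1, #2, #4, and #14 omit their imports; a minimal sketch of the header they rely on (the Json2xml module path follows Examples #3 and #11 and is an assumption, since it varies between projects):

# Imports assumed by the test snippets; the Json2xml path is project-specific.
from collections import OrderedDict

import xmltodict
from src.json2xml import Json2xml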
Example #2
 def test_json_from_file_works(self):
     data = Json2xml.fromjsonfile('examples/example.json').data
     data_object = Json2xml(data)
     xml_output = data_object.json2xml()
     dict_from_xml = xmltodict.parse(xml_output)
     # the output is valid XML, so xmltodict can parse it and expose the
     # elements under the root <all> tag
     self.assertIsInstance(dict_from_xml['all'], OrderedDict)
Example #3
def json2xml(jsonfile, URL=False):
    try:
        from src.json2xml import Json2xml
    except ImportError:
        # a bare `return` followed by `break` was a syntax error here;
        # catch only the import failure and report it
        return "PLEASE INSTALL JSON2XML"
    if not URL:
        data = Json2xml.fromjsonfile(jsonfile).data
    else:
        data = Json2xml.fromurl(jsonfile).data
    data_object = Json2xml(data)
    return data_object.json2xml()
Example #4
 def test_json_from_string(self):
     data = Json2xml.fromstring(
         '{"login":"******","id":1,"avatar_url":"https://avatars0.githubusercontent.com/u/1?v=4"}'
     ).data
     data_object = Json2xml(data)
     xml_output = data_object.json2xml()
     dict_from_xml = xmltodict.parse(xml_output)
     print('keys', dict_from_xml)
     # the output is valid XML, so xmltodict can parse it and expose the
     # elements under the root <all> tag
     self.assertIsInstance(dict_from_xml['all'], OrderedDict)
Example #5
def main(argv=None):
    parser = argparse.ArgumentParser(description='Utility to convert JSON to valid XML.')
    parser.add_argument('--url', dest='url', action='store')
    parser.add_argument('--file', dest='file', action='store')
    args = parser.parse_args()

    if args.url:
        url = args.url
        data = Json2xml.fromurl(url)
        print(Json2xml.json2xml(data))

    if args.file:
        file = args.file
        data = Json2xml.fromjsonfile(file)
        print(Json2xml.json2xml(data))
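Invoking the CLI above might look like this (the script name cli.py is hypothetical; the URL is the one used in Example #1):

python cli.py --file examples/example.json
python cli.py --url https://coderwall.com/vinitcool76.json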
Example #6
def get_lga(collection):
	params = request.args
	format = params.get('format', 'xml')
	if lgaStr not in params and filterStr not in params:
		response = jsonify(error="Empty input")
		return reponse_format(response, format, True), 400

	if filterStr in params:
		queryStr, queryList = bnf2db.covert2DB(params[filterStr])
		result = DB.getDataByFilter(queryStr, queryList, collection)
		if not result:
			response = jsonify(error="Can't find it in DB")
			return reponse_format(response, format, True), 404
		result = dumps(result, indent=4)
		response = Response()
		response.data = result
		return reponse_format(response, format, True), 200

	suburbList = paramHandle(params)
	if not suburbList:
		response = jsonify(error="Invalid input, can't find suburb")
		return reponse_format(response, format, True), 404

	fetch_list, exist_list = DB.checkListExistInDB(suburbList, collection)
	if len(exist_list) == 0:
		return jsonify(error="Can't find suburb info in DB"), 404

	data = DB.getListFile(collection, exist_list)
	data_object = Json2xml(data)
	jsonData = dumps(data, indent=4)
	xmlData = data_object.json2xml()
	formatData = xmlData
	if format == "json":
		formatData = jsonData
	return formatData
Example #7
def reponse_format(response, format, isATOM):
	response.headers['Content-Type'] = 'application/xml'
	if format == 'json':
		response.headers['Content-Type'] = 'application/json'
		return response
	data = loads(response.data)
	data_object = Json2xml(data)
	xml = data_object.json2xml()
	response.data = xml
	return response
Example #8
def get_all_lga(collection):
	params = request.form
	format = params.get('format', 'xml')
	data = DB.getAllFile(collection)
	data_object = Json2xml(data)
	jsonData = dumps(data, indent=4)
	xmlData = data_object.json2xml()
	formatData = xmlData
	if format == "json":
		formatData = jsonData
	return formatData
Example #9
def get_single_lga(collection, suburb):
	params = request.args
	format = params.get('format', 'json')
	if not getPostcode.is_downloadable_suburb(suburb):
		response = jsonify(error="Invalid input")
		return reponse_format(response, format, True), 400
	
	data = DB.getFile(collection, suburb)
	if not data:
		return jsonify(error="Can't find " + suburb + " info in DB"), 404
	jsonData = dumps(data, indent=4)
	data_object = Json2xml(data)
	xmlData = data_object.json2xml()
	formatData = xmlData
	if format == "json":
		formatData = jsonData
	return formatData
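Examples #6, #8, and #9 all end with the same format-selection block; a minimal sketch of a helper that would factor it out (the name render_payload is hypothetical; dumps and Json2xml are the same names the handlers already use):

def render_payload(data, format):
    # JSON branch mirrors the handlers: pretty-print with dumps()
    if format == "json":
        return dumps(data, indent=4)
    # default branch: convert the fetched document to XML via Json2xml
    return Json2xml(data).json2xml()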
Example #10
def get_xml_configuration(final_content):
    '''
    :param final_content: json configuration
    :return: Returns xml configuration
    '''
    conf = {}
    conf_lst = []
    for name, value in final_content.items():
        conf_lst.append({'name': name, 'value': value})
    conf['property'] = conf_lst
    data_object = Json2xml(conf)
    conf_xml = data_object.json2xml()
    # Json2xml wraps its output in an <all> root; rename it to <configuration>
    conf_xml = conf_xml.replace('<all>', '<configuration>')
    conf_xml = conf_xml.replace('</all>', '</configuration>')
    final_configuration = static_xml_content + conf_xml
    return final_configuration
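A hypothetical call illustrating the conversion above (the property name, its value, and the content of static_xml_content are made-up for illustration):

# Result is roughly: static_xml_content followed by
# <configuration><property><name>fs.defaultFS</name><value>hdfs://localhost:9000</value></property></configuration>
xml_conf = get_xml_configuration({'fs.defaultFS': 'hdfs://localhost:9000'})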
Example #11
from src.json2xml import Json2xml
data = Json2xml.fromurl('https://coderwall.com/vinitcool76.json').data
data_object = Json2xml(data)
xml_data = data_object.json2xml()  # XML output

with open('star.xml', 'w') as file:
    file.write(xml_data)
Example #12
    def toXml(self):

        data = Json2xml.fromstring(self.toJson()).data
        dataConverter = Json2xml(data)

        return dataConverter.json2xml()
Example #13
def json2xml(json_data):
    # Uses a third-party library: https://github.com/vinitkumar/json2xml
    data_object = Json2xml(json_data)
    xml_output = data_object.json2xml()
    return xml_output
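Calling the wrapper above with a small dict (the input data is hypothetical):

xml_output = json2xml({'login': 'octocat', 'id': 1})
print(xml_output)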
Example #14
 def test_is_json_from_file_works(self):
     data = Json2xml.fromjsonfile('examples/example.json').data
     data_object = Json2xml(data)
     xml_output = data_object.json2xml()
     htmlkeys = xml_output.XML_FORMATTERS.keys()
     self.assertTrue('html' in htmlkeys)
Example #15
    def get(self, buoy_id, buoy_data_type):

        # figure out the desired response format, json or XML
        response_mime = flask.request.accept_mimetypes.best_match(
            ['application/json', 'application/xml'])

        if buoy_data_type not in noaa_data_sets:
            response = {
                "message":
                "Unsupported NOAA data set {buoy_data_type}".format(
                    buoy_data_type=buoy_data_type)
            }
            if response_mime == 'application/xml':
                response_data = Json2xml(response).json2xml()
            else:
                response_data = json.dumps(response)
            return flask.Response(response_data,
                                  status=400,
                                  content_type=response_mime)

        bouy_request_url = noaa_buoy_url.format(buoyid=buoy_id,
                                                data_type=buoy_data_type)
        bouy_request = requests.get(bouy_request_url)

        if bouy_request.status_code == 404:
            # Pass through a 404 response from NOAA to the wave_thing client. This is how NOAA indicates
            # that a buoy does not have a data set
            return flask.Response("", status=404)
        elif bouy_request.status_code != 200:
            # The request to NOAA failed. Since this script didn't itself fail, return a 502 Bad Gateway response
            response = {
                "message":
                "NOAA URL {noaa_url} returned response code {request_status_code}. Expecting 200"
                .format(noaa_url=bouy_request_url,
                        request_status_code=bouy_request.status_code),
                "upstream code":
                bouy_request.status_code
            }
            if response_mime == 'application/xml':
                response_data = Json2xml(response).json2xml()
            else:
                response_data = json.dumps(response)
            return flask.Response(response_data,
                                  status=502,
                                  content_type=response_mime)
        else:
            # request was good.
            buoy_response = {
                "buoy_id": buoy_id,
                "data_type": buoy_data_type,
                "data_points": [],
                "request_timestamp_utc": str(arrow.now('utc'))
            }

            # Ignore the comment lines that start with hash
            data_lines = filter(lambda line: not line.startswith("#"),
                                bouy_request.text.splitlines())

            # Loop over the remaining lines and pass each one to the decoding function
            # mapped to the data type string, e.g. data type 'txt' -> txt_response_to_data_points
            for data_line in data_lines:
                fields = timestamp_from_noaa_format_and_normalize_for_missing_data(
                    data_line.split())
                buoy_response["data_points"].append(
                    noaa_data_sets[buoy_data_type](*fields))

            if response_mime == 'application/xml':
                response_data = Json2xml(buoy_response).json2xml()
            else:
                response_data = json.dumps(buoy_response)
            return flask.Response(response_data,
                                  status=200,
                                  content_type=response_mime)
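The handler above performs the same mime-based serialization in three places; a minimal sketch of a helper that would centralize it (the name serialize_for_mime is hypothetical; Json2xml and json are the names the handler already uses):

def serialize_for_mime(payload, response_mime):
    # XML branch mirrors the handler: wrap the payload dict with Json2xml
    if response_mime == 'application/xml':
        return Json2xml(payload).json2xml()
    # otherwise serialize to plain JSON
    return json.dumps(payload)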
Example #16
    def parse(self, response):
        linkitem = LinkItem()
        linkitem['url'] = response.url
        linkitem['response'] = response.status
        linkitem['parsable'] = any(d in response.url for d in parsable_domain_list)

        yield linkitem

        try:

            rawhtml = response.xpath('//html').extract()[0]
            article = DP(html=rawhtml, url=response.url)
            article.get_domaininfo()
            article.inspect_date()
            url_retrieved = []
            url_validate = re.compile(r'^https?')
            # logging.info(article.date_flag)
            # logging.info(article.has_more)

            if article.date_flag:
                article.inspect_article()
                article.clean_data()

            if article.content_flag:
                articleitem = ArticleItem()
                instanceitem = InstanceItem()
                linkritem = LinkRItem()

                articleitem['author'] = article.author
                articleitem['url'] = response.url
                articleitem['title'] = article.title
                articleitem['datetime'] = article.unixtime
                articleitem['domain'] = article.domain

                yield articleitem

                # main article as an instance
                instanceitem['author'] = article.author
                instanceitem['url'] = response.url
                instanceitem['datetime'] = article.datetime
                instanceitem['unixtime'] = article.unixtime
                instanceitem['type'] = 'Article'
                instanceitem['text_body'] = article.content
                instanceitem['text_body_html'] = article.content_html
                instanceitem['likes'] = article.likes
                instanceitem['links_contained'] = []
                instanceitem['relevance'] = article.content_flag
                instanceitem['gen_time'] = time.time()
                for link in article.links:
                    if url_validate.search(str(link['href'])) is not None:
                        instanceitem['links_contained'].append(link['href'])
                        linkritem['link_from'] = response.url
                        linkritem['link_to'] = link['href']
                        linkritem['gen_time'] = instanceitem['gen_time']
                        yield linkritem
                        url_retrieved.append(str(link['href']))
                        yield scrapy.Request(str(link['href']), callback=self.parse)

                instanceitem['links_contained'] = ','.join(instanceitem['links_contained'])

                yield instanceitem

            if article.has_more:
                instance = IP(url=response.url)
                if instance.domain in json2xml_list:
                    instance.get_instanceinfo_json()
                    # logging.info(instance.json_xpath)

                    json_data = Json2xml.fromstring(response.xpath(instance.json_xpath).extract_first()).data
                    json_object = Json2xml(json_data).json2xml()

                    instance_iter = BeautifulSoup(json_object, 'lxml').select(instance.instance_selector)
                    # logging.info(len(instance_iter))
                    for i in instance_iter:
                        instanceitem['author'] = i.find(instance.author_selector).get_text()
                        instanceitem['url'] = response.url
                        instanceitem['datetime'] = i.find_all(instance.datetime_selector)[-1].get_text()
                        instanceitem['unixtime'] = time.mktime(dateparser.parse(instanceitem['datetime']).timetuple())
                        instanceitem['type'] = 'Comment'
                        instanceitem['text_body_html'] = ''
                        instanceitem['text_body'] = i.find_all(instance.content_selector)[-1].get_text()
                        instanceitem['likes'] = ''
                        instanceitem['id'] = i.find_all('url')[-1].get_text()
                        instanceitem['reply_to'] = ''
                        instanceitem['links_contained'] = re.findall(r'(https?://[^\s]+)', instanceitem['text_body'])
                        instanceitem['relevance'] = article.content_flag
                        instanceitem['gen_time'] = time.time()
                        for link in instanceitem['links_contained']:
                            if url_validate.search(str(link)) is not None:
                                linkritem['link_from'] = response.url
                                linkritem['link_to'] = str(link)
                                linkritem['gen_time'] = instanceitem['gen_time']
                                yield linkritem
                                url_retrieved.append(str(link))
                                yield scrapy.Request(str(link), callback=self.parse)

                        instanceitem['links_contained'] = ','.join(instanceitem['links_contained'])

                        if instanceitem['text_body'] is not None:
                            yield instanceitem

                else:
                    instance.get_instanceinfo()

                    instance_iter = response.xpath(instance.instance_xpath)
                    for i in instance_iter:
                        instanceitem['author'] = i.xpath(instance.author_xpath).extract_first()
                        instanceitem['url'] = response.url
                        instanceitem['datetime'] = i.xpath(instance.datetime_xpath).extract_first()
                        instanceitem['unixtime'] = time.mktime(dateparser.parse(instanceitem['datetime']).timetuple())
                        instanceitem['type'] = 'Comment'
                        instanceitem['text_body_html'] = i.xpath(instance.content_html_xpath).extract_first()
                        instanceitem['likes'] = i.xpath(instance.likes_xpath).extract_first()
                        instanceitem['id'] = i.xpath(instance.id_xpath).extract_first()
                        instanceitem['reply_to'] = i.xpath(instance.reply_to_xpath).extract_first()
                        instanceitem['links_contained'] = i.xpath(instance.links_contained_xpath).extract()
                        instanceitem['relevance'] = article.content_flag
                        instanceitem['gen_time'] = time.time()
                        for link in instanceitem['links_contained']:
                            if url_validate.search(str(link)) is not None:
                                linkritem['link_from'] = response.url
                                linkritem['link_to'] = str(link)
                                linkritem['gen_time'] = instanceitem['gen_time']
                                yield linkritem
                                url_retrieved.append(str(link))
                                yield scrapy.Request(str(link), callback=self.parse)

                        instanceitem['links_contained'] = ','.join(instanceitem['links_contained'])

                        if instanceitem['text_body_html'] is not None:
                            instanceitem['text_body'] = BeautifulSoup(instanceitem['text_body_html'],
                                                                      'lxml').get_text().strip()
                            yield instanceitem

        # if not len(url_retrieved) == 0:
        # url_retrieved = list(set(url_retrieved))

        # urlfile = open('urls.txt', 'a')
        # for link in url_retrieved:
        # urlfile.write("{}\n".format(link))
        # yield scrapy.Request(link, callback = self.parse)

        except Exception as e:
            pass
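The json2xml branch in the spider above converts JSON embedded in the page into XML so that BeautifulSoup's CSS selectors can walk it; in isolation the trick looks roughly like this (the sample JSON and selector are hypothetical; tag names in the converted output follow the JSON keys):

from bs4 import BeautifulSoup

json_data = Json2xml.fromstring('{"comments": [{"author": "a", "text": "hi"}]}').data
xml_text = Json2xml(json_data).json2xml()
# the converted XML can now be queried like any other markup
for node in BeautifulSoup(xml_text, 'lxml').select('author'):
    print(node.get_text())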