def get_hash(xml_string):
    """Get the SHA-1 hash of an XML string.

    Blank text, comments, processing instructions and xs:annotation
    elements are stripped, and the document is converted to a sorted
    dict, so equivalent XML strings produce the same hash.

    :param xml_string: XML string (str or bytes) to hash
    :return: hex digest of the SHA-1 hash
    """
    hash_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True,
                                  remove_pis=True)
    etree.set_default_parser(parser=hash_parser)
    # parse the XML string removing blanks, comments, processing instructions
    try:
        xml_tree = etree.parse(BytesIO(xml_string.encode('utf-8')))
    # BUG FIX: was a bare `except:`; narrow to the failures the fallback is
    # meant for (input already bytes, or a str that cannot be re-encoded)
    except (AttributeError, UnicodeDecodeError):
        xml_tree = etree.parse(BytesIO(xml_string))
    # remove all annotations
    annotations = xml_tree.findall(
        ".//{http://www.w3.org/2001/XMLSchema}annotation")
    for annotation in annotations:
        annotation.getparent().remove(annotation)
    clean_xml_string = etree.tostring(xml_tree)
    # transform into dict and order it
    xml_dict = xmltodict.parse(clean_xml_string, dict_constructor=dict)
    clean_ordered_xml_string = str(sort_dict(xml_dict))
    # BUG FIX: sha1 requires bytes (the str form fails on Python 3); also
    # avoid shadowing the builtin `hash`
    digest = hashlib.sha1(clean_ordered_xml_string.encode('utf-8'))
    return digest.hexdigest()
def get_hash(xml_string):
    """ Get the hash of an XML String.

    Removes blank text, comments, processing instructions and annotations
    from the input. Allows to retrieve the same hash for two similar
    XML string.

    Args:
        xml_string (str): XML String to hash

    Returns:
        str: SHA-1 hash of the XML string
    """
    # Install a parser that drops all ignorable content, then build the tree.
    etree.set_default_parser(
        parser=etree.XMLParser(remove_blank_text=True,
                               remove_comments=True,
                               remove_pis=True))
    xml_tree = XSDTree.build_tree(xml_string)

    # Strip every xs:annotation element from the tree.
    for node in xml_tree.findall(
            ".//{http://www.w3.org/2001/XMLSchema}annotation"):
        node.getparent().remove(node)

    # Serialize the cleaned tree, convert it to a dict and hash that dict.
    serialized = XSDTree.tostring(xml_tree)
    return hash_dict(xmltodict.parse(serialized, dict_constructor=dict))
def test_reload_restriction(self):
    # FIXME relaod restriction doesn't work
    fixture = join('restriction', 'basic')
    namespaces = self.request.session['namespaces']

    schema_tree = self.simple_type_data_handler.get_xsd(fixture)
    schema_element = schema_tree.xpath('/xs:schema/xs:simpleType',
                                       namespaces=namespaces)[0]

    self.request.session['curate_edit'] = True

    xml_tree = self.simple_type_data_handler.get_xml(fixture)
    xml_data = etree.tostring(xml_tree)
    xml_value = xml_tree.xpath("/root", namespaces=namespaces)[0].text

    # Reload the XML with a parser that strips ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    edit_data_tree = etree.XML(str(xml_data.encode('utf-8')))

    result = generate_simple_type(self.request, schema_element, schema_tree,
                                  full_path='/root',
                                  default_value=xml_value,
                                  edit_data_tree=edit_data_tree)

    expected = self.simple_type_data_handler.get_json(fixture + '.reload')
    self.assertDictEqual(result[1], expected)
def test_reload_sequence_unbounded(self):
    # fixme reload sequence unbounded has a bug
    # fixme choice iter and inner element repeated
    fixture = join("sequence", "unbounded")
    namespaces = self.request.session["namespaces"]

    schema_tree = self.choice_data_handler.get_xsd(fixture)
    schema_element = schema_tree.xpath(
        "/xs:schema/xs:complexType/xs:choice", namespaces=namespaces)[0]

    self.request.session["curate_edit"] = True

    xml_data = etree.tostring(self.choice_data_handler.get_xml(fixture))

    # Reload the XML with a parser that strips ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    edit_data_tree = etree.XML(str(xml_data.encode("utf-8")))

    result = generate_choice(self.request, schema_element, schema_tree,
                             full_path="/root",
                             edit_data_tree=edit_data_tree)

    expected = self.choice_data_handler.get_json(fixture + ".reload")
    self.assertDictEqual(result[1], expected)
def test_multiple(self):
    fixture = join('multiple', 'basic')
    namespaces = self.request.session['namespaces']

    schema_tree = self.extension_data_handler.get_xsd(fixture)
    schema_element = schema_tree.xpath(
        '/xs:schema/xs:element/xs:complexType/xs:complexContent/xs:extension',
        namespaces=namespaces)[0]

    xml_data = etree.tostring(self.extension_data_handler.get_xml(fixture))

    # Reload the XML with a parser that strips ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    edit_data_tree = etree.XML(str(xml_data.encode('utf-8')))

    # The edited tree doubles as the default value here.
    result = generate_extension(self.request, schema_element, schema_tree,
                                full_path='/root[1]',
                                default_value=edit_data_tree,
                                edit_data_tree=edit_data_tree)

    expected = self.extension_data_handler.get_json(fixture + '.reload')
    self.assertDictEqual(result[1], expected)
def get_hash(xml_string):
    """Get the SHA-1 hash of an XML string.

    Blank text, comments, processing instructions and xs:annotation
    elements are stripped, and the document is converted to a sorted
    dict, so equivalent XML strings produce the same hash.

    :param xml_string: XML string (str or bytes) to hash
    :return: hex digest of the SHA-1 hash
    """
    hash_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True,
                                  remove_pis=True)
    etree.set_default_parser(parser=hash_parser)
    # parse the XML String removing blanks, comments, processing instructions
    try:
        xml_tree = etree.parse(BytesIO(xml_string.encode('utf-8')))
    # BUG FIX: was a bare `except:`; narrow to the failures the fallback is
    # meant for (input already bytes, or a str that cannot be re-encoded)
    except (AttributeError, UnicodeDecodeError):
        xml_tree = etree.parse(BytesIO(xml_string))
    # remove all annotations
    annotations = xml_tree.findall(".//{http://www.w3.org/2001/XMLSchema}annotation")
    for annotation in annotations:
        annotation.getparent().remove(annotation)
    clean_xml_string = etree.tostring(xml_tree)
    # transform into dict and order it
    xml_dict = xmltodict.parse(clean_xml_string, dict_constructor=dict)
    clean_ordered_xml_string = str(sort_dict(xml_dict))
    # BUG FIX: sha1 requires bytes (the str form fails on Python 3); also
    # avoid shadowing the builtin `hash`
    digest = hashlib.sha1(clean_ordered_xml_string.encode('utf-8'))
    return digest.hexdigest()
def setup_html():
    """Import lxml.etree if available and install a DTD-validating default
    parser whose resolver maps DTD system ids to local files.

    Returns the etree module, or the module-level fallback value when lxml
    is missing.
    """
    global etree
    try:
        from lxml import etree
    except ImportError:
        print("can't validate the XHTML parts in Jinja2 templates"
              " (no lxml installed)")
    if etree and pv(etree.__version__) < pv('2.0.0'):
        # 2.0.7 and 2.1.x are known to work.
        print("can't validate the XHTML parts in Jinja2 templates"
              " (lxml < 2.0, api incompatibility)")
    if etree:
        # Note: this code derived from trac/tests/functional (FIXME)
        class Resolver(etree.Resolver):
            # ./contrib/jinjachecker.py
            #    <- we live here
            # ./trac/tests/functional/
            #    <- there are the DTDs
            contrib_dir = dirname(abspath(__file__))
            base_dir = normpath(join(contrib_dir, '../trac/tests/functional'))

            def resolve(self, system_url, public_id, context):
                # Resolve any DTD reference to the local copy by basename.
                filename = join(self.base_dir, system_url.split("/")[-1])
                return self.resolve_filename(filename, context)
        parser = etree.XMLParser(dtd_validation=True)
        parser.resolvers.add(Resolver())
        etree.set_default_parser(parser)
    return etree
def test_reload_multiple(self):
    # fixme test broken
    fixture = join('multiple', 'basic')
    namespaces = self.request.session['namespaces']

    schema_tree = self.complex_type_data_handler.get_xsd(fixture)
    schema_element = schema_tree.xpath('/xs:schema/xs:complexType',
                                       namespaces=namespaces)[0]

    self.request.session['curate_edit'] = True

    xml_data = etree.tostring(self.complex_type_data_handler.get_xml(fixture))

    # Reload the XML with a parser that strips ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    edit_data_tree = etree.XML(str(xml_data.encode('utf-8')))

    result = generate_complex_type(self.request, schema_element, schema_tree,
                                   full_path='/root',
                                   edit_data_tree=edit_data_tree)

    expected = self.complex_type_data_handler.get_json(fixture + '.reload')
    self.assertDictEqual(result[1], expected)
def import_file(self, source):
    """imports BuildingSync file

    :param source: string | object, path to file or a file like object
    """
    etree.set_default_parser(etree.XMLParser(remove_blank_text=True))

    # Parse from a path (checking it exists) or from a file-like object.
    if isinstance(source, str):
        if not os.path.isfile(source):
            raise ParsingError("File not found: {}".format(source))
        with open(source) as fh:
            self.element_tree = etree.parse(fh)
    else:
        self.element_tree = etree.parse(source)

    self.version = self._parse_version()

    # If the namespace map is missing the auc or xsi prefix, fix the tree.
    nsmap = self.element_tree.getroot().nsmap
    if nsmap.get('auc') is None or nsmap.get('xsi') is None:
        self.fix_namespaces()

    # Point xsi:schemaLocation at the schema matching the parsed version.
    self.element_tree.getroot().set(
        '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation',
        'http://buildingsync.net/schemas/bedes-auc/2019 https://raw.githubusercontent.com/BuildingSync/schema/v{}/BuildingSync.xsd'
        .format(self.version))
    return True
def test_reload_restriction(self):
    # FIXME relaod restriction doesn't work
    xsd_files = join('restriction', 'basic')
    ns = self.request.session['namespaces']
    xsd_tree = self.simple_type_data_handler.get_xsd(xsd_files)
    xsd_element = xsd_tree.xpath('/xs:schema/xs:simpleType', namespaces=ns)[0]
    self.request.session['curate_edit'] = True
    xml_tree = self.simple_type_data_handler.get_xml(xsd_files)
    xml_data = etree.tostring(xml_tree)
    xml_value = xml_tree.xpath("/root", namespaces=ns)[0].text
    # Install a parser that strips blanks, comments and PIs, then reload
    # the serialized XML as the edit tree.
    clean_parser = etree.XMLParser(remove_blank_text=True,
                                   remove_comments=True,
                                   remove_pis=True)
    etree.set_default_parser(parser=clean_parser)
    edit_data_tree = etree.XML(str(xml_data.encode('utf-8')))
    result_string = generate_simple_type(self.request,
                                         xsd_element,
                                         xsd_tree,
                                         full_path='/root',
                                         default_value=xml_value,
                                         edit_data_tree=edit_data_tree)
    expected_dict = self.simple_type_data_handler.get_json(
        xsd_files + '.reload')
    self.assertDictEqual(result_string[1], expected_dict)
def test_reload_simple_type_unbounded(self):
    fixture = join('simple_type', 'unbounded')
    namespaces = self.request.session['namespaces']

    schema_tree = self.element_data_handler.get_xsd(fixture)
    schema_element = schema_tree.xpath(
        '/xs:schema/xs:complexType/xs:sequence/xs:element',
        namespaces=namespaces)[0]

    self.request.session['curate_edit'] = True

    xml_data = etree.tostring(self.element_data_handler.get_xml(fixture))

    # Reload the XML with a parser that strips ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    edit_data_tree = etree.XML(str(xml_data.encode('utf-8')))

    result = generate_element(self.request, schema_element, schema_tree,
                              full_path='/root',
                              edit_data_tree=edit_data_tree)

    expected = self.element_data_handler.get_json(fixture + '.reload')
    self.assertDictEqual(result[1], expected)
def download_xml_build_req(request):
    """Django view handling download of the OAI-PMH build request XML.

    POST: pretty-print the XML held in session, store it as an
    XML2Download record and return the record id as JSON (the actual
    download is triggered by a follow-up GET because this is called
    via Ajax).
    GET: stream the stored XML back as an attachment, then delete the
    record.
    """
    #POST request
    if request.method == 'POST':
        if 'xmlStringOAIPMH' in request.session:
            #We retrieve the XML file in session
            xmlDataObject = request.session['xmlStringOAIPMH']
            try:
                # Load a parser able to clean the XML from blanks, comments and processing instructions
                clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
                # set the parser
                etree.set_default_parser(parser=clean_parser)
                # load the XML tree from the text
                xmlDoc = etree.XML(str(xmlDataObject.encode('utf-8')))
                xmlStringEncoded = etree.tostring(xmlDoc, pretty_print=True)
            except:
                # NOTE(review): bare except — on any parse failure the raw
                # session string is served unchanged (deliberate best-effort)
                xmlStringEncoded = xmlDataObject
            #Get the date to append it to the file title
            i = datetime.datetime.now()
            title = "OAI_PMH_BUILD_REQ_%s_.xml" % i.isoformat()
            #Use the XML2Download collection to save the XML to download. We can't directly return the XML
            #because this method is called via Ajax. We need to save the XML and call the GET request of this function
            #in the success part of the Ajax call
            xml2download = XML2Download(title=title, xml=xmlStringEncoded).save()
            xml2downloadID = str(xml2download.id)
            #Return the ID to call the GET request with it
            response_dict = {"xml2downloadID": xml2downloadID}
            return HttpResponse(json.dumps(response_dict), content_type='application/javascript')
        else:
            return HttpResponseBadRequest(
                'An error occured. 
 Please reload the page and try again.')
    else:
        #Get the XML2Download ID
        xml2downloadID = request.GET.get('id', None)
        if xml2downloadID is not None:
            #Get the XML
            xmlDataObject = XML2Download.objects.get(pk=xml2downloadID)
            #Encode the XML
            xmlStringEncoded = xmlDataObject.xml.encode('utf-8')
            fileObj = StringIO(xmlStringEncoded)
            #Delete the record
            xmlDataObject.delete()
            #Check that the file is ending by .xml
            if not xmlDataObject.title.lower().endswith('.xml'):
                xmlDataObject.title += ".xml"
            #Return the XML file
            response = HttpResponse(FileWrapper(fileObj), content_type='application/xml')
            response[
                'Content-Disposition'] = 'attachment; filename=' + xmlDataObject.title
            request.session['xmlStringOAIPMH'] = xmlStringEncoded
            return response
        else:
            return redirect('/')
def read_xml_file(xml_path, namespace_dict=None, remove_comments=False):
    """Parse an XML file, install its parser as default, and register any
    given prefix -> URI namespace mappings for later serialization.
    """
    xml_parser = etree.XMLParser(remove_comments=remove_comments)
    parsed_tree = etree.parse(xml_path, parser=xml_parser)
    etree.set_default_parser(xml_parser)
    if namespace_dict:
        for prefix, uri in namespace_dict.items():
            etree.register_namespace(prefix, uri)
    return parsed_tree
def cleanXML(self, xml):
    """Return the XML string re-serialized without blank text, comments
    or processing instructions.
    """
    # Install a parser that drops all ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    # Load the XML tree from the text, then serialize it back.
    tree = etree.XML(str(xml.encode('utf-8')))
    return etree.tostring(tree)
def __init__(self, filename=None, log=None):
    """Store the log and optional filename; install a blank-stripping
    default XML parser.
    """
    self.log = log
    if filename is not None:
        self.filename = filename
    ET.set_default_parser(ET.XMLParser(remove_blank_text=True))
def MyParser():
    """Context-manager generator that makes XSD_PARSER the default lxml
    parser for the duration of the `with` body, restoring the implied
    default afterwards (no-op when XSD_PARSER is already the default).
    """
    # BUG FIX: get_default_parser is a function — the original compared the
    # function object itself to XSD_PARSER, which is never true, so the
    # parser was always reset even when already installed. Call it instead.
    if etree.get_default_parser() is XSD_PARSER:
        yield
    else:
        etree.set_default_parser(XSD_PARSER)
        try:
            yield
        finally:
            # Restore lxml's implied default parser.
            etree.set_default_parser()
def __init__(self, **kwargs):
    """Constructs an SVGParser object.

    Arguments:
        **kwargs: See lxml.etree.XMLParser.__init__().
    """
    parser = etree.XMLParser(**kwargs)
    # Map parsed elements onto the SVG element classes.
    parser.set_element_class_lookup(SVGElementClassLookup())
    self._parser = parser
    etree.set_default_parser(parser)
def resolve_dependencies(request):
    """Django view (Python 2): resolve schema includes/imports for an
    uploaded Template or Type, validate it, then save it either as a new
    version or as a new object.

    Expects schemaLocations[]/dependencies[] in POST and the upload state
    in the session; returns a JSON redirect on success or the validation
    error on failure.
    """
    print 'BEGIN def resolveDependencies(request)'
    schema_locations = request.POST.getlist('schemaLocations[]')
    dependencies = request.POST.getlist('dependencies[]')
    # All four upload session keys must be present and non-None.
    if ('uploadObjectName' in request.session and request.session['uploadObjectName'] is not None
            and 'uploadObjectFilename' in request.session and request.session['uploadObjectFilename'] is not None
            and 'uploadObjectContent' in request.session and request.session['uploadObjectContent'] is not None
            and 'uploadObjectType' in request.session and request.session['uploadObjectType'] is not None):
        object_content = request.session['uploadObjectContent']
        name = request.session['uploadObjectName']
        filename = request.session['uploadObjectFilename']
        object_type = request.session['uploadObjectType']
        # Load a parser able to clean the XML from blanks, comments and processing instructions
        clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
        # set the parser
        etree.set_default_parser(parser=clean_parser)
        xsd_tree = etree.XML(str(object_content.encode('utf-8')))
        # replace includes/imports by API calls (get dependencies starting by the imports)
        update_dependencies(xsd_tree, dict(zip(schema_locations, dependencies)))
        # validate the schema
        error = validate_xml_schema(xsd_tree)
        if error is None:
            object_content = etree.tostring(xsd_tree)
            # create a new version
            if 'uploadVersion' in request.session and request.session['uploadVersion'] is not None:
                object_versions_id = request.session['uploadVersion']
                if object_type == 'Template':
                    new_template = create_template_version(object_content, filename, object_versions_id)
                    redirect = '/admin/manage_versions?type={0}&id={1}'.format(object_type, str(new_template.id))
                elif object_type == 'Type':
                    new_type = create_type_version(object_content, filename, object_versions_id)
                    redirect = '/admin/manage_versions?type={0}&id={1}'.format(object_type, str(new_type.id))
            # create new object
            else:
                # save the object
                if object_type == "Template":
                    create_template(object_content, name, filename, dependencies)
                    redirect = '/admin/xml-schemas/manage-schemas'
                elif object_type == "Type":
                    # NOTE(review): buckets is only defined inside this
                    # branch, so create_type must run here — confirm against
                    # the original indentation
                    if 'uploadBuckets' in request.session and request.session['uploadBuckets'] is not None:
                        buckets = request.session['uploadBuckets']
                        create_type(object_content, name, filename, buckets, dependencies)
                    redirect = '/admin/xml-schemas/manage-types'
            response_dict = {'redirect': redirect}
            messages.add_message(request, messages.INFO, '{} uploaded with success.'.format(object_type))
            return HttpResponse(json.dumps(response_dict), content_type='application/javascript')
        else:
            # Strip quotes so the message embeds safely in JSON/JS.
            response_dict = {'errorDependencies': error.replace("'", "")}
            return HttpResponse(json.dumps(response_dict), content_type='application/javascript')
def __init__(self, k, ref):
    """Set up an HTTP session for the given site key and referer URL."""
    etree.set_default_parser(etree.HTMLParser())
    self.k = k
    session = requests.Session()
    session.headers = self.headers.copy()
    # e.g. 'https://www.google.com/recaptcha/api2/demo'
    session.headers['Referer'] = ref
    session.params = {'k': k}
    self.s = session
def __init__(self):
    """Initialize with an HTML default parser and a Firefox user agent."""
    etree.set_default_parser(etree.HTMLParser())
    user_agent = ('Mozilla/5.0 (X11; Linux x86_64; rv:47.0) '
                  'Gecko/20100101 Firefox/47.0')
    super().__init__(pp,
                     {'User-Agent': user_agent},
                     proxy_headers=proxy_headers,
                     timeout=timeout)
    self.baseurl = None
def download_xml_build_req(request):
    """Django view handling download of the OAI-PMH build request XML.

    POST: pretty-print the XML held in session, store it as an
    XML2Download record and return the record id as JSON (the actual
    download is triggered by a follow-up GET because this is called
    via Ajax).
    GET: stream the stored XML back as an attachment, then delete the
    record.
    """
    #POST request
    if request.method == 'POST':
        if 'xmlStringOAIPMH' in request.session:
            #We retrieve the XML file in session
            xmlDataObject = request.session['xmlStringOAIPMH']
            try:
                # Load a parser able to clean the XML from blanks, comments and processing instructions
                clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
                # set the parser
                etree.set_default_parser(parser=clean_parser)
                # load the XML tree from the text
                xmlDoc = etree.XML(str(xmlDataObject.encode('utf-8')))
                xmlStringEncoded = etree.tostring(xmlDoc, pretty_print=True)
            except:
                # NOTE(review): bare except — on any parse failure the raw
                # session string is served unchanged (deliberate best-effort)
                xmlStringEncoded = xmlDataObject
            #Get the date to append it to the file title
            i = datetime.datetime.now()
            title = "OAI_PMH_BUILD_REQ_%s_.xml" % i.isoformat()
            #Use the XML2Download collection to save the XML to download. We can't directly return the XML
            #because this method is called via Ajax. We need to save the XML and call the GET request of this function
            #in the success part of the Ajax call
            xml2download = XML2Download(title=title, xml=xmlStringEncoded).save()
            xml2downloadID = str(xml2download.id)
            #Return the ID to call the GET request with it
            response_dict = {"xml2downloadID": xml2downloadID}
            return HttpResponse(json.dumps(response_dict), content_type='application/javascript')
        else:
            return HttpResponseBadRequest('An error occured. 
 Please reload the page and try again.')
    else:
        #Get the XML2Download ID
        xml2downloadID = request.GET.get('id', None)
        if xml2downloadID is not None:
            #Get the XML
            xmlDataObject = XML2Download.objects.get(pk=xml2downloadID)
            #Encode the XML
            xmlStringEncoded = xmlDataObject.xml.encode('utf-8')
            fileObj = StringIO(xmlStringEncoded)
            #Delete the record
            xmlDataObject.delete()
            #Check that the file is ending by .xml
            if not xmlDataObject.title.lower().endswith('.xml'):
                xmlDataObject.title += ".xml"
            #Return the XML file
            response = HttpResponse(FileWrapper(fileObj), content_type='application/xml')
            response['Content-Disposition'] = 'attachment; filename=' + xmlDataObject.title
            request.session['xmlStringOAIPMH'] = xmlStringEncoded
            return response
        else:
            return redirect('/')
def __init__(self):
    """Initialize with an HTML default parser, a long timeout and a
    disk-backed cache directory.
    """
    etree.set_default_parser(etree.HTMLParser())
    request_headers = {
        'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64; rv:57.0) '
                       'Gecko/20100101 Firefox/57.0'),
        'Accept-Language': 'en-US,en;q=0.5',
    }
    super().__init__(pp, headers=request_headers, timeout=180)
    self.cache = basedir
def main():
    """Generate an ARGoS config file for one (speed, robots, alpha, rho)
    parameter combination.

    Usage: script <folder> <numberofrobots> <alpha> <rho> <speed>
    """
    number_of_args = len(sys.argv)
    # Keep comments while parsing; they are stripped manually below.
    parser = etree.XMLParser(remove_comments=False)
    etree.set_default_parser(parser)
    if (number_of_args < 6):
        print_help()
        exit(-1)
    # Command-line arguments.
    folder = sys.argv[1]
    numberofrobots = int(sys.argv[2])
    alpha = float(sys.argv[3])
    rho = float(sys.argv[4])
    speed = float(sys.argv[5])
    generated_configs_folder = folder + "/generated_configs"
    if not os.path.exists(generated_configs_folder):
        os.makedirs(generated_configs_folder)
    tree = etree.parse(folder + "/kilobot_generic_controller.argos")
    root = tree.getroot()
    for params in root.iter('params'):
        # print(params.attrib)
        # Point the controller at the behavior binary built for (alpha, rho).
        if (params.get("behavior") == "build/behaviors_simulation/CRWLEVY_2.0_0.90"):
            params.set(
                "behavior",
                "build/behaviors_simulation/CRWLEVY_" + "%.1f_" % alpha + "%.2f" % rho)
        if (params.get("linearvelocity")):
            params.set("linearvelocity", "%.2f" % speed)
    for loop_functions in root.iter('loop_functions'):
        # print(loop_functions.attrib)
        loop_functions.set("alpha", "%.1f" % alpha)
        loop_functions.set("rho", "%.2f" % rho)
        loop_functions.set("num_robots", "%d" % numberofrobots)
        loop_functions.set("speed", "%.2f" % speed)
    tree.write(generated_configs_folder +
               "/kilobot_sim_%.02f_%d_%.1f_%.2f.argos" % (speed, numberofrobots, alpha, rho),
               xml_declaration=True)
    # NOTE(review): comments are removed from the in-memory tree only AFTER
    # tree.write(), so the written file still contains them — confirm whether
    # this removal was meant to run before writing.
    comments = tree.xpath('//comment()')
    for c in comments:
        p = c.getparent()
        p.remove(c)
def get_flat(self):
    """ Returns the flattened file

    Returns:
    """
    # Install a parser that strips blanks, comments and PIs, then parse.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    tree = XSDTree.build_tree(self.xml_string)
    # Replace the includes by their content.
    return self._replace_all_includes_by_content(tree)
def test_reverse24x24conversion(self):
    """Check each generated 24x24 SVG, resized back to 22x22 with its
    offset group unwrapped, matches the corresponding 22x22 resource:
    same top-level tags, and every resource attribute present on the
    generated element.
    """
    self.maxDiff = None
    for dirpath, dirnames, filenames in os.walk(GEN_DIR):
        for f in filenames:
            # Generated filepath
            gen_filepath = os.path.join(dirpath, f)
            # Only real 24x24 SVG files (skip symlinks)
            if not (f.endswith('.svg') and '/24' in gen_filepath) or os.path.islink(gen_filepath):
                continue
            etree.set_default_parser(
                etree.XMLParser(remove_blank_text=True))
            # Generated root
            gen_root = etree.parse(gen_filepath).getroot()
            # Res filepath and root
            res_filepath = gen_filepath.replace(GEN_DIR, RES_DIR,
                                                1).replace('/24', '/22')
            res_root = etree.parse(res_filepath).getroot()
            # Resize to 22x22
            gen_root.set('viewBox', "0 0 22 22")
            gen_root.set('width', "22")
            gen_root.set('height', "22")
            # Remove group that moves content down 1px, right 1px
            group = gen_root.find('./{http://www.w3.org/2000/svg}g',
                                  NAMESPACES)
            gen_root.extend(get_renderable_elements(group))
            gen_root.remove(group)
            # BUG FIX: the loop previously bound elements from res_root to
            # gen_elem and vice versa, so the attribute-subset assertion ran
            # backwards (checking gen's attrs against res instead of res's
            # attrs against gen).
            for res_elem, gen_elem in zip(
                    res_root.iterfind('./svg:*', NAMESPACES),
                    gen_root.iterfind('./svg:*', NAMESPACES)):
                self.assertEqual(res_elem.tag, gen_elem.tag, gen_filepath)
                for attribute in res_elem.items():
                    self.assertIn(attribute, gen_elem.items(), gen_filepath)
def test_reload_attribute(self):
    fixture = join('attribute', 'basic')
    namespaces = self.request.session['namespaces']

    schema_tree = self.complex_type_data_handler.get_xsd(fixture)
    schema_element = schema_tree.xpath('/xs:schema/xs:complexType',
                                       namespaces=namespaces)[0]

    self.request.session['curate_edit'] = True

    xml_data = etree.tostring(self.complex_type_data_handler.get_xml(fixture))

    # Reload the XML with a parser that strips ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    edit_data_tree = etree.XML(str(xml_data.encode('utf-8')))

    result = generate_complex_type(self.request, schema_element, schema_tree,
                                   full_path='/root',
                                   edit_data_tree=edit_data_tree)

    expected = self.complex_type_data_handler.get_json(fixture + '.reload')
    self.assertDictEqual(result[1], expected)
def main():
    """Generate a visualization-enabled copy of the generic kilobot
    controller config (adds a <visualization> block with a fixed camera).

    Usage: script <folder>
    """
    etree.set_default_parser(etree.XMLParser(remove_comments=False))
    if len(sys.argv) < 2:
        print_help()
        exit(-1)
    folder = sys.argv[1]
    generated_configs_folder = folder
    if not os.path.exists(generated_configs_folder):
        os.makedirs(generated_configs_folder)
    tree = etree.parse(folder + "/kilobot_generic_controller.argos")
    root = tree.getroot()
    # Propagate the controller's linear velocity to the loop functions.
    speed = "0.0"
    for params in root.iter('params'):
        speed = params.get("linearvelocity", speed)
    for loop_functions in root.iter('loop_functions'):
        loop_functions.set("speed", speed)
    # Append the visualization section with a fixed camera placement.
    visualization = etree.SubElement(root, "visualization")
    qt_opengl = etree.SubElement(visualization, "qt-opengl")
    camera = etree.SubElement(qt_opengl, "camera")
    etree.SubElement(camera, "placement",
                     idx="0",
                     position="-0.616296,0.025,0.461661",
                     look_at="0.0978462,0.025,-0.23834",
                     up="0.700001,0,0.714142",
                     lens_focal_length="20")
    tree.write(generated_configs_folder + "/kilobot_generic_controller_viz.argos",
               xml_declaration=True)
def get_flat(self):
    """Return self.xmlString serialized with every direct xs:include
    replaced by the content of its flattened dependency.
    """
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    # parse the XML String removing blanks, comments, processing instructions
    tree = etree.parse(BytesIO(self.xmlString.encode('utf-8')))
    # Inline each include's flattened content, then drop the include element.
    for el_include in tree.findall("{http://www.w3.org/2001/XMLSchema}include"):
        flat_dependency = self.get_flat_dependency(el_include.attrib['schemaLocation'])
        if flat_dependency is not None:
            dependency_tree = etree.fromstring(flat_dependency)
            for element in dependency_tree.getchildren():
                tree.getroot().append(element)
            el_include.getparent().remove(el_include)
    return etree.tostring(tree)
def test_sequence(self):
    fixture = join('sequence', 'basic')

    schema_tree = self.extension_data_handler.get_xsd(fixture)
    # NOTE: namespaces come from self.session here (not self.request.session).
    schema_element = schema_tree.xpath(
        '/xs:schema/xs:element/xs:complexType/xs:complexContent/xs:extension',
        namespaces=self.session['namespaces'])[0]

    xml_data = etree.tostring(self.extension_data_handler.get_xml(fixture))
    xml_value = ''

    # Reload the XML with a parser that strips ignorable content.
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))
    edit_data_tree = etree.XML(str(xml_data.encode('utf-8')))

    result = generate_extension(self.request, schema_element, schema_tree,
                                full_path='/root[1]',
                                default_value=xml_value,
                                edit_data_tree=edit_data_tree)

    expected = self.extension_data_handler.get_json(fixture + '.reload')
    self.assertDictEqual(result[1], expected)
def get_tree_and_root_by_file_name(input_file_name=None, log_file=None, verbosity=None): me = 'get_tree_and_root_by_file_name' ''' Parse given input file as an xml document For any exception, write it to give logfile and return None, None Parse the document and return tuple of doctree and doctree.get_root() ''' #parser = etree.XMLParser(remove_comments=False, remove_blank_text=True) parser = etree.XMLParser(encoding='utf-8',remove_blank_text=True, remove_comments=False) etree.set_default_parser(parser) try: with open(input_file_name, mode='r', encoding='utf-8-sig') \ as input_bytes_file: try: tree = etree.parse(input_bytes_file, parser=parser) except Exception as e: log_msg = ( "{}:Skipping exception='{}' in input_file_name='{}'" .format(me, repr(e), input_file_name)) print(log_msg, file=log_file) return None, None except Exception as ex: msg=f"{me}: skipping exception to read {input_file_name}" print(msg, file=log_file, flush=True) print(msg, flush=True) return None, None # end with open return tree, tree.getroot()
#!/usr/bin/env python3 from lxml import etree etree.set_default_parser(etree.HTMLParser()) import os import subprocess import requests from urllib.parse import urljoin from io import BytesIO tmpdir = './tmp/' indexes = [ 'http://www.budget.gov.au/2014-15/content/bp1/html/index.htm', 'http://www.budget.gov.au/2014-15/content/bp2/html/index.htm', 'http://www.budget.gov.au/2014-15/content/bp3/html/index.htm', 'http://www.budget.gov.au/2014-15/content/bp4/html/index.htm' ] chunk_size = 4096 def main(): pdfs = [] for index_uri in indexes: print("up to:", index_uri) data = requests.get(index_uri).content et = etree.parse(BytesIO(data)) for elem in et.xpath('//a[contains(@href, ".pdf")]'): href = elem.get('href') if href.find('consolidated') == -1: continue idx = len(pdfs) pdf = os.path.join(tmpdir, '%d.pdf' % (idx)) pdfs.append(pdf)
from scrapi.base.transformer import XMLTransformer, JSONTransformer from scrapi.base.helpers import ( updated_schema, build_properties, oai_get_records_and_token, compose, date_formatter, null_on_error, coerce_to_list ) logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) etree.set_default_parser(etree.XMLParser(recover=True)) class HarvesterMeta(abc.ABCMeta): def __init__(cls, name, bases, dct): super(HarvesterMeta, cls).__init__(name, bases, dct) if len(cls.__abstractmethods__) == 0 and cls.short_name not in settings.disabled: registry[cls.short_name] = cls() else: logger.info('Class {} not added to registry'.format(cls.__name__)) @six.add_metaclass(HarvesterMeta) class BaseHarvester(object): """ This is a base class that all harvesters should inheret from
#!/usr/bin/env python # SetupCophylogenyAnalysis.py import sys from optparse import OptionParser try: from lxml import etree as ElementTree from lxml.etree import Element, Comment, XMLParser ElementTree.set_default_parser(XMLParser(remove_blank_text=True, remove_comments=True)) except ImportError: print >> sys.stderr, '*** lxml unavailable, falling back on standard XML', \ 'implementation ***' from xml.etree import ElementTree from xml.etree.ElementTree import Element, Comment parser = OptionParser() parser.add_option('-a', '--associations', dest='assoc_filename') parser.add_option('-i', '--host', dest='HOST_TAXON') parser.add_option('-j', '--host-xml', dest='HOST_XML') parser.add_option('-s', '--symbiont', dest='SYMBIONT_TAXON') parser.add_option('-t', '--symbiont-xml', dest='SYMBIONT_XML') (options, args) = parser.parse_args() # XML tags, attributes, etc. ATTR = 'attr' BRANCH_RATES = 'branchRates' CLOCK = 'clock' COALESCENT_LIKELIHOOD = 'coalescentLikelihood' COEVOLUTION = 'coevolution'
from xmodule.mako_module import MakoDescriptorSystem from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT, ModuleStoreEnum, ModuleStoreReadBase from xmodule.modulestore.xml_exporter import DEFAULT_CONTENT_FIELDS from xmodule.tabs import CourseTabList from xmodule.x_module import ( # lint-amnesty, pylint: disable=unused-import AsideKeyGenerator, OpaqueKeyReader, XMLParsingSystem, policy_key) from .exceptions import ItemNotFoundError from .inheritance import InheritanceKeyValueStore, compute_inherited_metadata, inheriting_field_data edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_comments=True, remove_blank_text=True) etree.set_default_parser(edx_xml_parser) log = logging.getLogger(__name__) class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): # lint-amnesty, pylint: disable=abstract-method, missing-class-docstring def __init__( self, xmlstore, course_id, course_dir, # lint-amnesty, pylint: disable=too-many-statements error_tracker, load_error_modules=True, target_course_id=None, **kwargs): """
import traceback
import threading
from Queue import Queue

__author__ = 'leifj'


def _is_self_signed_err(ebuf):
    # Return True when the xmlsec error buffer contains an OpenSSL X509
    # verification failure whose message starts with "err=18"
    # (self-signed certificate).
    for e in ebuf:
        if e['func'] == 'xmlSecOpenSSLX509StoreVerify' and re.match('err=18', e['message']):
            return True
    return False

# Disable entity resolution globally (XXE mitigation).
etree.set_default_parser(etree.XMLParser(resolve_entities=False))


def _e(error_log, m=None):
    # Join log entries into one string, dropping WARNING entries; when m is
    # given, keep only entries whose text contains m.
    def _f(x):
        if ":WARNING:" in x:
            return False
        if m is not None and not m in x:
            return False
        return True
    return "\n".join(filter(_f, ["%s" % e for e in error_log]))


class MetadataException(Exception):
    # Base exception for metadata processing errors.
    pass
parser.add_argument('-f', '-filePath', dest='filePath', help='Facebook chat log file (HTML file)', default='raw/messages') parser.add_argument("-max", "-maxExportedMessages", dest='maxExportedMessages', type=int, default=1000000, help="maximum number of messages to export") args = parser.parse_args() maxExportedMessages = args.maxExportedMessages ownName = args.ownName filePath = args.filePath data = [] warnedNameChanges = [] nbInvalidSender = 0 # make sure we don't crash if chat logs contain exotic characters etree.set_default_parser(etree.XMLParser(encoding='utf-8', ns_clean=True, recover=True)) for filename in os.listdir(filePath): if not filename.endswith('.html'): continue archive = etree.parse(filePath + "/" + filename) conversationId = filename.replace('.html', '') groupConversation = False timestamp = '' senderName = '' conversationWithName = None text = ''
from lxml import etree
from django.db import models
from django.db.models import signals
from django.utils.translation import ugettext as _
from django.core import urlresolvers
from django.template import defaultfilters
from django.conf import settings
from django.core.cache import cache

import gdata.youtube
import gdata.youtube.service

logger = logging.getLogger(__name__)

# Prevent: XMLSyntaxError: Attempt to load network entity
etree.set_default_parser(etree.XMLParser(no_network=False, recover=True))


def youtube_entry_generator(entries, exclude=''):
    """Yield feed entries whose video id could be parsed and is not excluded."""
    OK = (
        'Syndication of this video was restricted by its owner.',
    )
    # The video id is the last path component of the entry's id URL.
    id_pattern = re.compile(r'.*/([0-9A-Za-z_-]*)/?$')
    for entry in entries:
        match = id_pattern.match(entry.id.text)
        if match is None:
            continue
        video_id = match.group(1)
        if not video_id or video_id in exclude:
            continue
        yield entry
GNU Radio Companion is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA """ from lxml import etree from .utils import odict xml_failures = {} etree.set_default_parser(etree.XMLParser(remove_comments=True)) class XMLSyntaxError(Exception): def __init__(self, error_log): self._error_log = error_log xml_failures[error_log.last_error.filename] = error_log def __str__(self): return '\n'.join(map(str, self._error_log.filter_from_errors())) def validate_dtd(xml_file, dtd_file=None): """ Validate an xml file against its dtd.
def __init__(self, file_name=None, input_stream=None, language=None,
             version="2.0", header=None, encoding="utf-8",
             dtd_validation=False):
    """Prepare the document basic structure.

    The document is loaded from a file (*file_name*), parsed from raw XML
    (*input_stream*) or created empty.  Layers found in the parsed tree are
    bound to attributes; missing optional layers are set to None, while the
    mandatory header/raw/text layers are created when absent.

    :param file_name: path of a KAF/NAF file to load
    :param input_stream: raw XML content (unicode or bytes)
    :param language: value for the document language attribute, if given
    :param version: KAF/NAF version attribute
    :param header: header object forwarded to set_header()
    :param encoding: encoding used to serialize unicode input
    :param dtd_validation: enable DTD validation in the XML parser
    """
    self.encoding = encoding
    self.logger = getLogger(__name__)
    parser = etree.XMLParser(remove_comments=False, dtd_validation=dtd_validation)
    etree.set_default_parser(parser)
    if file_name:
        self.tree = etree.parse(file_name)
        self.root = self.tree.getroot()
    elif input_stream:
        if isinstance(input_stream, unicode):
            input_stream = input_stream.encode(encoding)
        self.root = etree.fromstring(input_stream)
        self.tree = etree.ElementTree(self.root)
    else:
        self.root = etree.Element(self.KAF_TAG, self.NS)
        self.tree = etree.ElementTree(self.root)
    if language:
        self.root.attrib[self.LANGUAGE_ATTRIBUTE] = language
    if version:
        self.root.set(self.VERSION_ATTRIBUTE, version)

    # NOTE(review): len(element) counts child elements only, so a layer that
    # is present but has no children is treated as missing and recreated.
    # Kept as-is to preserve existing behaviour.
    headers = self.tree.find(self.KAF_HEADER_TAG)
    if headers is not None and len(headers):
        self.kaf_header = headers
    else:
        # create nafheader element and put it in the beginning of
        # the document
        self.kaf_header = etree.Element(self.KAF_HEADER_TAG)
        self.root.insert(0, self.kaf_header)
    if header:
        self.set_header(header)

    raw_layer = self.tree.find(self.RAW_LAYER_TAG)
    if raw_layer is not None and len(raw_layer):
        self.raw = raw_layer
    else:
        self.raw = etree.SubElement(self.root, self.RAW_LAYER_TAG)

    text_layer = self.tree.find(self.TEXT_LAYER_TAG)
    if text_layer is not None and len(text_layer):
        self.text = text_layer
    else:
        self.text = etree.SubElement(self.root, self.TEXT_LAYER_TAG)

    terms_layer = self.tree.find(self.TERMS_LAYER_TAG)
    # BUG FIX: the original tested `text_layer is not None` here, which
    # raised TypeError (len(None)) whenever a text layer existed without a
    # terms layer, and silently dropped an existing terms layer whenever the
    # text layer was absent.  Test the terms layer itself.
    if terms_layer is not None and len(terms_layer):
        self.terms = terms_layer
    else:
        self.terms = None

    dependencies_layer = self.tree.find(self.DEPENDENCY_LAYER_TAG)
    if dependencies_layer is not None and len(dependencies_layer):
        self.dependencies = dependencies_layer
    else:
        self.dependencies = None

    chunks_layer = self.tree.find(self.CHUNKS_LAYER_TAG)
    if chunks_layer is not None and len(chunks_layer):
        self.chunks = chunks_layer
    else:
        self.chunks = None

    constituency_layer = self.tree.find(self.CONSTITUENCY_LAYER)
    if constituency_layer is not None and len(constituency_layer):
        self.constituency = constituency_layer
    else:
        self.constituency = None

    named_entities_layer = self.tree.find(self.NAMED_ENTITIES_LAYER_TAG)
    if named_entities_layer is not None and len(named_entities_layer):
        self.entities = named_entities_layer
    else:
        self.entities = None

    coreference_layer = self.tree.find(self.COREFERENCE_LAYER_TAG)
    if coreference_layer is not None and len(coreference_layer):
        self.coreference = coreference_layer
    else:
        self.coreference = None
from itertools import chain from lazy import lazy from tempfile import NamedTemporaryFile as NamedTempFile import regex as re import random from lxml import etree from html import unescape __all__ = ["Structure", "iterstruct", "config"] __version__ = "0.0.0" # disable security preventing DoS attacks with huge files etree.set_default_parser(etree.ETCompatXMLParser(huge_tree=True)) STRUCTS = None class Structure(): """A structure extracted from a vertical. """ def __init__(self, raw_vert, structs): self.raw = raw_vert.strip() + "\n" self.structs = structs first_line = self.raw.split("\n", maxsplit=1)[0] self.name = re.search(r"\w+", first_line).group() self.attr = dict(re.findall(r'(\w+)="(.*?)"', first_line)) @lazy def xml(self):
<!DOCTYPE foo [ <!ELEMENT foo ANY > <!ENTITY xxe SYSTEM "file:///etc/passwd" >]> <foo>&xxe;</foo> ''' string_xml_external_url_1='''<?xml version="1.0" encoding="ISO-8859-1"?> <!DOCTYPE foo [ <!ELEMENT foo ANY > <!ENTITY xxe SYSTEM "http://127.0.0.1/1" >]><foo>&xxe;</foo> ''' #Binary file (X) string_xml_external_file='''<?xml version="1.0" encoding="utf-8"?> <!DOCTYPE entity [ <!ENTITY file SYSTEM "file:///usr/bin/whoami"> ]> <wooyun> <external>&file;</external> </wooyun> ''' xml_parse = etree.XMLParser(no_network=False) etree.set_default_parser(xml_parse) dom = etree.fromstring(string_xml_external_url) node_internal = dom.xpath('//external') print node_internal[0].text
print("SKIP: validation of XHTML output in functional tests" " (lxml < 2.0, api incompatibility)") etree = None if etree: class _Resolver(etree.Resolver): base_dir = dirname(abspath(__file__)) def resolve(self, system_url, public_id, context): return self.resolve_filename(join(self.base_dir, system_url.split("/")[-1]), context) _parser = etree.XMLParser(dtd_validation=True) _parser.resolvers.add(_Resolver()) etree.set_default_parser(_parser) def _format_error_log(data, log): msg = [] for entry in log: context = data.splitlines()[max(0, entry.line - 5): entry.line + 6] msg.append("\n# %s\n# URL: %s\n# Line %d, column %d\n\n%s\n" % (entry.message, entry.filename, entry.line, entry.column, "\n".join([each.decode('utf-8') for each in context]))) return "\n".join(msg).encode('ascii', 'xmlcharrefreplace') def _validate_xhtml(func_name, *args, **kwargs): page = b.get_html() if "xhtml1-strict.dtd" not in page:
# Parse the service logs once up front so failures can be matched to errors.
ERRORS = {
    'murano-engine.log': parse_log_file(os.path.join(LOG_PATH, "murano-engine.log")),
    'murano-api.log': parse_log_file(os.path.join(LOG_PATH, "murano-api.log")),
}

# Aggregate test-run counters, filled from the xunit report below.
STATS = {
    'total': 0,
    'success': 0,
    'skip': 0,
    'error': 0,
    'failure': 0,
}
REPORT = {}

# huge_tree lifts lxml's hardening limits so very large xunit files parse.
et.set_default_parser(et.XMLParser(huge_tree=True))
tree = et.parse(sys.argv[1])  # argv[1]: path to the xunit XML report
root = tree.getroot()

# The root <testsuite> element carries the summary counts as attributes.
STATS['total'] = int(root.attrib['tests'])
STATS['failure'] = int(root.attrib['failures'])
STATS['error'] = int(root.attrib['errors'])
STATS['skip'] = int(root.attrib['skip'])
STATS['unsuccess'] = STATS['failure'] + STATS['error'] + STATS['skip']
STATS['success'] = STATS['total'] - STATS['unsuccess']

for case in root:
    class_name = case.attrib['classname']
    # Screenshot taken on failure is named after the test case.
    screenshot_file = 'artifacts/screenshots/%s.png' % case.attrib['name']
def __init__(self, infn, edxdir='.', org="UnivX", semester="2014_Spring", verbose=False, clean_up_html=True):
    """Convert a moodle backup (directory or *.mbz archive) into an edX course tree.

    :param infn: moodle backup directory, or a *.mbz archive (unzipped to a temp dir)
    :param edxdir: output directory for the generated edX course
    :param org: organization id written into the top-level course.xml pointer file
    :param semester: course run name; the course XML becomes course/<semester>.xml
    :param verbose: enable verbose output
    :param clean_up_html: post-process extracted HTML when True
    """
    if infn.endswith('.mbz'):
        # import gzip, tarfile
        # dir = tarfile.TarFile(fileobj=gzip.open(infn))
        # *.mbz archive given: unzip it into a fresh temp dir under out/tmp.
        infnabs = os.path.abspath(infn)
        d = os.path.abspath('out/tmp')
        mdir = tempfile.mkdtemp(prefix="moodle2edx", dir=d)
        curdir = os.path.abspath('.')
        os.chdir(mdir)
        print 'Temporary dir: {}'.format(mdir)
        os.system('unzip {} -d {} > /dev/null 2>&1'.format(infnabs, mdir))
        os.chdir(curdir)
    else:
        mdir = infn
    if not os.path.isdir(mdir):
        print "Input argument should be directory name or moodle *.mbz backup file"
        sys.exit(0)
    self.verbose = verbose
    self.edxdir = path(edxdir)
    self.moodle_dir = path(mdir)
    self.clean_up_html = clean_up_html
    if not self.edxdir.exists():
        os.mkdir(self.edxdir)

    def mkdir(mdir):
        # Create a subdirectory of the edX output dir if missing.
        if not os.path.exists('%s/%s' % (self.edxdir, mdir)):
            os.mkdir(self.edxdir / mdir)
    edirs = ['html', 'problem', 'course', 'static']
    for ed in edirs:
        mkdir(ed)
    self.URLNAMES = []  # url_name strings already handed out (kept unique elsewhere)

    mfn = 'moodle_backup.xml'  # top-level moodle backup xml
    qfn = 'questions.xml'  # moodle questions xml
    qdict = self.load_questions(mdir, qfn)

    self.convert_static_files()

    parser = etree.XMLParser(encoding='utf-8')
    etree.set_default_parser(parser)
    moodx = etree.parse('%s/%s' % (mdir, mfn))
    info = moodx.find('.//information')
    name = info.find('.//original_course_fullname').text
    number = info.find('.//original_course_shortname').text
    contents = moodx.find('.//contents')
    number = self.make_url_name(number, extra_ok_chars='.')

    # start course.xml
    cxml = etree.Element('course', graceperiod="1 day 5 hours 59 minutes 59 seconds")
    cxml.set('display_name',name)
    cxml.set('number', number)
    # NOTE(review): org is hard-coded here; the `org` argument is only used
    # for the pointer file written at the end.
    cxml.set('org','MITx')

    # place each activity as a new sequential inside a chapter
    # the chapter is specified by the section (moodle sectionid)
    # sections is dict with key=sectionid, value=chapter XML
    sections = {}

    self.load_moodle_course_head(cxml)  # load the course/course.xml if it has anything
    seq = None  # current sequential
    vert = None  # current vertical
    activities = contents.findall('.//activity')
    for activity in activities:
        seq, vert = self.activity2chapter(activity, sections, cxml, seq, vert, qdict)

    chapter = cxml.find('chapter')
    name = name.replace('/',' ')
    chapter.set('name',name)  # use first chapter for course name (FIXME)
    cdir = self.edxdir
    semester = self.make_url_name(semester)
    self.write_xml(cxml, '{}/course/{}.xml'.format(cdir, semester))

    # the actual top-level course.xml file is a pointer XML file to the one in course/semester.xml
    open('%s/course.xml' % cdir, 'w').write('<course url_name="%s" org="%s" course="%s"/>\n' % (semester, org, number))
from furl import furl from lxml import etree from scrapi import util from scrapi import registry from scrapi import settings from scrapi.base.schemas import OAISCHEMA from scrapi.linter.document import RawDocument, NormalizedDocument from scrapi.base.transformer import XMLTransformer, JSONTransformer from scrapi.base.helpers import updated_schema, build_properties, oai_get_records_and_token logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) etree.set_default_parser(etree.XMLParser(recover=True)) class HarvesterMeta(abc.ABCMeta): def __init__(cls, name, bases, dct): super(HarvesterMeta, cls).__init__(name, bases, dct) if len(cls.__abstractmethods__ ) == 0 and cls.short_name not in settings.disabled: registry[cls.short_name] = cls() else: logger.info('Class {} not added to registry'.format(cls.__name__)) @six.add_metaclass(HarvesterMeta) class BaseHarvester(object): """ This is a base class that all harvesters should inheret from
from .parse_version import parse_version
import openerp

# get_encodings, ustr and exception_to_unicode were originally from tools.misc.
# There are moved to loglevels until we refactor tools.
from openerp.loglevels import get_encodings, ustr, exception_to_unicode # noqa

_logger = logging.getLogger(__name__)

# List of etree._Element subclasses that we choose to ignore when parsing XML.
# We include the *Base ones just in case, currently they seem to be subclasses of the _* ones.
SKIPPED_ELEMENT_TYPES = (etree._Comment, etree._ProcessingInstruction, etree.CommentBase, etree.PIBase, etree._Entity)

# Configure default global parser
# resolve_entities=False guards against XXE-style entity expansion.
etree.set_default_parser(etree.XMLParser(resolve_entities=False))

#----------------------------------------------------------
# Subprocesses
#----------------------------------------------------------

def find_in_path(name):
    """Locate executable *name* on the PATH, extended with the configured
    bin_path when set.

    Behaviour when the executable is missing depends on the imported
    `which` helper — presumably it raises or returns None; TODO confirm.
    """
    path = os.environ.get('PATH', os.defpath).split(os.pathsep)
    if config.get('bin_path') and config['bin_path'] != 'None':
        path.append(config['bin_path'])
    return which(name, path=os.pathsep.join(path))

def _exec_pipe(prog, args, env=None):
    # Build the argv tuple for the subprocess: program followed by its args.
    cmd = (prog, ) + args
LAST_LOG_ENTRY['log'] += line return [log_record for log_record in LOG_RECORDS if log_record['level'] == "ERROR"] STATS = { 'total': 0, 'success': 0, 'skip': 0, 'error': 0, 'failure': 0, } REPORT = {} et.set_default_parser(et.XMLParser(huge_tree=True)) tree = et.parse(sys.argv[1]) root = tree.getroot() STATS['total'] = int(root.attrib['tests']) STATS['failure'] = int(root.attrib['failures']) STATS['error'] = int(root.attrib['errors']) STATS['skip'] = int(root.attrib['skip']) STATS['unsuccess'] = STATS['failure'] + STATS['error'] + STATS['skip'] STATS['success'] = STATS['total'] - STATS['unsuccess'] for case in root: class_name = case.attrib['classname'] screenshot_file = 'logs/artifacts/screenshots/%s.png' % case.attrib['name']
from opaque_keys.edx.keys import UsageKey from opaque_keys.edx.locations import SlashSeparatedCourseKey from xblock.field_data import DictFieldData from xblock.runtime import DictKeyValueStore, IdGenerator from . import ModuleStoreReadBase, Location, XML_MODULESTORE_TYPE from .exceptions import ItemNotFoundError from .inheritance import compute_inherited_metadata, inheriting_field_data from xblock.fields import ScopeIds, Reference, ReferenceList, ReferenceValueDict edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_comments=True, remove_blank_text=True) etree.set_default_parser(edx_xml_parser) log = logging.getLogger(__name__) # VS[compat] # TODO (cpennington): Remove this once all fall 2012 courses have been imported # into the cms from xml def clean_out_mako_templating(xml_string): xml_string = xml_string.replace("%include", "include") xml_string = re.sub(r"(?m)^\s*%.*$", "", xml_string) return xml_string class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): def __init__(
import logging
import time
import sha
import urllib
import os.path
import shutil

from lxml import etree

from django.conf import settings
from django.db.models import get_model

# Prevent: XMLSyntaxError: Attempt to load network entity
etree.set_default_parser(etree.XMLParser(no_network=False, recover=True))

logger = logging.getLogger('gsm')


def get_object_from_url(url):
    """Resolve a GsmEntity from a URL path.

    Scans the path components for a known sport slug, then collects the
    next two components as the entity tag and gsm id.

    :param url: URL whose path contains /<sport-slug>/<tag>/<gsm_id>/
    :return: the matching GsmEntity
    :raises IndexError: if the URL does not yield sport, tag and gsm_id
    :raises GsmEntity.DoesNotExist: if no matching entity exists
    """
    data = []
    Sport = get_model('gsm', 'sport')
    GsmEntity = get_model('gsm', 'gsmentity')
    sports = Sport.objects.all().values_list('slug', flat=True)
    for part in url.split('/'):
        if part in sports:
            # Sport slug found: this starts the [sport, tag, gsm_id] triple.
            data.append(part)
        elif 1 <= len(data) < 3:  # chained comparison (was: 1 <= len(data) and len(data) < 3)
            # Collect up to two components following the sport slug.
            data.append(part)
    return GsmEntity.objects.get(sport__slug=data[0], tag=data[1], gsm_id=data[2])
from lxml import etree
from oaipmh.metadata import MetadataRegistry

from django.utils.datastructures import MultiValueDict
from django.contrib.auth.models import User

from libros.bookreader.models import Book, Page, Annotation, Link, Transcription
from libros.bookreader.harvesting.metadata import metadata_registry

log = getLogger('bookreader.harvesting.book')

# Default parser: no DTD loading/validation; no_network=False permits
# fetching network entities while parsing harvested records.
etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False, no_network=False))


class BookHarvester(object):
    """Harvests metadata for a single Book from its collection's OAI-PMH repository."""

    def __init__(self, book, metadata_registry=metadata_registry, bundle_prefix='ore'):
        """Bind the harvester to *book* and open its repository connection.

        :param book: Book instance to harvest into
        :param metadata_registry: oaipmh MetadataRegistry used for record parsing
        :param bundle_prefix: metadata prefix of the aggregation bundle
        """
        assert isinstance(book, Book)
        assert isinstance(metadata_registry, MetadataRegistry)
        self.book = book
        self.repository = book.collection.repository.connection(metadata_registry=metadata_registry)
        self._bundle_prefix = bundle_prefix

    def _set_additional_metadata(self, field, value):
        """ If a value does not equate to false, set the field in the additional_metadata multi-valued dictionary"""
        # Guard clause: falsy values are ignored rather than stored.
        if not value:
            return
import logging

from lxml import etree

from django.core.management.base import NoArgsCommand
from django.conf import settings

from music.models import *

# no_network=False allows the parser to fetch the Last.fm web-service URLs
# handed to etree.parse() below.
etree.set_default_parser(etree.XMLParser(no_network=False))

# Letters to iterate artist searches over; currently narrowed to just 'a'.
letters = [chr(i) for i in xrange(ord('a'), ord('z')+1)]
letters = ['a']


class Command(NoArgsCommand):
    help = "Synchronise lastfm artist, track and album database."

    def handle_noargs(self, **options):
        """Query the Last.fm artist.search API per letter and sync results."""
        logging.basicConfig(level=logging.DEBUG, format="%(message)s")
        logging.info("-" * 72)
        for letter in letters:
            total_pages = None  # unknown until the first response page is parsed
            url = 'http://ws.audioscrobbler.com/2.0/?api_key=%s&method=%s&artist=%s&page=%s' % (
                settings.LASTFM_API_KEY,
                'artist.search',
                letter,
                1
            )
            tree = etree.parse(url)