def validate(filename):
    """
    Validate a file with the W3C validator service and log every finding.

    Uses py_w3c: https://bitbucket.org/nmb10/py_w3c/ .

    Errors are logged through ``LOG.error`` and warnings through
    ``LOG.warning``; nothing is returned.

    :param filename: the filename to validate
    """
    import HTMLParser
    from py_w3c.validators.html.validator import HTMLValidator

    unescaper = HTMLParser.HTMLParser()  # used to unescape W3C messages
    validator = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # Call the W3C web service.
    validator.validate_file(filename)

    # Report errors first, then warnings, each at its own log level.
    template = u'line: {0}; col: {1}; message: {2}'
    for log, issues in ((LOG.error, validator.errors),
                        (LOG.warning, validator.warnings)):
        for issue in issues:
            log(template.format(issue['line'],
                                issue['col'],
                                unescaper.unescape(issue['message'])))
def is_socket_response_html(self):
    """
    Check whether the socket reply validates as HTML.

    The decoded ``self.reply`` is sent to the validator as a fragment and the
    verdict is taken from the validator's collected ``errors`` list.

    NOTE(review): the previous version returned the result of
    ``validate_fragment`` directly. As far as py_w3c documents it, that value
    only tells that the validation request completed, not that the markup is
    valid -- confirm against the installed py_w3c version.

    :return: ``True`` when the validator reported no errors, else ``False``.
    """
    val = HTMLValidator()
    val.validate_fragment(self.reply.decode())
    return not val.errors
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned
        # by the server.
        expected_string='',
        # The number of validation errors expected.
        expected_errors=0):
    """Fetch ``url`` and check its status, content and validation result.

    Asserts that the status is 200, that ``expected_string`` (when given) is
    in the response text, and that the validator reports exactly
    ``expected_errors`` errors. On an error-count mismatch the errors are
    printed and the HTML is saved next to the working directory before the
    assertion fails. Warnings are printed but never fail the check.
    """
    self.get(url)
    assert self.status == 200
    if expected_string:
        assert expected_string in self.text

    checker = HTMLValidator()
    checker.validate_fragment(self.text)

    error_count = len(checker.errors)
    if error_count != expected_errors:
        print('Errors for {}: {}'.format(url, error_count))
        pprint(checker.errors)
        # Save the HTML to make fixing the errors easier.
        dump_name = url.replace('/', '-') + '.html'
        with open(dump_name, 'w') as dump:
            dump.write(self.text.replace('\r\n', '\n'))
        assert False

    if checker.warnings:
        print('Warnings for {}: {}'.format(url, len(checker.warnings)))
        pprint(checker.warnings)
def validate(filename):
    """
    Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .

    Every error is logged with ``LOG.error`` and every warning with
    ``LOG.warning``. Entries that carry a ``lastLine`` key are reported with
    their line and column; the others are reported by message only.

    :param filename: the filename to validate
    """
    # ``HTMLParser.unescape`` was removed in Python 3.9; ``html.unescape`` is
    # the supported way to unescape the W3C messages.
    from html import unescape
    from py_w3c.validators.html.validator import HTMLValidator

    def describe(entry):
        """Build the log text for one validator message."""
        message = unescape(entry['message'])
        if "lastLine" in entry:
            return "line: {0}; col: {1}; message: {2}".format(
                entry['lastLine'], entry['lastColumn'], message)
        return "message: {0}".format(message)

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warnings
    for err in vld.errors:
        # The raw error entry is also dumped to stdout, as before.
        pprint.pprint(err)
        LOG.error(describe(err))
    for warning in vld.warnings:
        # Warnings used to be logged at error level; use the warning level.
        LOG.warning(describe(warning))
def validate(filename):
    """
    Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .

    Every error is logged with ``LOG.error`` and every warning with
    ``LOG.warning``. The position comes from the ``line``/``col`` keys when
    present, otherwise from ``lastLine`` and ``firstColumn``-``lastColumn``.
    Messages without any line information are logged by message only
    instead of raising ``KeyError``.

    :param filename: the filename to validate
    """
    try:
        # Python 3.4+. ``HTMLParser.unescape`` was removed in Python 3.9.
        from html import unescape
    except ImportError:
        try:
            from html.parser import HTMLParser
        except ImportError:
            # fallback for Python 2:
            from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape
    from py_w3c.validators.html.validator import HTMLValidator

    def describe(entry):
        """Build the log text for one validator message."""
        message = unescape(entry['message'])
        line = entry.get('line') or entry.get('lastLine')
        if line is None:
            return u'message: {0}'.format(message)
        col = entry.get('col')
        if not col:
            first = entry.get('firstColumn')
            last = entry.get('lastColumn')
            # Without a start column only the end column can be shown.
            col = last if first is None else '{}-{}'.format(first, last)
        return u'line: {0}; col: {1}; message: {2}'.format(line, col, message)

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warnings
    for err in vld.errors:
        LOG.error(describe(err))
    for warning in vld.warnings:
        LOG.warning(describe(warning))
def isValidHTML(result):
    """Tell whether ``result`` validates without any "real" error.

    The fragment is validated and the reported errors are filtered through
    ``is_really_error``. Remaining errors are pretty-printed.

    :param result: the HTML fragment to validate
    :return: ``True`` when no error survives the filter, else ``False``
    """
    checker = HTMLValidator()
    checker.validate_fragment(result)
    real_errors = [err for err in checker.errors if is_really_error(err)]
    if real_errors:
        pprint.pprint(real_errors)
        return False
    return True
def parse(self, response):
    """Validate the response body and append the errors to a JSON report.

    The response text is written to ``page.html``, that file is validated,
    and the validator's error list is appended to ``validatePage.json`` as
    indented JSON.

    :param response: object whose ``text`` attribute holds the page markup
    """
    # Overwrite instead of appending: in append mode every call validated the
    # concatenation of all previously saved pages, not just this response.
    # The explicit encoding keeps the write independent of the locale.
    with open("page.html", "w", encoding="utf-8") as page:
        page.write(response.text)

    vld = HTMLValidator()
    vld.validate_file("page.html")

    # The report is still appended, so results of successive calls accumulate.
    with open("validatePage.json", "a") as report:
        report.write(json.dumps(vld.errors, indent=4))
def setUp(self):
    """Start an in-memory server, connect to it and build the test fixtures.

    Sets ``token``, ``server``, ``db``, ``cells`` and ``validator`` on the
    test instance.
    """
    auth_token = genToken()
    self.token = auth_token

    # The server is started before connecting with the same token.
    self.server = InMemServer(auth_token=auth_token)
    self.server.start()

    self.db = self.server.connect(auth_token)
    self.db.subscribeToSchema(test_schema)

    self.cells = Cells(self.db)
    self.validator = HTMLValidator()
def run(self):
    """Validate every file in ``self.files`` and stop at the first failure.

    Each file is resolved relative to ``self.project_path``, validated, and
    its full path printed. When the validator reports errors they are passed
    to ``self.show_error_report`` and an ``Exception`` is raised.

    :raises Exception: when a file has validation errors
    """
    checker = HTMLValidator()
    for name in self.files:
        target = os.path.join(self.project_path, name)
        checker.validate_file(target)
        print(target)
        if not checker.errors:
            continue
        self.show_error_report(checker.errors)
        raise Exception(
            "Failed to validate HTML file: {}".format(name))
def check_url(self, url):
    """Fetch ``url`` with the test client and require clean validation.

    The response content is validated as a fragment. When anything is
    reported, the content and the findings are printed to ease debugging.
    The check then asserts that both the error and the warning list are
    empty.

    :param url: the URL to request through ``self.client``
    """
    page = self.client.get(url)
    checker = HTMLValidator()
    checker.validate_fragment(page.content)

    errors, warnings = checker.errors, checker.warnings
    if errors or warnings:
        print(page.content)
        for template, findings in (("ERRORS: %s", errors),
                                   ("WARNINGS: %s", warnings)):
            if findings:
                print(template % json.dumps(findings, indent=4))

    self.assertEqual(errors, [])
    self.assertEqual(warnings, [])
def test_relatorio_html(self):
    """Build the HTML report from the downloaded operations and validate it.

    The Dropbox file is downloaded first, then the last 150 rows of the
    operations dataframe are used to compute custody and the IR calculation.
    """
    from src.dropbox_files import download_dropbox_file
    download_dropbox_file()

    df = get_operations_dataframe().tail(150)
    calcula_custodia(df)

    calculo_ir = CalculoIr(df=df)
    calculo_ir.calcula()

    from py_w3c.validators.html.validator import HTMLValidator
    validator = HTMLValidator()
    report = relatorio_html(calculo_ir)
    # NOTE(review): only the return value of validate_fragment is asserted;
    # the validator's error list is not inspected here -- confirm intended.
    assert validator.validate_fragment(report)
def test_excel(self):
    """test XLS, XLSX parsing"""
    # Strings that must appear in the extracted body of each workbook.
    expectations = {
        "Revised.Haplogroups.1000G.20140205.xlsx": {
            "contents": [
                "Continent",
                "Population",
                "ID",
                "Macrohaplogroup",
                "Haplogroup",
                "Informative SNPs",
                "NA19239",
                "NA19256",
                "E1b1a1a1g1a2",
            ]
        },
        "lclarke_phase1_sequence_stats_20120330.xls": {
            "contents": [
                "Breakdown of data generated by project, technology, submitting centre",
                "92219554043",
                "90363687334"
            ]
        }
    }
    # Markup expected only in the HTML rendering.
    markup = ['<div>', '<tbody>', '<th>', '<td>', '<tr>']

    checker = HTMLValidator()
    for name, expected in expectations.items():
        workbook_path = os.path.join(BASE_DIR, "excel", name)
        with open(workbook_path, mode="rb") as workbook:
            # Plain-text extraction first, then HTML, on the same handle.
            for as_html in (False, True):
                body, _ = extract_excel(workbook, as_html=as_html)
                if not as_html:
                    assert not any(tag in body for tag in markup)
                else:
                    checker.validate_fragment(body)
                    assert all(tag in body for tag in markup)
                    # Errors on the accepted list do not fail the test.
                    serious_errors = [
                        err for err in checker.errors
                        if err["message"] not in ACCEPTABLE_ERROR_MESSAGES
                    ]
                    assert not serious_errors
                    print(checker.warnings)
                assert all(text in body for text in expected["contents"])
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned
        # by the server.
        expected_string='',
        # The number of validation errors expected. If None, no validation is
        # performed.
        expected_errors=None,
        # An optional dictionary of query parameters.
        params=None,
        # The expected status code from the request.
        expected_status=200,
        # All additional keyword arguments are passed to the ``post`` method.
        **kwargs):
    """Post to ``url`` and check status, content and validation result.

    Any failed assertion saves the response HTML to a file named after the
    URL (and prints its first 200 characters) before the ``AssertionError``
    is re-raised. ``params`` is accepted but not used by this body.
    """
    try:
        self.post(url, **kwargs)
        assert self.status == expected_status
        if expected_string:
            assert expected_string in self.text

        if expected_errors is not None:
            checker = HTMLValidator()
            checker.validate_fragment(self.text)

            error_count = len(checker.errors)
            if error_count != expected_errors:
                print('Errors for {}: {}'.format(url, error_count))
                pprint(checker.errors)
                assert False

            # Warnings are reported but never fail the check.
            if checker.warnings:
                print('Warnings for {}: {}'.format(url, len(checker.warnings)))
                pprint(checker.warnings)
    except AssertionError:
        # Save the HTML to make fixing the errors easier. Note that
        # ``self.text`` is already encoded as utf-8.
        print(self.text[:200])
        dump_name = url.replace('/', '-') + '.html'
        with open(dump_name, 'wb') as dump:
            dump.write(self.text.replace('\r\n', '\n'))
        raise
def test_relatorio_html(self):
    """Build the HTML report from a small fixed set of operations and validate it."""
    # (ticker, quantity, date, price); every operation is acquired via HomeBroker.
    operations = [
        ('MAXR11', 100, datetime.date(2019, 3, 11), 100),
        ('PETR4', 100, datetime.date(2019, 4, 11), 100),
        ('XPLG11', 100, datetime.date(2019, 4, 12), 200),
        ('XPLG11', -50, datetime.date(2019, 5, 12), 220),
    ]
    data = [
        {
            'ticker': ticker,
            'qtd': qtd,
            'data': day,
            'preco': preco,
            'aquisicao_via': 'HomeBroker'
        }
        for ticker, qtd, day, preco in operations
    ]

    df = create_testing_dataframe(data)
    custodia = calcula_custodia(df)

    calculo_ir = CalculoIr(df=df)
    calculo_ir.calcula()

    from py_w3c.validators.html.validator import HTMLValidator
    validator = HTMLValidator()
    report = relatorio_html(custodia, calculo_ir, datetime.date.today(), False)
    # NOTE(review): only the return value of validate_fragment is asserted;
    # the validator's error list is not inspected here -- confirm intended.
    assert validator.validate_fragment(report)
def test_deve_retornar_relatorio_html_com_todos_os_meses(self):
    """The HTML report must validate and mention the months 05/2019 and 11/2019."""
    # (ticker, quantity, date, price); every operation is acquired via HomeBroker.
    operations = [
        ('MAXR11', 100, datetime.date(2019, 3, 11), 100),
        ('PETR4', 100, datetime.date(2019, 4, 11), 100),
        ('XPLG11', 100, datetime.date(2019, 4, 12), 200),
        ('XPLG11', -50, datetime.date(2019, 5, 12), 220),
        ('MAXR11', 100, datetime.date(2019, 6, 11), 100),
        ('PETR4', 100, datetime.date(2019, 10, 11), 100),
        ('XPLG11', 100, datetime.date(2019, 10, 12), 200),
        ('XPLG11', -50, datetime.date(2019, 11, 12), 220),
    ]
    data = [
        {
            'ticker': ticker,
            'qtd': qtd,
            'data': day,
            'preco': preco,
            'aquisicao_via': 'HomeBroker'
        }
        for ticker, qtd, day, preco in operations
    ]

    df = create_testing_dataframe(data)
    calcula_custodia(df)

    calculo_ir = CalculoIr(df=df)
    calculo_ir.calcula()

    from py_w3c.validators.html.validator import HTMLValidator
    html = relatorio_html(calculo_ir)
    # NOTE(review): only the return value of validate_fragment is asserted;
    # the validator's error list is not inspected here -- confirm intended.
    assert HTMLValidator().validate_fragment(html)

    for month_label in ('MES : 05/2019', 'MES : 11/2019'):
        assert month_label in html
def setUp(self):
    """Resolve the landing main page URL and create the validator fixture."""
    main_page_url = reverse('landing:main_page')
    self.url = main_page_url
    self.validator = HTMLValidator()
'/home/user/workspace/Problem Statement Two/StatementTwo/files/25 errors - a1 (1).html', 'w') txt.write(text.get(0.0, Tkinter.END)) txt.close() def key(event): if event.char == '\x1b': root.destroy() else: val = text.index(Tkinter.INSERT).split('.') lineNum.config(text=str(val[0])) colNum.config(text=str(val[1])) vld = HTMLValidator(doctype="HTML 4.01 Strict") vld.validate_file( '/home/user/workspace/Problem Statement Two/StatementTwo/files/25 errors - a1 (1).html' ) errors = vld.errors locationList = [] for error in range(len(errors)): locationList.append(errors[error]['line']) print locationList txt = open( '/home/user/workspace/Problem Statement Two/StatementTwo/files/25 errors - a1 (1).html', 'r') lineNum = 1 errorIndex = 0 for line in txt:
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned
        # by the server.
        expected_string='',
        # The number of validation errors expected. If None, no validation is
        # performed.
        expected_errors=None,
        # An optional dictionary of query parameters.
        params=None,
        # The expected status code from the request.
        expected_status=200,
        # All additional keyword arguments are passed to the ``post`` method.
        **kwargs):
    """Post to ``url`` and check status, content and validation result.

    A failed assertion saves the response HTML to a file named after the URL
    before re-raising. A ``RuntimeError`` whose first argument starts with
    ``'ticket '`` triggers a download of the matching traceback from the
    admin interface before the error is re-raised. ``params`` is accepted but
    not used by this body.
    """
    try:
        self.post(url, **kwargs)
        assert self.status == expected_status
        if expected_string:
            assert expected_string in self.text

        if expected_errors is not None:
            checker = HTMLValidator()
            checker.validate_fragment(self.text)

            error_count = len(checker.errors)
            if error_count != expected_errors:
                print('Errors for {}: {}'.format(url, error_count))
                pprint(checker.errors)
                assert False

            # Warnings are reported but never fail the check.
            if checker.warnings:
                print('Warnings for {}: {}'.format(url, len(checker.warnings)))
                pprint(checker.warnings)

    except AssertionError:
        # Save the HTML to make fixing the errors easier. Note that
        # ``self.text`` is already encoded as utf-8.
        validation_file = url.replace('/', '-') + '.html'
        with open(validation_file, 'wb') as dump:
            dump.write(self.text.replace('\r\n', '\n'))
        print('Validation failure saved to {}.'.format(validation_file))
        raise

    except RuntimeError as e:
        # Provide special handling for web2py exceptions by saving the
        # resulting traceback.
        ticket_prefix = 'ticket '
        reason = e.args[0]
        if reason.startswith(ticket_prefix):
            # Create a client to access the admin interface.
            admin_client = WebClient('http://127.0.0.1:8000/admin/',
                                     postbacks=True)
            # Log in.
            admin_client.post(
                '', data={'password': self.web2py_server.password})
            assert admin_client.status == 200
            # Get the error.
            error_code = reason[len(ticket_prefix):]
            admin_client.get('default/ticket/' + error_code)
            assert admin_client.status == 200
            # Save it to a file.
            traceback_file = url.replace('/', '-') + '_traceback.html'
            with open(traceback_file, 'wb') as dump:
                dump.write(admin_client.text.replace('\r\n', '\n'))
            print('Traceback saved to {}.'.format(traceback_file))
        raise
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned
        # by the server. If this is a list of strings, every string in the
        # list must be in the text returned by the server.
        expected_string='',
        # The number of validation errors expected. If None, no validation is
        # performed.
        expected_errors=None,
        # The expected status code from the request.
        expected_status=200,
        # All additional keyword arguments are passed to the ``post`` method.
        **kwargs):
    """Post to ``url``, check the response and return its text.

    Returns ``''`` when the post raises an ``HTTPError`` carrying the expected
    status code, otherwise ``self.text`` after all checks passed. A failed
    assertion saves the response HTML to a file named after the URL before
    re-raising. A ``RuntimeError`` whose first argument starts with
    ``'ticket '`` triggers a download of the matching traceback from the
    admin interface before the error is re-raised.
    """
    try:
        try:
            self.post(url, **kwargs)
        except HTTPError as e:
            # Anything but the expected status is a genuine failure.
            if e.code != expected_status:
                raise
            # Since this is an error of some type, these parameters must be
            # empty, since they can't be checked.
            assert not expected_string
            assert not expected_errors
            return ''

        assert self.status == expected_status

        if expected_string:
            # Anything that is not a single string is assumed to be a list
            # of strings.
            if isinstance(expected_string, str):
                wanted = [expected_string]
            else:
                wanted = expected_string
            assert all(string in self.text for string in wanted)

        if expected_errors is not None:
            checker = HTMLValidator()
            checker.validate_fragment(self.text)

            error_count = len(checker.errors)
            if error_count != expected_errors:
                print('Errors for {}: {}'.format(url, error_count))
                pprint(checker.errors)
                assert False

            # Warnings are reported but never fail the check.
            if checker.warnings:
                print('Warnings for {}: {}'.format(url, len(checker.warnings)))
                pprint(checker.warnings)

        return self.text

    except AssertionError:
        # Save the HTML to make fixing the errors easier. Note that
        # ``self.text`` is already encoded as utf-8.
        validation_file = url.replace('/', '-') + '.html'
        with open(validation_file, 'wb') as dump:
            dump.write(_html_prep(self.text))
        print('Validation failure saved to {}.'.format(validation_file))
        raise

    except RuntimeError as e:
        # Provide special handling for web2py exceptions by saving the
        # resulting traceback.
        ticket_prefix = 'ticket '
        reason = e.args[0]
        if reason.startswith(ticket_prefix):
            # Create a client to access the admin interface.
            admin_client = WebClient('{}/admin/'.format(
                self.web2py_server_address), postbacks=True)
            # Log in.
            admin_client.post(
                '', data={'password': self.web2py_server.password})
            assert admin_client.status == 200
            # Get the error.
            error_code = reason[len(ticket_prefix):]
            admin_client.get('default/ticket/' + error_code)
            assert admin_client.status == 200
            # Save it to a file.
            traceback_file = url.replace('/', '-') + '_traceback.html'
            with open(traceback_file, 'wb') as dump:
                dump.write(_html_prep(admin_client.text))
            print('Traceback saved to {}.'.format(traceback_file))
        raise
def validate(self):
    """Validate ``self.url`` and store the reported errors in ``self.result.w3c``."""
    checker = HTMLValidator()
    checker.validate(self.url)
    self.result.w3c = checker.errors
with open(csv_file, 'rU') as f: reader = csv.reader(f) csv_hash = {rows[0]: rows[1] for rows in reader} for lib_name, url in csv_hash.items(): try: url = url.replace(" ", "") lib_name = lib_name.replace(" ", "_") # Create folder for your current library's data print "Creating folder for " + lib_name subdirectory = lib_name dir_path = "./" + directory + "/" + subdirectory if not os.path.exists(dir_path): os.makedirs(dir_path) # W3C Query/Response vld = HTMLValidator() vld.validate(url) w3c_warnings = convert(vld.warnings) save(w3c_warnings, 'w3c_warnings', lib_name, directory, subdirectory) w3c_errors = convert(vld.errors) save(w3c_errors, 'w3c_errors', lib_name, directory, subdirectory) w3c_warnings = solrize_w3c(w3c_warnings, "w3c_warnings") w3c_errors = solrize_w3c(w3c_errors, "w3c_errors") w3c = dict(w3c_warnings, **w3c_errors) w3c = flatten(w3c) time.sleep(1)