def testvalidation(self):
    """Report which Forms fail validation against a character inventory.

    Reads the ``validationId`` request parameter to choose an Inventory
    object from ``app_globals`` and the Form field it validates, asks the
    database for every Form whose field does not match the inventory's
    regular expression, and returns a JSON object with these keys:

    - ``formCount``: number of Forms in the database
    - ``invalidCount``: number of Forms whose field failed the regex
    - ``NFDFixes``: how many of those match once passed through ``h.NFD``
    - ``invalid``: ``[id, NFD value, code points]`` for each of the rest
    - ``validationInventory``: the inventory's ``inputList``

    A missing or unrecognized ``validationId`` produces a 400 response
    whose JSON body is ``{'valid': False, 'errors': {...}}`` instead of
    an unhandled exception.
    """
    response.headers['Content-Type'] = 'application/json'
    # .get() so that a request without the parameter reaches the
    # unknown-id branch below rather than raising KeyError
    validationId = dict(request.params).get('validationId')

    # Map each validationId to the app_globals attribute holding its
    # Inventory object and to the Form field that inventory validates.
    # These objects are saved by functions/applicationSettingsToAppGlobals
    # when the application settings are saved
    validationTargets = {
        u'testOrthographicValidation':
            ('orthTranscrInvObj', u'transcription'),
        u'testNarrPhonValidation':
            ('narrPhonInvObj', u'narrowPhoneticTranscription'),
        u'testBroadPhonValidation':
            ('broadPhonInvObj', u'phoneticTranscription'),
        u'testMorphophonValidation':
            ('morphBreakInvObj', u'morphemeBreak'),
        u'testOrthographicMBValidation':
            ('morphBreakInvObj', u'morphemeBreak'),
    }
    target = validationTargets.get(validationId)
    if target is None:
        # Without a known id there is no inventory or field to validate
        response.status_int = 400
        return json.dumps({
            'valid': False,
            'errors': {
                'validationId': u'Unrecognized or missing validationId'
            }
        })
    inventoryAttr, fieldName = target
    # Only the selected inventory attribute is looked up on app_globals
    inventory = getattr(app_globals, inventoryAttr)

    # regex is the regular expression that valid fields will match
    regex = inventory.getRegexValidator()

    # Count the forms in the db
    formCount = meta.Session.query(model.Form).count()

    # Get the invalid forms using regex and SQLAlchemy
    fieldColumn = getattr(model.Form, fieldName)
    invalidForms = meta.Session.query(model.Form).filter(
        not_(fieldColumn.op('regexp')(regex))).all()

    # Build the result to return to the user
    result = {
        'formCount': formCount,
        'invalidCount': len(invalidForms),
        'NFDFixes': 0,
        'invalid': [],
        'validationInventory': inventory.inputList
    }
    for form in invalidForms:
        # Normalize once: the same NFD string is both tested and reported
        normalized = h.NFD(getattr(form, fieldName))
        # A form counts as an "NFD fix" when its NFD-normalized value
        # matches the regex. re.match uses the re module's pattern cache,
        # so the regex is not recompiled for every form.
        if re.match(regex, normalized):
            result['NFDFixes'] += 1
        else:
            result['invalid'].append([
                form.id,
                normalized,
                h.getUnicodeCodePoints(normalized)
            ])
    return json.dumps(result)
def getCharacterCodesAndNames(self):
    """Return JSON describing the characters of the ``input`` parameter.

    The ``input`` request parameter is passed through ``h.NFD``; the
    response body is a two-element JSON array holding the results of
    ``h.getUnicodeCodePoints`` and ``h.getUnicodeNames`` for that string,
    in that order.
    """
    params = dict(request.params)
    normalized = h.NFD(params['input'])
    response.headers['Content-Type'] = 'application/json'
    codePoints = h.getUnicodeCodePoints(normalized)
    names = h.getUnicodeNames(normalized)
    return json.dumps((codePoints, names))
def testvalidation(self):
    """Report which Forms fail validation against a character inventory.

    Reads the ``validationId`` request parameter to choose an Inventory
    object from ``app_globals`` and the Form field it validates, asks the
    database for every Form whose field does not match the inventory's
    regular expression, and returns a JSON object with these keys:

    - ``formCount``: number of Forms in the database
    - ``invalidCount``: number of Forms whose field failed the regex
    - ``NFDFixes``: how many of those match once passed through ``h.NFD``
    - ``invalid``: ``[id, NFD value, code points]`` for each of the rest
    - ``validationInventory``: the inventory's ``inputList``

    A missing or unrecognized ``validationId`` produces a 400 response
    whose JSON body is ``{'valid': False, 'errors': {...}}`` instead of
    an unhandled exception.
    """
    response.headers['Content-Type'] = 'application/json'
    # .get() so that a request without the parameter reaches the
    # unknown-id branch below rather than raising KeyError
    validationId = dict(request.params).get('validationId')

    # Map each validationId to the app_globals attribute holding its
    # Inventory object and to the Form field that inventory validates.
    # These objects are saved by functions/applicationSettingsToAppGlobals
    # when the application settings are saved
    validationTargets = {
        u'testOrthographicValidation':
            ('orthTranscrInvObj', u'transcription'),
        u'testNarrPhonValidation':
            ('narrPhonInvObj', u'narrowPhoneticTranscription'),
        u'testBroadPhonValidation':
            ('broadPhonInvObj', u'phoneticTranscription'),
        u'testMorphophonValidation':
            ('morphBreakInvObj', u'morphemeBreak'),
        u'testOrthographicMBValidation':
            ('morphBreakInvObj', u'morphemeBreak'),
    }
    target = validationTargets.get(validationId)
    if target is None:
        # Without a known id there is no inventory or field to validate
        response.status_int = 400
        return json.dumps({
            'valid': False,
            'errors': {
                'validationId': u'Unrecognized or missing validationId'
            }
        })
    inventoryAttr, fieldName = target
    # Only the selected inventory attribute is looked up on app_globals
    inventory = getattr(app_globals, inventoryAttr)

    # regex is the regular expression that valid fields will match
    regex = inventory.getRegexValidator()

    # Count the forms in the db
    formCount = meta.Session.query(model.Form).count()

    # Get the invalid forms using regex and SQLAlchemy
    fieldColumn = getattr(model.Form, fieldName)
    invalidForms = meta.Session.query(model.Form).filter(
        not_(fieldColumn.op('regexp')(regex))).all()

    # Build the result to return to the user
    result = {
        'formCount': formCount,
        'invalidCount': len(invalidForms),
        'NFDFixes': 0,
        'invalid': [],
        'validationInventory': inventory.inputList
    }
    for form in invalidForms:
        # Normalize once: the same NFD string is both tested and reported
        normalized = h.NFD(getattr(form, fieldName))
        # A form counts as an "NFD fix" when its NFD-normalized value
        # matches the regex. re.match uses the re module's pattern cache,
        # so the regex is not recompiled for every form.
        if re.match(regex, normalized):
            result['NFDFixes'] += 1
        else:
            result['invalid'].append([
                form.id,
                normalized,
                h.getUnicodeCodePoints(normalized)
            ])
    return json.dumps(result)
result = schema.to_python(values) except Invalid, e: result = {'valid': False, 'errors': e.unpack_errors()} else: # Count all the characters tokens by type result = {} forms = meta.Session.query(model.Form).all() field = values['field'] fieldBag = ''.join([getattr(f, field) for f in forms if getattr(f, field)]) for c in fieldBag: try: result[c] += 1 except KeyError: result[c] = 1 # Sort the tokens by count in descending order result = [[k, result[k]] for k in result] result = sorted(result, key=lambda x: x[1], reverse=True) # Add some character information result = [[c[0], c[1], h.getUnicodeNames(c[0]), h.getUnicodeCodePoints(c[0]), ud.normalize('NFC', c[0]), h.getUnicodeCodePoints(ud.normalize('NFC', c[0]))] for c in result] result = {'valid': True, 'response': result} response.headers['Content-Type'] = 'application/json' return json.dumps(result)
else: # Count all the characters tokens by type result = {} forms = meta.Session.query(model.Form).all() field = values['field'] fieldBag = ''.join( [getattr(f, field) for f in forms if getattr(f, field)]) for c in fieldBag: try: result[c] += 1 except KeyError: result[c] = 1 # Sort the tokens by count in descending order result = [[k, result[k]] for k in result] result = sorted(result, key=lambda x: x[1], reverse=True) # Add some character information result = [[ c[0], c[1], h.getUnicodeNames(c[0]), h.getUnicodeCodePoints(c[0]), ud.normalize('NFC', c[0]), h.getUnicodeCodePoints(ud.normalize('NFC', c[0])) ] for c in result] result = {'valid': True, 'response': result} response.headers['Content-Type'] = 'application/json' return json.dumps(result)