def EvalCStringToken(tok):
    # type: (Token) -> Optional[str]
    """Translate one lexed piece of a C-style string into the text it denotes.

    Used by both echo -e and $''.  $'' could call this at compile time, much
    like brace expansion in braces.py.

    Args:
      tok: token whose `id` selects the kind of escape and whose `val` holds
        the source text of that escape.

    Returns:
      The string the token stands for, or None for a Char_Stop token (a
      sentinel for the caller).

    Raises:
      AssertionError: if the token id is not one of the kinds handled here.
    """
    kind = tok.id
    text = tok.val

    # Literal text and unrecognized backslash escapes pass through unchanged.
    # shopt -s strict_backslash detects Unknown_Backslash at PARSE time in Oil.
    if kind in (Id.Char_Literals, Id.Unknown_Backslash):
        return text

    if kind == Id.Char_OneChar:
        # The character right after the backslash selects the result.
        escape_char = text[1]
        return consts.LookupCharC(escape_char)

    if kind == Id.Char_Stop:
        # \c returns a special sentinel
        return None

    if kind in (Id.Char_Octal3, Id.Char_Octal4):
        if kind == Id.Char_Octal4:
            # echo -e '\0377': skip the backslash and the leading 0
            octal_digits = text[2:]
        else:
            # $'\377' (disallowed at parse time in Oil): skip the backslash
            octal_digits = text[1:]

        byte_val = int(octal_digits, 8)
        # Values that don't fit in a byte wrap around.
        # NOTE: strict mode could treat this as an out-of-range error instead.
        if byte_val >= 256:
            byte_val = byte_val % 256
        return chr(byte_val)

    if kind == Id.Char_Hex:
        # Drop the 2-character prefix; the rest is hex digits.
        return chr(int(text[2:], 16))

    if kind in (Id.Char_Unicode4, Id.Char_Unicode8):
        # Drop the 2-character prefix; the rest is the code point in hex.
        code_point = int(text[2:], 16)
        return string_ops.Utf8Encode(code_point)

    if kind == Id.Char_UBraced:
        # \u{123}: strip the 3-character opener and the closing brace
        code_point = int(text[3:-1], 16)
        return string_ops.Utf8Encode(code_point)

    raise AssertionError()
def EvalCStringToken(tok):
    # type: (Token) -> Optional[str]
    """Translate one lexed piece of a C-style string into the text it denotes.

    Used by both echo -e and $''.  $'' could call this at compile time, much
    like brace expansion in braces.py.

    Args:
      tok: token whose `id` selects the kind of escape and whose `val` holds
        the source text of that escape.

    Returns:
      The string the token stands for, or None for a Char_Stop token (a
      sentinel for the caller).  An invalid backslash escape is returned
      unchanged after a warning is emitted through stderr_line().

    Raises:
      AssertionError: if the token id is not one of the kinds handled here.
    """
    kind = tok.id
    text = tok.val

    if kind == Id.Char_Literals:
        return text

    if kind == Id.Char_BadBackslash:
        # Either \A or trailing \ (A is not a valid backslash escape)
        #
        # TODO: add location info with tok.span_id (errfmt), and make it an
        # error when strict_backslash is on, e.g. with core.pyerror.e_die:
        #   e_die('Invalid backslash escape %r', value, span_id=tok.span_id)
        #
        # This warning has already been useful: it was used to fix a
        # refactored regex (extracted from [[ ]], backslashes fixed).
        warning = 'warning: Invalid backslash escape in C-style string: %r' % text
        stderr_line(warning)
        return text

    if kind == Id.Char_OneChar:
        # The character right after the backslash selects the result.
        escape_char = text[1]
        return consts.LookupCharC(escape_char)

    if kind == Id.Char_Stop:
        # \c returns a special sentinel
        return None

    if kind in (Id.Char_Octal3, Id.Char_Octal4):
        if kind == Id.Char_Octal4:
            # echo -e '\0377': skip the backslash and the leading 0
            octal_digits = text[2:]
        else:
            # $'\377': skip the backslash
            octal_digits = text[1:]

        byte_val = int(octal_digits, 8)
        # Values that don't fit in a byte wrap around.
        # NOTE: strict mode could treat this as an out-of-range error instead.
        if byte_val >= 256:
            byte_val = byte_val % 256
        return chr(byte_val)

    if kind == Id.Char_Hex:
        # Drop the 2-character prefix; the rest is hex digits.
        return chr(int(text[2:], 16))

    if kind in (Id.Char_Unicode4, Id.Char_Unicode8):
        # Drop the 2-character prefix; the rest is the code point in hex.
        code_point = int(text[2:], 16)
        return string_ops.Utf8Encode(code_point)

    raise AssertionError()
def EvalCStringToken(id_, value):
    # type: (Id_t, str) -> Optional[str]
    """Translate one lexed piece of a C-style string into the text it denotes.

    Used by both echo -e and $''.  $'' could call this at compile time, much
    like brace expansion in braces.py.

    Args:
      id_: token id selecting the kind of escape.
      value: source text of the token.

    Returns:
      The string the token stands for, or None for Char_Stop (a sentinel for
      the caller).  An invalid backslash escape is returned unchanged after a
      warning is emitted through ui.Stderr().

    Raises:
      AssertionError: if id_ is not one of the kinds handled here.
    """
    if id_ == Id.Char_Literals:
        return value

    if id_ == Id.Char_BadBackslash:
        # Either \A or trailing \ (A is not a valid backslash escape)
        # TODO:
        # - make this an error in strict mode
        # - improve the error message.  We don't have a span_id!
        ui.Stderr('warning: Invalid backslash escape in C-style string')
        return value

    if id_ == Id.Char_OneChar:
        # The character right after the backslash is the table key.
        escape_char = value[1]
        return _ONE_CHAR[escape_char]

    if id_ == Id.Char_Stop:
        # \c returns a special sentinel
        return None

    if id_ in (Id.Char_Octal3, Id.Char_Octal4):
        if id_ == Id.Char_Octal4:
            # echo -e '\0377': skip the backslash and the leading 0
            octal_digits = value[2:]
        else:
            # $'\377': skip the backslash
            octal_digits = value[1:]

        byte_val = int(octal_digits, 8)
        # Values that don't fit in a byte wrap around.
        # NOTE: strict mode could treat this as an out-of-range error instead.
        if byte_val >= 256:
            byte_val = byte_val % 256
        return chr(byte_val)

    if id_ == Id.Char_Hex:
        # Drop the 2-character prefix; the rest is hex digits.
        return chr(int(value[2:], 16))

    if id_ in (Id.Char_Unicode4, Id.Char_Unicode8):
        # Drop the 2-character prefix; the rest is the code point in hex.
        code_point = int(value[2:], 16)
        return string_ops.Utf8Encode(code_point)

    raise AssertionError()
def testUtf8Encode(self):
    """Check string_ops.Utf8Encode against a table of known encodings."""
    # (code point, expected UTF-8 encoded result)
    test_table = [
        (0x0065, u'\u0065'.encode('utf-8')),
        (0x0100, u'\u0100'.encode('utf-8')),
        (0x1234, u'\u1234'.encode('utf-8')),
        (0x00020000, u'\U00020000'.encode('utf-8')),
        # Out of range gives Unicode replacement character.
        (0x10020000, '\xef\xbf\xbd'),
    ]

    for code_point, want in test_table:
        # Announce each case so a failure is easy to locate in the output.
        print('')
        print('Utf8Encode case %r %r' % (want, code_point))
        got = string_ops.Utf8Encode(code_point)
        self.assertEqual(want, got)
def EvalCStringToken(id_, value):
    """Translate one lexed piece of a C-style string into the text it denotes.

    Used by both echo -e and $''.  $'' could call this at compile time, much
    like brace expansion in braces.py.

    Args:
      id_: token id selecting the kind of escape.
      value: source text of the token.

    Returns:
      The string the token stands for, or None for Char_Stop (a sentinel for
      the caller).  An invalid backslash escape is returned unchanged after
      util.warn() is called.

    Raises:
      AssertionError: if id_ is not one of the kinds handled here.
    """
    if id_ == Id.Char_Literals:
        return value

    if id_ == Id.Char_BadBackslash:
        # Either \A or trailing \ (A is not a valid backslash escape)
        # TODO: error in strict mode
        util.warn('Invalid backslash escape')
        return value

    if id_ == Id.Char_OneChar:
        # The character right after the backslash is the table key.
        escape_char = value[1]
        return _ONE_CHAR[escape_char]

    if id_ == Id.Char_Stop:
        # \c returns a special sentinel
        return None

    if id_ in (Id.Char_Octal3, Id.Char_Octal4):
        if id_ == Id.Char_Octal4:
            # echo -e '\0377': skip the backslash and the leading 0
            octal_digits = value[2:]
        else:
            # $'\377': skip the backslash
            octal_digits = value[1:]

        byte_val = int(octal_digits, 8)
        # Values that don't fit in a byte wrap around.
        # NOTE: strict mode could treat this as an out-of-range error instead.
        if byte_val >= 256:
            byte_val = byte_val % 256
        return chr(byte_val)

    if id_ == Id.Char_Hex:
        # Drop the 2-character prefix; the rest is hex digits.
        return chr(int(value[2:], 16))

    if id_ in (Id.Char_Unicode4, Id.Char_Unicode8):
        # Drop the 2-character prefix; the rest is the code point in hex.
        code_point = int(value[2:], 16)
        return string_ops.Utf8Encode(code_point)

    raise AssertionError