Source code for tocoli.enc

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from tocoli import Py, PY2
import tocoli.py2.enc as py2enc
import tocoli.py3.enc as py3enc

def encode(input, encoding='utf-8', errors='strict', input_encoding='utf-8'):
    """Encode any string.

    Returns a byte string, otherwise the input.

    Use this function to encode strings you like to write to any kind of
    output (e.g. stdout, file, API, …). This is useful to exchange
    information in a standardized way (the encoding). This function is a
    universal wrapper for the built-in `codecs.encode()` function.

    Examples:
        Default usage::

            >>> encode('café')
            b'caf\xc3\xa9'  # utf-8 encoded byte string

        Transform encoding::

            >>> encode('caf\xe9', 'utf-8', input_encoding='latin-1')
            b'caf\xc3\xa9'  # utf-8 encoded byte string

        Advanced codec support::

            >>> encode('café', 'base64')
            b'Y2Fmw6k=\\n'  # base64 encoded byte string
                            # inner encoding is always utf-8

    Args:
        input (str): Any string you like to encode. The string can be a
            byte string or a unicode decoded string. The function makes
            sure to treat the input type in the correct manner.
        encoding (Optional[str]): Output encoding. Defaults to utf-8.
            Defines the encoding for the resulting byte string.
        errors (Optional[str]): Error handling scheme. Defaults to
            'strict'. These string values are predefined:

            * 'strict' - raise a ValueError error (or a subclass).
            * 'ignore' - ignore the character and continue with the next.
            * 'replace' - replace with a suitable replacement character;
              Python will use the official U+FFFD REPLACEMENT CHARACTER
              for the builtin Unicode codecs on decoding and '?' on
              encoding.
            * 'xmlcharrefreplace' - replace with the appropriate XML
              character reference (only for encoding).
            * 'backslashreplace' - replace with backslashed escape
              sequences (only for encoding).
        input_encoding (Optional[str]): Given encoding. Defaults to utf-8.
            Set this parameter if your input is not encoded as utf-8.

    Returns:
        str (python2) bytes (python3): Encoded byte string.

    Raises:
        NotImplementedError: If the Python 3 implementation raises any
            ordinary exception.
    """
    if PY2:
        return py2enc.encode(input, encoding, errors, input_encoding)

    try:
        return py3enc.encode(input, encoding, errors, input_encoding)
    except Exception:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit propagate untouched instead of
        # being rewritten as NotImplementedError.
        # NOTE(review): this still reports genuine codec failures as
        # "not supported" - confirm whether callers rely on that.
        raise NotImplementedError(Py.Ver.text + ' is not supported')
def decode(input, encoding='utf-8', errors='strict', detect='utf-8'):
    """Decode any string.

    Returns a unicode decoded string, otherwise the input.

    Use this function to decode an encoded string. It is recommended to
    use this function as early as possible in your data flow, when you
    receive data from an external resource (e.g. stdin, file, API, …).
    Once decoded you have access to the encoded information and can
    operate on it. This function is a universal wrapper for the built-in
    `codecs.decode()` function.

    Examples:
        Default encoding utf-8::

            >>> decode('caf\xc3\xa9')
            u'café'  # decoded unicode string

        Non default encoding::

            >>> decode('caf\xe9', 'latin-1')
            u'café'  # decoded unicode string

        Advanced codec support::

            >>> decode('Y2Fmw6k=\\n', 'base64')
            u'café'  # decoded unicode string

    Args:
        input (str): Any string you like to decode. The string can be a
            byte string or a unicode decoded string. The function makes
            sure to treat the input type in the correct manner.
        encoding (Optional[str]): Input encoding. Defaults to utf-8.
            Defines the encoding of the input.
        errors (Optional[str]): Error handling scheme. Defaults to
            'strict'. These string values are predefined:

            * 'strict' - raise a ValueError error (or a subclass).
            * 'ignore' - ignore the character and continue with the next.
            * 'replace' - replace with a suitable replacement character;
              Python will use the official U+FFFD REPLACEMENT CHARACTER
              for the builtin Unicode codecs on decoding and '?' on
              encoding.
            * 'xmlcharrefreplace' - replace with the appropriate XML
              character reference (only for encoding).
            * 'backslashreplace' - replace with backslashed escape
              sequences (only for encoding).
        detect (Optional[str]): Inner encoding. Defaults to utf-8. This
            parameter defines the recursive decoding type (e.g. the input
            contains a `base64` encoded string inside another encoded
            string, which might be utf-8 or something else). If the given
            inner decoding type is not correct, then the function tries
            to detect the encoding automatically.

    Returns:
        unicode (python2) str (python3): Decoded unicode string.

    Raises:
        NotImplementedError: If the Python 3 implementation raises any
            ordinary exception.
    """
    if PY2:
        return py2enc.decode(input, encoding, errors, detect)

    try:
        # NOTE(review): `errors` and `detect` are not forwarded on the
        # Python 3 path - confirm whether py3enc.decode accepts them.
        return py3enc.decode(input, encoding)
    except Exception:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit propagate untouched instead of
        # being rewritten as NotImplementedError.
        # NOTE(review): this still reports genuine codec failures as
        # "not supported" - confirm whether callers rely on that.
        raise NotImplementedError(Py.Ver.text + ' is not supported')