rfc:uconverter
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
rfc:uconverter [2012/10/30 17:10] – [Specification of the Class] Note ennumeration functions being static pollita | rfc:uconverter [2017/09/22 13:28] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 3: | Line 3: | ||
* Date: 2012-10-29 | * Date: 2012-10-29 | ||
* Author: Sara Golemon < | * Author: Sara Golemon < | ||
- | * Status: | + | * Status: |
* First Published at: http:// | * First Published at: http:// | ||
Exposes ICU's UConverter functions by adding a class to the ext/intl extension | Exposes ICU's UConverter functions by adding a class to the ext/intl extension | ||
+ | ===== Vote ===== | ||
+ | |||
+ | < | ||
+ | title=" | ||
+ | * Yes | ||
+ | * No | ||
+ | </ | ||
===== Introduction ===== | ===== Introduction ===== | ||
Line 16: | Line 23: | ||
class UConverter { | class UConverter { | ||
/* UConverterCallbackReason */ | /* UConverterCallbackReason */ | ||
- | const UCNV_UNASSIGNED; | + | const REASON_UNASSIGNED; |
- | const UCNV_ILLEGAL; | + | const REASON_ILLEGAL; |
- | const UCNV_IRREGULAR; | + | const REASON_IRREGULAR; |
- | const UCNV_RESET; | + | const REASON_RESET; |
- | const UCNV_CLOSE; | + | const REASON_CLOSE; |
- | const UCNV_CLONE; | + | const REASON_CLONE; |
| | ||
/* UConverterType */ | /* UConverterType */ | ||
- | const UCNV_UNSUPPORTED_CONVERTER; | + | const UNSUPPORTED_CONVERTER; |
- | const UCNV_SBCS; | + | const SBCS; |
- | const UCNV_DBCS; | + | const DBCS; |
- | const UCNV_MBCS; | + | const MBCS; |
- | const UCNV_LATIN_1; | + | const LATIN_1; |
- | const UCNV_UTF8; | + | const UTF8; |
- | const UCNV_UTF16_BigEndian; | + | const UTF16_BigEndian; |
- | const UCNV_UTF16_LittleEndian; | + | const UTF16_LittleEndian; |
- | const UCNV_UTF32_BigEndian; | + | const UTF32_BigEndian; |
- | const UCNV_UTF32_LittleEndian; | + | const UTF32_LittleEndian; |
- | const UCNV_EBCDIC_STATEFUL; | + | const EBCDIC_STATEFUL; |
- | const UCNV_ISO_2022; | + | const ISO_2022; |
- | const UCNV_LMBCS_1; | + | const LMBCS_1; |
- | const UCNV_LMBCS_2; | + | const LMBCS_2; |
- | const UCNV_LMBCS_3; | + | const LMBCS_3; |
- | const UCNV_LMBCS_4; | + | const LMBCS_4; |
- | const UCNV_LMBCS_5; | + | const LMBCS_5; |
- | const UCNV_LMBCS_6; | + | const LMBCS_6; |
- | const UCNV_LMBCS_8; | + | const LMBCS_8; |
- | const UCNV_LMBCS_11; | + | const LMBCS_11; |
- | const UCNV_LMBCS_16; | + | const LMBCS_16; |
- | const UCNV_LMBCS_17; | + | const LMBCS_17; |
- | const UCNV_LMBCS_18; | + | const LMBCS_18; |
- | const UCNV_LMBCS_19; | + | const LMBCS_19; |
- | const UCNV_LMBCS_LAST; | + | const LMBCS_LAST; |
- | const UCNV_HZ; | + | const HZ; |
- | const UCNV_SCSU; | + | const SCSU; |
- | const UCNV_ISCII; | + | const ISCII; |
- | const UCNV_US_ASCII; | + | const US_ASCII; |
- | const UCNV_UTF7; | + | const UTF7; |
- | const UCNV_BOCU1; | + | const BOCU1; |
- | const UCNV_UTF16; | + | const UTF16; |
- | const UCNV_UTF32; | + | const UTF32; |
- | const UCNV_CESU8; | + | const CESU8; |
- | const UCNV_IMAP_MAILBOX; | + | const IMAP_MAILBOX; |
| | ||
__construct(string $toEncoding, | __construct(string $toEncoding, | ||
Line 77: | Line 84: | ||
| | ||
/* Default callback functions */ | /* Default callback functions */ | ||
- | | + | |
- | | + | |
| | ||
/* Primary conversion workhorses */ | /* Primary conversion workhorses */ | ||
string convert(string $str[, bool $reverse = false]); | string convert(string $str[, bool $reverse = false]); | ||
static string transcode(string $str, string $toEncoding, | static string transcode(string $str, string $toEncoding, | ||
+ | | ||
+ | /* Errors */ | ||
+ | int getErrorCode(); | ||
+ | string getErrorMessage(); | ||
| | ||
/* Enumeration and lookup */ | /* Enumeration and lookup */ | ||
Line 133: | Line 144: | ||
===== Advanced Use ===== | ===== Advanced Use ===== | ||
- | The UConverter class may be extended and its default methods | + | The UConverter class actually does two conversion cycles. |
class MyConverter extends UConverter { | class MyConverter extends UConverter { | ||
public function fromUCallback($reason, | public function fromUCallback($reason, | ||
- | if (($reason == UConverter:: | + | if (($reason == UConverter:: |
// Basic transliteration ' | // Basic transliteration ' | ||
$error = U_ZERO_ERROR; | $error = U_ZERO_ERROR; | ||
Line 148: | Line 159: | ||
// Yields " | // Yields " | ||
- | ===== Error Handling ===== | + | $reason will be one of the UConverterCallbackReason constants defined in the class definition above. |
- | Any errors encountered while calling UConverter:: | + | $source |
+ | $codeUnits is one or more code units from the original string in its source encoding which could not be translated to Unicode. | ||
+ | |||
+ | $codepoint is the Unicode character from the intermediate string which could not be converted to the output encoding. | ||
+ | |||
+ | $error is a by-reference value which will contain the specific ICU error encountered on input, and should be modified to U_ZERO_ERROR (or some appropriate value) before returning the replacement codepoint/ | ||
+ | |||
+ | Return values for this method may be: NULL, Long, String, or Array. | ||
+ | |||
+ | ===== Error Handling ===== | ||
+ | |||
+ | Follows ext/intl convention of storing for later inspection by getErrorCode()/ | ||
===== Enumerators ===== | ===== Enumerators ===== | ||
A few enumeration methods are exposed as a convenience. | A few enumeration methods are exposed as a convenience. | ||
- | ===== Patch ===== | + | ===== References |
+ | |||
+ | ICU4C ucnv.h documentation: | ||
- | An implementation of the above can be found at https:// | + | Patch: An implementation of the above can be found at https:// |
rfc/uconverter.1351617042.txt.gz · Last modified: 2017/09/22 13:28 (external edit)