Files
mujs/utf.h
Tor Andersson 832e069049 Support 4-byte UTF-8 sequences.
The following functions are no longer restricted to 16-bit integer values:

	String.fromCharCode()
	String.prototype.charCodeAt()

repr() will not escape characters outside the BMP (supplementary-plane characters),
as doing so would require conversion to surrogate pairs; it will instead emit these
characters as UTF-8. Unicode characters in the BMP will still be escaped with \uXXXX as before.

JSON.stringify() only escapes control characters, so will represent all non-ASCII
characters as UTF-8.

We do no automatic conversions to/from surrogate pairs. Code that worked with
surrogate pairs should not be affected by these changes.
2020-05-27 12:32:32 +02:00

44 lines
1.1 KiB
C

#ifndef js_utf_h
#define js_utf_h
/*
 * Rune: integer type used for a single rune (see Runemax below for the
 * largest value it must hold, 0x10FFFF).
 * NOTE(review): the C standard only guarantees 16 bits for int; the
 * "32 bits" remark is an assumption about the target platform -- confirm
 * for any port where int is narrower than 21 bits.
 */
typedef int Rune; /* 32 bits */
/*
 * Rename the short rune/UTF helper names to jsU_-prefixed identifiers.
 * Because these are preprocessor substitutions, the prototypes declared
 * later in this header, and every use of these names in code that includes
 * it, actually refer to the jsU_* symbols.
 * NOTE(review): presumably done to avoid link-time clashes with other
 * libraries exporting the same unprefixed names -- confirm.
 */
#define chartorune jsU_chartorune
#define runetochar jsU_runetochar
#define runelen jsU_runelen
#define utflen jsU_utflen
#define isalpharune jsU_isalpharune
#define islowerrune jsU_islowerrune
#define isspacerune jsU_isspacerune
#define istitlerune jsU_istitlerune
#define isupperrune jsU_isupperrune
#define tolowerrune jsU_tolowerrune
#define totitlerune jsU_totitlerune
#define toupperrune jsU_toupperrune
/* Limits and sentinel values for the rune / UTF routines. */
enum
{
	/* Largest number of bytes a single encoded rune can occupy. */
	UTFmax    = 4,

	/* Values below this cannot represent part of a UTF sequence. */
	Runesync  = 0x80,

	/* Values below this are the same as a rune and as a UTF sequence. */
	Runeself  = 0x80,

	/* Rune value that denotes a decoding error in UTF. */
	Runeerror = 0xFFFD,

	/* Largest rune value. */
	Runemax   = 0x10FFFF,
};
/*
 * Rune / UTF routines. The names below are rewritten to jsU_* by the
 * #define lines earlier in this header. The implementations are not
 * visible here; the per-function notes are inferred from names and
 * signatures and must be confirmed against the implementation.
 */

/* NOTE(review): presumably decodes one rune from str into *rune and returns
 * the number of bytes consumed -- confirm, including the result on
 * malformed input (Runeerror?). */
int chartorune(Rune *rune, const char *str);
/* NOTE(review): presumably encodes *rune into str and returns the number of
 * bytes written (likely at most UTFmax) -- confirm buffer requirements. */
int runetochar(char *str, const Rune *rune);
/* NOTE(review): presumably the number of bytes needed to encode c; note the
 * parameter is declared int rather than Rune (the same type today). */
int runelen(int c);
/* NOTE(review): presumably the number of runes in the string s -- confirm. */
int utflen(const char *s);

/* Rune classification. NOTE(review): presumably each returns non-zero when
 * c belongs to the named class and zero otherwise -- confirm. */
int isalpharune(Rune c);
int islowerrune(Rune c);
int isspacerune(Rune c);
int istitlerune(Rune c);
int isupperrune(Rune c);

/* Rune case conversion. NOTE(review): presumably each returns the converted
 * rune, or c itself when no mapping exists -- confirm. */
Rune tolowerrune(Rune c);
Rune totitlerune(Rune c);
Rune toupperrune(Rune c);
#endif