/*
 * GLib-style type and allocator names used by the code in this file,
 * mapped by the preprocessor onto plain C / fixed-width equivalents.
 * These are textual macros, not typedefs: each name is substituted
 * wherever it appears after this point.
 */
/* Unsigned byte; g_utf8_next_char casts to it before indexing g_utf8_skip. */
42 #define guchar unsigned char
45 #define guint unsigned int
46 #define gushort unsigned short
47 #define gint16 int16_t
48 #define guint16 uint16_t
/* Element type of the strings handled by g_ucs4_to_utf8 (32 bits wide). */
49 #define gunichar uint32_t
/* Signed size/length type. */
51 #define gssize ssize_t
/* Maps straight to malloc, so a failed allocation yields NULL. */
52 #define g_malloc malloc
54 #define g_return_val_if_fail(expr,val) { \
/*
 * G_N_ELEMENTS(arr): element count of `arr`, computed as the total size
 * divided by the size of its first element.  Only meaningful when `arr`
 * is a real array; for a pointer it divides the pointer size instead.
 */
88 #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))
/*
 * G_UNLIKELY(expr): defined here as the bare parenthesized expression,
 * i.e. no compiler branch hint is attached.
 */
90 #define G_UNLIKELY(expr) (expr)
/*
 * g_utf8_next_char(p): pointer just past the character starting at `p`.
 * The byte at `p` is read as an unsigned value and used to index
 * g_utf8_skip; the table entry is added to `p`.
 * The macro itself performs no validation or bounds checking, and `p`
 * is evaluated twice, so it must not have side effects.
 */
128 #define g_utf8_next_char(p) ((p) + g_utf8_skip[*(const guchar *)(p)])
153 #define UTF8_COMPUTE(Char, Mask, Len) \
159 else if ((Char & 0xe0) == 0xc0) \
164 else if ((Char & 0xf0) == 0xe0) \
169 else if ((Char & 0xf8) == 0xf0) \
174 else if ((Char & 0xfc) == 0xf8) \
179 else if ((Char & 0xfe) == 0xfc) \
/*
 * UTF8_LENGTH(Char): length class of the value `Char`, chosen by range:
 *   < 0x80      -> 1
 *   < 0x800     -> 2
 *   < 0x10000   -> 3
 *   < 0x200000  -> 4
 *   < 0x4000000 -> 5
 *   otherwise   -> 6
 * `Char` may be evaluated up to five times, so pass an expression
 * without side effects.
 */
187 #define UTF8_LENGTH(Char) \
188 ((Char) < 0x80 ? 1 : \
189 ((Char) < 0x800 ? 2 : \
190 ((Char) < 0x10000 ? 3 : \
191 ((Char) < 0x200000 ? 4 : \
192 ((Char) < 0x4000000 ? 5 : 6)))))
194 #define UTF8_GET(Result, Chars, Count, Mask, Len) \
195 (Result) = (Chars)[0] & (Mask); \
196 for ((Count) = 1; (Count) < (Len); ++(Count)) \
198 if (((Chars)[(Count)] & 0xc0) != 0x80) \
204 (Result) |= ((Chars)[(Count)] & 0x3f); \
207 static const gchar utf8_skip_data[256] = {
208 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
210 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
218 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
222 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
/*
 * File-local constant pointer to the first element of utf8_skip_data.
 * g_utf8_next_char indexes through this pointer with the leading byte of
 * a character.  Both the pointer and the data it points to are const.
 */
226 static const gchar *
const g_utf8_skip = utf8_skip_data;
245 const gchar *start = p;
263 while (p - start < max && *p)
272 if (p - start <= max)
292 g_utf8_get_char (
const gchar * p)
294 int i, mask = 0, len;
296 unsigned char c = (
unsigned char) *p;
335 else if (c < 0x10000)
340 else if (c < 0x200000)
345 else if (c < 0x4000000)
358 for (i = len - 1; i > 0; --i)
360 outbuf[i] = (c & 0x3f) | 0x80;
363 outbuf[0] = c | first;
387 g_utf8_to_ucs4_fast (
const gchar * str,
glong len,
glong * items_written)
407 while (p < str + len && *p)
419 for (i = 0; i < n_chars; i++)
444 wc |= (
guchar) (*p++) & 0x3f;
447 while ((wc & mask) != 0);
486 g_ucs4_to_utf8 (
const gunichar * str,
491 gchar *result = NULL;
496 for (i = 0; len < 0 || i < len; i++)
501 if (str[i] >= 0x80000000)
507 result =
g_malloc (result_length + 1);
513 while (p < result + result_length)
514 p += g_unichar_to_utf8 (str[i++], p);
519 *items_written = p - result;
/*
 * CC_PART1(Page, Char): two-level lookup through
 * combining_class_table_part1.
 *   - If the entry for `Page` is >= G_UNICODE_MAX_TABLE_INDEX, the result
 *     is that entry minus G_UNICODE_MAX_TABLE_INDEX and does not depend on
 *     `Char` (one value for the whole page).
 *   - Otherwise the entry selects a row of cclass_data, and `Char`
 *     selects the element within that row.
 * `Page` is evaluated more than once.
 */
554 #define CC_PART1(Page, Char) \
555 ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
556 ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
557 : (cclass_data[combining_class_table_part1[Page]][Char]))
/*
 * CC_PART2(Page, Char): same two-level lookup as for the first table,
 * but driven by combining_class_table_part2.
 *   - An entry >= G_UNICODE_MAX_TABLE_INDEX yields the entry minus
 *     G_UNICODE_MAX_TABLE_INDEX, independent of `Char`.
 *   - Any other entry is a row index into cclass_data; `Char` picks the
 *     element in that row.
 * `Page` is evaluated more than once.
 */
559 #define CC_PART2(Page, Char) \
560 ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
561 ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
562 : (cclass_data[combining_class_table_part2[Page]][Char]))
564 #define COMBINING_CLASS(Char) \
565 (((Char) <= G_UNICODE_LAST_CHAR_PART1) \
566 ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
567 : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
568 ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
/*
 * Derived counts used by the Hangul range checks in this file
 * (SCount bounds the `wc >= SBase && wc < SBase + SCount` tests).
 * NOTE(review): LCount, VCount and TCount are defined outside the lines
 * visible here; presumably the leading/vowel/trailing jamo counts.
 */
/* NCount = VCount * TCount. */
579 #define NCount (VCount * TCount)
/* SCount = LCount * NCount, i.e. LCount * VCount * TCount. */
580 #define SCount (LCount * NCount)
603 for (i = 0; i < len - 1; ++i)
606 if (next != 0 && last > next)
610 for (j = i + 1; j > 0; --j)
616 string[j] =
string[j - 1];
648 r[2] =
TBase + TIndex;
662 if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
666 int half = (start + end) / 2;
667 if (ch == decomp_table[half].ch)
673 offset = decomp_table[half].compat_offset;
675 offset = decomp_table[half].canon_offset;
679 offset = decomp_table[half].canon_offset;
684 return &(decomp_expansion_string[offset]);
686 else if (half == start)
688 else if (ch > decomp_table[half].ch)
708 if (0 <= LIndex && LIndex <
LCount && 0 <= VIndex && VIndex <
VCount)
713 else if (0 <= SIndex && SIndex <
SCount && (SIndex %
TCount) == 0
714 && 0 < TIndex && TIndex <
TCount)
716 *result = a + TIndex;
/*
 * CI(Page, Char): two-level lookup through compose_table.
 *   - If compose_table[Page] is >= G_UNICODE_MAX_TABLE_INDEX, the result
 *     is that entry minus G_UNICODE_MAX_TABLE_INDEX, regardless of `Char`.
 *   - Otherwise the entry selects a row of compose_data and `Char`
 *     selects the element within it.
 * No range check on `Page` is done here; COMPOSE_INDEX tests the page
 * against COMPOSE_TABLE_LAST before invoking this macro.
 * `Page` is evaluated more than once.
 */
723 #define CI(Page, Char) \
724 ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
725 ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
726 : (compose_data[compose_table[Page]][Char]))
/*
 * COMPOSE_INDEX(Char): value looked up for `Char` via CI, or 0 when the
 * page of `Char` (its value shifted right by 8) lies beyond
 * COMPOSE_TABLE_LAST.
 *
 * Every use of `Char` is parenthesized so that an argument built from
 * low-precedence operators (for example `a | b`) is shifted as a whole
 * in the range check, matching the page passed to CI.
 * `Char` is evaluated up to three times; pass an expression without side
 * effects.
 */
#define COMPOSE_INDEX(Char) \
  ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI ((Char) >> 8, (Char) & 0xff))
736 if (combine_hangul (a, b, result))
798 while ((max_len < 0 || p < str + max_len) && *p)
803 if (wc >= SBase && wc < SBase +
SCount)
806 decompose_hangul (wc, NULL, &result_len);
811 decomp = find_decomposition (wc, do_compat);
814 n_wc += g_utf8_strlen (decomp, -1);
829 while ((max_len < 0 || p < str + max_len) && *p)
834 gsize old_n_wc = n_wc;
836 if (wc >= SBase && wc < SBase +
SCount)
839 decompose_hangul (wc, wc_buffer + n_wc, &result_len);
844 decomp = find_decomposition (wc, do_compat);
850 wc_buffer[n_wc++] = g_utf8_get_char (pd);
853 wc_buffer[n_wc++] = wc;
862 g_unicode_canonical_ordering (wc_buffer + last_start,
864 last_start = old_n_wc;
873 g_unicode_canonical_ordering (wc_buffer + last_start,
882 if (do_compose && n_wc > 0)
888 for (i = 0; i < n_wc; i++)
893 (last_cc == 0 || last_cc != cc) &&
894 combine (wc_buffer[last_start], wc_buffer[i],
895 &wc_buffer[last_start]))
897 for (j = i + 1; j < n_wc; j++)
898 wc_buffer[j - 1] = wc_buffer[j];
961 gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
964 result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL);
985 return g_utf8_get_char (p);
1002 return g_unichar_to_utf8 (c, outbuf);
1023 return g_utf8_to_ucs4_fast (str, (
glong) len, (
glong *) items_written);
1045 size_t * items_read,
size_t * items_written)
1047 return g_ucs4_to_utf8 (str, len, (
glong *) items_read,
1048 (
glong *) items_written);
1094 uint32_t *result_wc;
#define g_utf8_next_char(p)
uint32_t stringprep_utf8_to_unichar(const char *p)
#define UTF8_GET(Result, Chars, Count, Mask, Len)
#define G_UNICODE_NOT_PRESENT_OFFSET
char * stringprep_utf8_nfkc_normalize(const char *str, ssize_t len)
#define UTF8_LENGTH(Char)
int stringprep_unichar_to_utf8(uint32_t c, char *outbuf)
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
#define G_N_ELEMENTS(arr)
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
#define COMPOSE_FIRST_START
#define COMBINING_CLASS(Char)
#define COMPOSE_FIRST_SINGLE_START
#define g_return_val_if_fail(expr, val)
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)
#define UTF8_COMPUTE(Char, Mask, Len)
#define COMPOSE_SECOND_START
#define COMPOSE_SECOND_SINGLE_START
#define COMPOSE_INDEX(Char)