libidn 1.41
stringprep.c
Go to the documentation of this file.
1/* stringprep.c --- Core stringprep implementation.
2 Copyright (C) 2002-2022 Simon Josefsson
3
4 This file is part of GNU Libidn.
5
6 GNU Libidn is free software: you can redistribute it and/or
7 modify it under the terms of either:
8
9 * the GNU Lesser General Public License as published by the Free
10 Software Foundation; either version 3 of the License, or (at
11 your option) any later version.
12
13 or
14
15 * the GNU General Public License as published by the Free
16 Software Foundation; either version 2 of the License, or (at
17 your option) any later version.
18
19 or both in parallel, as here.
20
21 GNU Libidn is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
25
26 You should have received copies of the GNU General Public License and
27 the GNU Lesser General Public License along with this program. If
28 not, see <https://www.gnu.org/licenses/>. */
29
30#ifdef HAVE_CONFIG_H
31# include "config.h"
32#endif
33
34#include <stdlib.h>
35#include <string.h>
36
37#include "stringprep.h"
38
39static int
40_compare_table_element (const uint32_t * c,
42{
43 if (*c < e->start)
44 return -1;
45 if (*c > e->end)
46 return 1;
47 return 0;
48}
49
50static ssize_t
51stringprep_find_character_in_table (uint32_t ucs4,
52 const Stringprep_table_element * table,
53 size_t table_size)
54{
55 /* This is where typical uses of Libidn spends very close to all CPU
56 time and causes most cache misses. One could easily do a binary
57 search instead. Before rewriting this, I want hard evidence this
58 slowness is at all relevant in typical applications. (I don't
59 dispute optimization may improve matters significantly, I'm
60 mostly interested in having someone give real-world benchmark on
61 the impact of libidn.)
62 *
63 * Answer (Tim Rühsen rockdaboot@gmx.de):
64 * Testing the fuzz corpora just once via make check takes ~54 billion CPU cycles.
65 * That is almost 20s on my Intel i3 3.1GHz !!!
66 * That even makes fuzzing almost useless, eating up CPU cycles for nothing.
67 *
68 * The bsearch() approach takes ~3 billion CPU cycles.
69 * Almost a factor of 20 faster (but still pretty slow).
70 * There are still ~2 million calls to bsearch() which make ~30% of CPU time used.
71 * Most time is spent in _g_utf8_normalize_wc().
72
73 ssize_t i;
74
75 for (i = 0; table[i].start || table[i].end; i++)
76 if (ucs4 >= table[i].start &&
77 ucs4 <= (table[i].end ? table[i].end : table[i].start))
78 return i;
79 */
80
82 bsearch (&ucs4, table, table_size, sizeof (Stringprep_table_element),
83 (int (*)(const void *, const void *)) _compare_table_element);
84
85 return p ? (p - table) : -1;
86}
87
88static ssize_t
89stringprep_find_string_in_table (uint32_t * ucs4,
90 size_t ucs4len,
91 size_t *tablepos,
92 const Stringprep_table_element * table,
93 size_t table_size)
94{
95 size_t j;
96 ssize_t pos;
97
98 for (j = 0; j < ucs4len; j++)
99 if ((pos =
100 stringprep_find_character_in_table (ucs4[j], table,
101 table_size)) != -1)
102 {
103 if (tablepos)
104 *tablepos = pos;
105 return j;
106 }
107
108 return -1;
109}
110
111static int
112stringprep_apply_table_to_string (uint32_t * ucs4,
113 size_t *ucs4len,
114 size_t maxucs4len,
115 const Stringprep_table_element * table,
116 size_t table_size)
117{
118 ssize_t pos;
119 size_t i, maplen;
120 uint32_t *src = ucs4; /* points to unprocessed data */
121 size_t srclen = *ucs4len; /* length of unprocessed data */
122
123 while ((pos = stringprep_find_string_in_table (src, srclen,
124 &i, table,
125 table_size)) != -1)
126 {
127 for (maplen = STRINGPREP_MAX_MAP_CHARS;
128 maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
129 ;
130
131 if (*ucs4len - 1 + maplen >= maxucs4len)
133
134 memmove (src + pos + maplen, src + pos + 1,
135 sizeof (uint32_t) * (srclen - pos - 1));
136 memcpy (src + pos, table[i].map, sizeof (uint32_t) * maplen);
137 *ucs4len = *ucs4len - 1 + maplen;
138 src += pos + maplen;
139 srclen -= pos + 1;
140 }
141
142 return STRINGPREP_OK;
143}
144
145#define INVERTED(x) ((x) & ((~0UL) >> 1))
146#define UNAPPLICAPLEFLAGS(flags, profileflags) \
147 ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \
148 ( INVERTED(profileflags) && (profileflags & flags)))
149
181int
182stringprep_4i (uint32_t * ucs4, size_t *len, size_t maxucs4len,
184 const Stringprep_profile * profile)
185{
186 size_t i, j;
187 ssize_t k;
188 size_t ucs4len = *len;
189 int rc;
190
191 for (i = 0; profile[i].operation; i++)
192 {
193 switch (profile[i].operation)
194 {
195 case STRINGPREP_NFKC:
196 {
197 uint32_t *q = 0;
198
199 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
200 break;
201
202 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
203 /* Profile requires NFKC, but callee asked for no NFKC. */
205
206 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
207 if (!q)
209
210 for (ucs4len = 0; q[ucs4len]; ucs4len++)
211 ;
212
213 if (ucs4len >= maxucs4len)
214 {
215 free (q);
217 }
218
219 memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
220
221 free (q);
222 }
223 break;
224
226 k = stringprep_find_string_in_table (ucs4, ucs4len,
227 NULL, profile[i].table,
228 profile[i].table_size);
229 if (k != -1)
231 break;
232
234 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
235 break;
236 if (flags & STRINGPREP_NO_UNASSIGNED)
237 {
238 k = stringprep_find_string_in_table
239 (ucs4, ucs4len, NULL, profile[i].table,
240 profile[i].table_size);
241 if (k != -1)
243 }
244 break;
245
247 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
248 break;
249 rc = stringprep_apply_table_to_string
250 (ucs4, &ucs4len, maxucs4len, profile[i].table,
251 profile[i].table_size);
252 if (rc != STRINGPREP_OK)
253 return rc;
254 break;
255
259 break;
260
261 case STRINGPREP_BIDI:
262 {
263 int done_prohibited = 0;
264 int done_ral = 0;
265 int done_l = 0;
266 size_t contains_ral = SIZE_MAX;
267 size_t contains_l = SIZE_MAX;
268
269 for (j = 0; profile[j].operation; j++)
270 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
271 {
272 done_prohibited = 1;
273 k = stringprep_find_string_in_table (ucs4, ucs4len,
274 NULL,
275 profile[j].table,
276 profile[j].table_size);
277 if (k != -1)
279 }
280 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
281 {
282 done_ral = 1;
283 if (stringprep_find_string_in_table
284 (ucs4, ucs4len, NULL, profile[j].table,
285 profile[j].table_size) != -1)
286 contains_ral = j;
287 }
288 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
289 {
290 done_l = 1;
291 if (stringprep_find_string_in_table
292 (ucs4, ucs4len, NULL, profile[j].table,
293 profile[j].table_size) != -1)
294 contains_l = j;
295 }
296
297 if (!done_prohibited || !done_ral || !done_l)
299
300 if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
302
303 if (contains_ral != SIZE_MAX)
304 {
305 if (!(stringprep_find_character_in_table
306 (ucs4[0], profile[contains_ral].table,
307 profile[contains_ral].table_size) != -1
308 &&
309 stringprep_find_character_in_table (ucs4[ucs4len - 1],
310 profile
311 [contains_ral].table,
312 profile
313 [contains_ral].table_size)
314 != -1))
316 }
317 }
318 break;
319
320 default:
322 break;
323 }
324 }
325
326 *len = ucs4len;
327
328 return STRINGPREP_OK;
329}
330
331static int
332stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
334 const Stringprep_profile * profile)
335{
336 int rc;
337
338 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
339 if (rc != STRINGPREP_OK)
340 return rc;
341
342 if (ucs4len >= maxucs4len)
344
345 ucs4[ucs4len] = 0;
346
347 return STRINGPREP_OK;
348}
349
374int
375stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
377 const Stringprep_profile * profile)
378{
379 size_t ucs4len;
380
381 for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
382 ;
383
384 return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
385}
386
414int
415stringprep (char *in,
416 size_t maxlen,
418 const Stringprep_profile * profile)
419{
420 int rc;
421 char *utf8 = NULL;
422 uint32_t *ucs4 = NULL;
423 size_t ucs4len, maxucs4len, adducs4len = strlen (in) / 10 + 1;
424
425 do
426 {
427 uint32_t *newp;
428
429 free (ucs4);
430 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
431 if (ucs4 == NULL)
433 maxucs4len = ucs4len + adducs4len;
434 newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
435 if (!newp)
436 {
437 free (ucs4);
439 }
440 ucs4 = newp;
441
442 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
443 adducs4len *= 2;
444 }
445 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
446 if (rc != STRINGPREP_OK)
447 {
448 free (ucs4);
449 return rc;
450 }
451
452 utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
453 free (ucs4);
454 if (!utf8)
456
457 if (strlen (utf8) >= maxlen)
458 {
459 free (utf8);
461 }
462
463 strcpy (in, utf8); /* flawfinder: ignore */
464
465 free (utf8);
466
467 return STRINGPREP_OK;
468}
469
494int
495stringprep_profile (const char *in,
496 char **out,
497 const char *profile, Stringprep_profile_flags flags)
498{
499 const Stringprep_profiles *p;
500 char *str = NULL;
501 size_t len = strlen (in) + 1, addlen = len / 10 + 1;
502 int rc;
503
504 for (p = &stringprep_profiles[0]; p->name; p++)
505 if (strcmp (p->name, profile) == 0)
506 break;
507
508 if (!p || !p->name || !p->tables)
510
511 do
512 {
513 free (str);
514 str = (char *) malloc (len);
515 if (str == NULL)
517
518 strcpy (str, in);
519
520 rc = stringprep (str, len, flags, p->tables);
521 len += addlen;
522 addlen *= 2;
523 }
524 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
525
526 if (rc == STRINGPREP_OK)
527 *out = str;
528 else
529 free (str);
530
531 return rc;
532}
533
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
Definition: nfkc.c:1040
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)
Definition: nfkc.c:1098
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition: nfkc.c:1007
const Stringprep_profiles stringprep_profiles[]
Definition: profiles.c:34
int stringprep_4zi(uint32_t *ucs4, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:375
int stringprep_4i(uint32_t *ucs4, size_t *len, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:182
int stringprep_profile(const char *in, char **out, const char *profile, Stringprep_profile_flags flags)
Definition: stringprep.c:495
#define UNAPPLICAPLEFLAGS(flags, profileflags)
Definition: stringprep.c:146
int stringprep(char *in, size_t maxlen, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:415
@ STRINGPREP_BIDI_PROHIBIT_TABLE
Definition: stringprep.h:101
@ STRINGPREP_BIDI_RAL_TABLE
Definition: stringprep.h:102
@ STRINGPREP_NFKC
Definition: stringprep.h:96
@ STRINGPREP_PROHIBIT_TABLE
Definition: stringprep.h:100
@ STRINGPREP_UNASSIGNED_TABLE
Definition: stringprep.h:99
@ STRINGPREP_BIDI
Definition: stringprep.h:97
@ STRINGPREP_MAP_TABLE
Definition: stringprep.h:98
@ STRINGPREP_BIDI_L_TABLE
Definition: stringprep.h:103
Stringprep_profile_flags
Definition: stringprep.h:87
@ STRINGPREP_NO_NFKC
Definition: stringprep.h:88
@ STRINGPREP_NO_UNASSIGNED
Definition: stringprep.h:90
@ STRINGPREP_UNKNOWN_PROFILE
Definition: stringprep.h:78
@ STRINGPREP_NFKC_FAILED
Definition: stringprep.h:81
@ STRINGPREP_TOO_SMALL_BUFFER
Definition: stringprep.h:75
@ STRINGPREP_ICONV_ERROR
Definition: stringprep.h:79
@ STRINGPREP_MALLOC_ERROR
Definition: stringprep.h:82
@ STRINGPREP_FLAG_ERROR
Definition: stringprep.h:77
@ STRINGPREP_OK
Definition: stringprep.h:67
@ STRINGPREP_CONTAINS_UNASSIGNED
Definition: stringprep.h:69
@ STRINGPREP_CONTAINS_PROHIBITED
Definition: stringprep.h:70
@ STRINGPREP_BIDI_CONTAINS_PROHIBITED
Definition: stringprep.h:73
@ STRINGPREP_BIDI_BOTH_L_AND_RAL
Definition: stringprep.h:71
@ STRINGPREP_BIDI_LEADTRAIL_NOT_RAL
Definition: stringprep.h:72
@ STRINGPREP_PROFILE_ERROR
Definition: stringprep.h:76
#define STRINGPREP_MAX_MAP_CHARS
Definition: stringprep.h:106
const char * name
Definition: stringprep.h:171
const Stringprep_profile * tables
Definition: stringprep.h:172
uint32_t map[STRINGPREP_MAX_MAP_CHARS]
Definition: stringprep.h:135
Stringprep_profile_steps operation
Definition: stringprep.h:150