libidn 1.41
tld.c
Go to the documentation of this file.
1/* tld.c --- Declarations for TLD restriction checking.
2 Copyright (C) 2004-2022 Simon Josefsson.
3 Copyright (C) 2003-2022 Free Software Foundation, Inc.
4
5 Author: Thomas Jacob, Internet24.de
6
7 This file is part of GNU Libidn.
8
9 GNU Libidn is free software: you can redistribute it and/or
10 modify it under the terms of either:
11
12 * the GNU Lesser General Public License as published by the Free
13 Software Foundation; either version 3 of the License, or (at
14 your option) any later version.
15
16 or
17
18 * the GNU General Public License as published by the Free
19 Software Foundation; either version 2 of the License, or (at
20 your option) any later version.
21
22 or both in parallel, as here.
23
24 GNU Libidn is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 General Public License for more details.
28
29 You should have received copies of the GNU General Public License and
30 the GNU Lesser General Public License along with this program. If
31 not, see <https://www.gnu.org/licenses/>. */
32
33#include <config.h>
34
35/* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
36#include <stringprep.h>
37
38/* Get strcmp(). */
39#include <string.h>
40
41/* Get specifications. */
42#include <tld.h>
43
44/* Array of built-in domain restriction structures. See tlds.c. */
45extern const Tld_table *_tld_tables[];
46
59const Tld_table *
60tld_get_table (const char *tld, const Tld_table ** tables)
61{
62 const Tld_table **tldtable = NULL;
63
64 if (!tld || !tables)
65 return NULL;
66
67 for (tldtable = tables; *tldtable; tldtable++)
68 if (!strcmp ((*tldtable)->name, tld))
69 return *tldtable;
70
71 return NULL;
72}
73
88const Tld_table *
89tld_default_table (const char *tld, const Tld_table ** overrides)
90{
91 const Tld_table *tldtable = NULL;
92
93 if (!tld)
94 return NULL;
95
96 if (overrides)
97 tldtable = tld_get_table (tld, overrides);
98
99 if (!tldtable)
100 tldtable = tld_get_table (tld, _tld_tables);
101
102 return tldtable;
103}
104
/* Non-zero when code point C is one of the four label separators
   accepted here: U+002E FULL STOP, U+3002 IDEOGRAPHIC FULL STOP,
   U+FF0E FULLWIDTH FULL STOP or U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP.
   C is evaluated more than once, so it must have no side effects. */
#define DOTP(c)							\
  ((c) == 0x002E || (c) == 0x3002 || (c) == 0xFF0E || (c) == 0xFF61)
107
121int
122tld_get_4 (const uint32_t * in, size_t inlen, char **out)
123{
124 const uint32_t *ipos;
125 size_t olen;
126
127 *out = NULL;
128 if (!in || inlen == 0)
129 return TLD_NODATA;
130
131 ipos = &in[inlen - 1];
132 olen = 0;
133 /* Scan backwards for non(latin)letters. */
134 while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
135 (*ipos >= 0x61 && *ipos <= 0x7A)))
136 ipos--, olen++;
137
138 if (olen > 0 && ipos >= in && DOTP (*ipos))
139 {
140 /* Found something that appears a TLD. */
141 char *out_s = malloc (sizeof (char) * (olen + 1));
142 char *opos = out_s;
143
144 if (!opos)
145 return TLD_MALLOC_ERROR;
146
147 ipos++;
148 /* Transcribe to lowercase ascii string. */
149 for (; ipos < &in[inlen]; ipos++, opos++)
150 *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
151 *opos = 0;
152 *out = out_s;
153 return TLD_SUCCESS;
154 }
155
156 return TLD_NO_TLD;
157}
158
170int
171tld_get_4z (const uint32_t * in, char **out)
172{
173 const uint32_t *ipos = in;
174
175 if (!in)
176 return TLD_NODATA;
177
178 while (*ipos)
179 ipos++;
180
181 return tld_get_4 (in, ipos - in, out);
182}
183
196int
197tld_get_z (const char *in, char **out)
198{
199 uint32_t *iucs;
200 size_t i, ilen;
201 int rc;
202
203 ilen = strlen (in);
204 iucs = calloc (ilen, sizeof (*iucs));
205
206 if (!iucs)
207 return TLD_MALLOC_ERROR;
208
209 for (i = 0; i < ilen; i++)
210 iucs[i] = in[i];
211
212 rc = tld_get_4 (iucs, ilen, out);
213
214 free (iucs);
215
216 return rc;
217}
218
219/*
220 * tld_checkchar - verify that character is permitted
221 * @ch: 32 bit unicode character to check.
222 * @tld: A #Tld_table data structure to check @ch against.
223 *
224 * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
225 * character in @tld.
226 *
227 * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
228 * valid character for the TLD @tld or if @tld is %NULL,
229 * %TLD_INVALID if @ch is invalid as defined by @tld.
230 */
231static int
232_tld_checkchar (uint32_t ch, const Tld_table * tld)
233{
234 const Tld_table_element *s, *e, *m;
235
236 if (!tld)
237 return TLD_SUCCESS;
238
239 /* Check for [-a-z0-9.]. */
240 if ((ch >= 0x61 && ch <= 0x7A) ||
241 (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
242 return TLD_SUCCESS;
243
244 s = tld->valid;
245 e = s + tld->nvalid;
246 while (s < e)
247 {
248 m = s + ((e - s) >> 1);
249 if (ch < m->start)
250 e = m;
251 else if (ch > m->end)
252 s = m + 1;
253 else
254 return TLD_SUCCESS;
255 }
256
257 return TLD_INVALID;
258}
259
279int
280tld_check_4t (const uint32_t * in, size_t inlen, size_t *errpos,
281 const Tld_table * tld)
282{
283 const uint32_t *ipos;
284 int rc;
285
286 if (!tld) /* No data for TLD so everything is valid. */
287 return TLD_SUCCESS;
288
289 ipos = in;
290 while (ipos < &in[inlen])
291 {
292 rc = _tld_checkchar (*ipos, tld);
293 if (rc != TLD_SUCCESS)
294 {
295 if (errpos)
296 *errpos = ipos - in;
297 return rc;
298 }
299 ipos++;
300 }
301 return TLD_SUCCESS;
302}
303
321int
322tld_check_4tz (const uint32_t * in, size_t *errpos, const Tld_table * tld)
323{
324 const uint32_t *ipos = in;
325
326 if (!ipos)
327 return TLD_NODATA;
328
329 while (*ipos)
330 ipos++;
331
332 return tld_check_4t (in, ipos - in, errpos, tld);
333}
334
358int
359tld_check_4 (const uint32_t * in, size_t inlen, size_t *errpos,
360 const Tld_table ** overrides)
361{
362 const Tld_table *tld;
363 char *domain;
364 int rc;
365
366 if (errpos)
367 *errpos = 0;
368
369 /* Get TLD name. */
370 rc = tld_get_4 (in, inlen, &domain);
371
372 if (rc != TLD_SUCCESS)
373 {
374 if (rc == TLD_NO_TLD) /* No TLD, say OK */
375 return TLD_SUCCESS;
376 else
377 return rc;
378 }
379
380 /* Retrieve appropriate data structure. */
381 tld = tld_default_table (domain, overrides);
382 free (domain);
383
384 return tld_check_4t (in, inlen, errpos, tld);
385}
386
408int
409tld_check_4z (const uint32_t * in, size_t *errpos,
410 const Tld_table ** overrides)
411{
412 const uint32_t *ipos = in;
413
414 if (!ipos)
415 return TLD_NODATA;
416
417 while (*ipos)
418 ipos++;
419
420 return tld_check_4 (in, ipos - in, errpos, overrides);
421}
422
446int
447tld_check_8z (const char *in, size_t *errpos, const Tld_table ** overrides)
448{
449 uint32_t *iucs;
450 size_t ilen;
451 int rc;
452
453 if (!in)
454 return TLD_NODATA;
455
456 iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
457
458 if (!iucs)
459 return TLD_MALLOC_ERROR;
460
461 rc = tld_check_4 (iucs, ilen, errpos, overrides);
462
463 free (iucs);
464
465 return rc;
466}
467
491int
492tld_check_lz (const char *in, size_t *errpos, const Tld_table ** overrides)
493{
494 char *utf8;
495 int rc;
496
497 if (!in)
498 return TLD_NODATA;
499
500 utf8 = stringprep_locale_to_utf8 (in);
501 if (!utf8)
502 return TLD_ICONV_ERROR;
503
504
505 rc = tld_check_8z (utf8, errpos, overrides);
506
507 free (utf8);
508
509 return rc;
510}
511
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition: nfkc.c:1007
IDNAPI char * stringprep_locale_to_utf8(const char *str)
Definition: toutf8.c:143
uint32_t end
Definition: tld.h:81
Definition: tld.h:95
const Tld_table_element * valid
Definition: tld.h:99
size_t nvalid
Definition: tld.h:98
int tld_check_8z(const char *in, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:447
int tld_check_4z(const uint32_t *in, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:409
const Tld_table * _tld_tables[]
Definition: tlds.c:59
#define DOTP(c)
Definition: tld.c:105
int tld_check_4(const uint32_t *in, size_t inlen, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:359
int tld_check_lz(const char *in, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:492
const Tld_table * tld_get_table(const char *tld, const Tld_table **tables)
Definition: tld.c:60
int tld_check_4tz(const uint32_t *in, size_t *errpos, const Tld_table *tld)
Definition: tld.c:322
int tld_check_4t(const uint32_t *in, size_t inlen, size_t *errpos, const Tld_table *tld)
Definition: tld.c:280
const Tld_table * tld_default_table(const char *tld, const Tld_table **overrides)
Definition: tld.c:89
int tld_get_4z(const uint32_t *in, char **out)
Definition: tld.c:171
int tld_get_4(const uint32_t *in, size_t inlen, char **out)
Definition: tld.c:122
int tld_get_z(const char *in, char **out)
Definition: tld.c:197
@ TLD_ICONV_ERROR
Definition: tld.h:111
@ TLD_MALLOC_ERROR
Definition: tld.h:110
@ TLD_SUCCESS
Definition: tld.h:107
@ TLD_NODATA
Definition: tld.h:109
@ TLD_NO_TLD
Definition: tld.h:112
@ TLD_INVALID
Definition: tld.h:108