2 * string_t.c - dynamic string handling module
4 * Copyright (C) 2005 Free Software Foundation, Inc.
6 * This file is part of GNU Wget.
8 * GNU Wget is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * GNU Wget is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * In addition, as a special exception, the Free Software Foundation
22 * gives permission to link the code of its release of Wget with the
23 * OpenSSL project's "OpenSSL" library (or with modified versions of it
24 * that use the same license as the "OpenSSL" library), and distribute
25 * the linked executables. You must obey the GNU General Public License
26 * in all respects for all of the code used other than "OpenSSL". If you
27 * modify this file, you may extend this exception to your version of the
28 * file, but you are not obligated to do so. If you do not wish to do
29 * so, delete this exception statement from your version.
34 #define _GNU_SOURCE /* to get iswblank */
/*
 * Allocation shims: map the xmalloc / xrealloc / xfree_null names onto the
 * C library allocator.  Unlike an aborting wrapper, malloc and realloc
 * return NULL on failure, so callers have to check the result themselves.
 *
 * xfree_null (p) releases P and accepts a null pointer.  free (NULL) is
 * defined to do nothing, so no explicit guard is needed; expanding to a
 * plain call also evaluates P exactly once.
 */
#define xmalloc malloc
#define xrealloc realloc
#define xfree_null(p) free (p)
/*
 * CR LF flavour of the line delimiter, in narrow and wide form.
 * line_delim_len is the number of delimiter characters, not counting the
 * terminating zero.
 *
 * NOTE(review): a second, LF-only set of the same three names follows; the
 * preprocessor conditional that selects between the two sets was not
 * visible in the reviewed extract.
 */
static const char line_delim[] = { '\r', '\n', '\0' };
static const wchar_t w_line_delim[] = { L'\r', L'\n', L'\0' };
static const unsigned int line_delim_len = 2;
/*
 * LF flavour of the line delimiter, in narrow and wide form.
 *
 * line_delim_len is the number of delimiter characters, not counting the
 * terminating zero.  It is the count handed to string_cat (with line_delim)
 * and to string_append_multibyte (with w_line_delim), so it has to match
 * the literals exactly: a value of 2 here made both callers copy the
 * terminating zero into the string as if it were data.  Deriving it from
 * the array size keeps the two from drifting apart again.
 */
static const wchar_t w_line_delim[] = L"\n";
static const char line_delim[] = "\n";
static const unsigned int line_delim_len =
  (unsigned int) (sizeof (line_delim) - 1);
/*
 * struct string_t: a character buffer together with its allocated size and
 * the number of chars currently stored in it.
 *
 * The debug check assert_valid_string requires sz != NULL and
 * used + 1 <= len, i.e. there must always be room for the terminating zero
 * after the used chars.
 *
 * NOTE(review): the line closing this declaration was not visible in the
 * reviewed extract.  If it is a bare closing brace, the typedef keyword
 * names no alias and only draws a compiler warning -- confirm.
 */
65 typedef struct string_t {
66 char *sz; /* standard null-terminated string */
67 unsigned int len; /* number of chars in the allocated buffer */
68 unsigned int used; /* number of used chars */
71 #ifdef STRING_MODULE_DEBUG
/*
 * assert_valid_string (str): debug-build sanity check.  Aborts through
 * assert unless STR is non-null, owns a buffer, and has room for the
 * terminating zero after its used chars.
 *
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, so the macro behaves as one statement and can sit in the
 * branch of an if / else.
 */
#define assert_valid_string(str) \
  assert (((str) != NULL) \
          && ((str)->sz != NULL) \
          && ((str)->used + 1 <= (str)->len))
79 string_dump (struct string_t *str, FILE *out)
81 assert_valid_string (str);
84 fprintf (out, "string_dump: str->sz = %s (%p)\n", str->sz, str->sz);
85 fprintf (out, "string_dump: *(str->sz) = %d\n", *(str->sz));
86 fprintf (out, "string_dump: str->len = %u\n", str->len);
87 fprintf (out, "string_dump: str->used = %u\n", str->used);
/*
 * DEBUG_PRINTF ((fmt, ...)): debug-build trace.  The caller wraps the whole
 * printf argument list in an extra pair of parentheses; the macro pastes
 * that parenthesised list right after the name printf.
 */
#define DEBUG_PRINTF(args) printf args
92 #else /* not defined STRING_MODULE_DEBUG */
/*
 * Debugging disabled: the three helpers expand to an empty statement and
 * their arguments are discarded unevaluated.
 *
 * No trailing semicolon in the expansions: the caller's own `;' completes
 * the do {} while (0), so each invocation is exactly one statement and is
 * safe in the branch of an if / else.
 */
#define assert_valid_string(str) do {} while (0)
#define string_dump(str, out) do {} while (0)
#define DEBUG_PRINTF(x) do {} while (0)
/*
 * string_init: give S a freshly allocated buffer with room for at least
 * LEN chars plus the terminating zero.
 *
 * S    string to set up; the new buffer is stored in s->sz
 * LEN  number of chars the caller wants room for (0 is accepted)
 *
 * NOTE(review): the embedded original line numbers skip 103-105, 107-109,
 * 114, 116 and 118-121, so part of this function (presumably the braces,
 * the declaration of to_alloc and the stores into s->len / s->used) is not
 * visible in this extract -- confirm against the full file.
 */
102 string_init (struct string_t *s, unsigned int len)
106 /* no need to check that len > 0, since the len == 0 case is ok */
110 * for the moment we try to perform a reasonable allocation by rounding up
111 * the number of requested chars (including the trailing zero) to the
112 * closest multiple of 256, but we should probably find a better allocation
113 * policy or completely leave the optimization of memory allocation to malloc
/* len + 1 is padded by 256 and then masked down to a multiple of 256, so
 * the result is always larger than len + 1.
 * NOTE(review): padding by 255 would already round up to the closest
 * multiple; with 256 an exact multiple gains a whole extra 256 bytes.  The
 * unsigned sum can also wrap for len close to UINT_MAX. */
115 to_alloc = ((len + 1 + 256) & (~0xFF));
/* NOTE(review): when xmalloc is mapped to plain malloc the result can be
 * NULL; no check is visible here. */
117 s->sz = (char *) xmalloc (to_alloc * sizeof (char));
/* trace the new string; string_dump expands to nothing unless
 * STRING_MODULE_DEBUG is defined */
122 string_dump (s, stdout);
/*
 * string_copy: overwrite the beginning of DST's buffer with LEN chars taken
 * from SRC.
 *
 * DST  destination string; when dst->sz is NULL a buffer is created with
 *      string_init (dst, len)
 * SRC  source bytes, must not be NULL (asserted)
 * LEN  number of chars to copy; 0 makes the call a no-op
 *
 * NOTE(review): in debug builds assert_valid_string already requires
 * dst->sz != NULL, so the dst->sz == NULL branch below can only be reached
 * when STRING_MODULE_DEBUG is off.
 * NOTE(review): no capacity check is visible for an existing buffer
 * smaller than LEN, and strncpy writes no terminator when SRC holds LEN or
 * more chars.  Original lines 136-137 and 139-141 are missing from this
 * extract; the terminator store and the update of dst->used are presumably
 * there -- confirm.
 */
126 string_copy (struct string_t *dst, const void *src, unsigned int len)
128 assert_valid_string (dst);
129 assert (src != NULL);
131 /* no need to do anything */
132 if (len == 0) return;
134 if (dst->sz == NULL) {
135 string_init (dst, len);
/* copy from the very start of the buffer: previous contents are replaced */
138 strncpy (dst->sz, (const char *) src, len);
/*
 * string_cat: append LEN chars taken from SRC after the chars already used
 * in DST and terminate the result.
 *
 * DST  destination string; when dst->sz is NULL a buffer is created with
 *      string_init (dst, len)
 * SRC  source bytes, must not be NULL (asserted)
 * LEN  number of chars to append; 0 makes the call a no-op
 *
 * NOTE(review): in debug builds assert_valid_string already requires
 * dst->sz != NULL, so the dst->sz == NULL branch below can only be reached
 * when STRING_MODULE_DEBUG is off.
 * NOTE(review): no call that grows an existing buffer is visible before
 * the copy, so the write relies on the caller having reserved
 * dst->used + len + 1 chars.  Original lines 155-156 and 159-163 are
 * missing from this extract; the update of dst->used is presumably among
 * them -- confirm.
 */
145 string_cat (struct string_t *dst, const void *src, unsigned int len)
147 assert_valid_string (dst);
148 assert (src != NULL);
150 /* no need to do anything */
151 if (len == 0) return;
153 if (dst->sz == NULL) {
154 string_init (dst, len);
/* write right after the chars already in use */
157 strncpy (dst->sz + dst->used, (const char *) src, len);
/* strncpy does not terminate when SRC has LEN or more chars: do it here */
158 dst->sz[dst->used + len] = '\0';
/*
 * string_ready: make sure STR has at least LEN chars of unused space,
 * enlarging the buffer with xrealloc when it does not.
 *
 * STR  string whose buffer may be enlarged (str->sz may change)
 * LEN  number of free chars wanted; 0 makes the call a no-op
 *
 * NOTE(review): the free space is computed as str->len - str->used, which
 * includes the slot needed for the terminating zero; callers that want
 * room for LEN chars plus the terminator must ask for LEN + 1.
 * NOTE(review): original lines 175-177 are missing from this extract; the
 * update of str->len after the reallocation is presumably there -- confirm.
 */
164 string_ready (struct string_t *str, unsigned int len)
166 assert_valid_string (str);
168 /* no need to do anything */
169 if (len == 0) return;
171 if (str->len - str->used < len)
173 DEBUG_PRINTF (("calling xrealloc"));
/* grow by exactly LEN chars.
 * NOTE(review): the result overwrites str->sz directly; with xrealloc
 * mapped to plain realloc a failure would store NULL and lose the old
 * buffer. */
174 str->sz = xrealloc (str->sz, str->len + len);
181 string_destroy (struct string_t *str)
183 assert_valid_string (str);
185 xfree_null (str->sz);
186 memset (str, 0, sizeof (*str));
191 string_append_delim (struct string_t *dst)
193 assert_valid_string (dst);
194 string_cat (dst, line_delim, line_delim_len);
/*
 * is_line_delim: test whether the wide string WSZ starts with a line
 * delimiter, i.e. a CR LF pair, a lone CR or a lone LF.
 *
 * WSZ  wide string to inspect, must not be NULL (asserted)
 *
 * NOTE(review): the return statements (original lines 204, 206 and 209)
 * are missing from this extract.  string_escape stores the result in a
 * variable named delim_size, so the value is presumably the delimiter
 * length with 0 meaning "no delimiter" -- confirm.
 * NOTE(review): the CR LF test reads wsz[1].  string_escape passes the
 * address of what appears to be a single wide char, so when that char is
 * CR the read goes one element past the object.
 */
199 is_line_delim (const wchar_t *wsz)
201 assert (wsz != NULL);
/* two-char delimiter first, so that CR LF is not reported as a lone CR */
203 if (*wsz == L'\r' && *(wsz + 1) == L'\n') {
205 } else if (*wsz == L'\r' || *wsz == L'\n') {
213 * DEST is the string to which the multibyte stuff will be added
214 * WSTR is the wide char string to add; LEN is the number of wide chars to convert and STATE is the conversion state handed to wcrtomb
/*
 * string_append_multibyte: convert LEN wide chars from WSTR to their
 * multibyte form with wcrtomb and append them to DEST, terminating DEST
 * after every converted char.
 *
 * DEST   string that receives the multibyte sequences
 * WSTR   wide chars to convert, must not be NULL (asserted)
 * LEN    number of wide chars to convert; 0 makes the call a no-op
 * STATE  conversion state passed to wcrtomb, must not be NULL (asserted)
 *
 * NOTE(review): the body of the copied == (size_t)(-1) branch (original
 * lines 241-243) is missing from this extract, so what happens on a
 * conversion error cannot be told from here.
 * NOTE(review): the debug traces print i with %d and the size_t value
 * copied with %u; the declaration of i is not visible (original line 219).
 */
217 string_append_multibyte (struct string_t *dest, const wchar_t *wstr, unsigned int len, mbstate_t *state)
221 assert_valid_string (dest);
222 assert (wstr != NULL);
223 assert (state != NULL);
226 if (len == 0) return;
/* reserve room up front so that wcrtomb can write straight into the
 * buffer; the request is sized from LEN + 1 and MB_CUR_MAX */
228 string_ready (dest, 4 * MB_CUR_MAX * (len + 1));
230 DEBUG_PRINTF (("string_append_multibyte: len = %u\n", len));
231 string_dump (dest, stdout);
233 for (i = 0; len > 0; ++i, --len) {
/* convert one wide char in place, right after the chars already in use */
234 size_t copied = wcrtomb (dest->sz + dest->used, *(wstr + i), state);
236 DEBUG_PRINTF (("string_append_multibyte (loop): i = %d\n", i));
237 DEBUG_PRINTF (("string_append_multibyte (loop): copied = %u\n", copied));
238 string_dump (dest, stdout);
/* (size_t)(-1) is the error value compared against here */
240 if (copied == (size_t)(-1)) {
244 dest->used += copied;
245 *(dest->sz + dest->used) = '\0';
247 DEBUG_PRINTF (("string_append_multibyte (loop): processed %s\n", dest->sz + dest->used - copied));
252 string_append_multibyte_newline (struct string_t *dest, mbstate_t *state)
254 assert_valid_string (dest);
255 string_append_multibyte(dest, w_line_delim, line_delim_len, state);
/*
 * string_append_multibyte_terminator: feed one null wide char through
 * string_append_multibyte, so that the multibyte output in DEST is
 * completed according to the conversion state STATE.
 *
 * NOTE(review): the line carrying the return type and linkage was not
 * visible in the reviewed extract; `static void' is assumed -- confirm.
 */
static void
string_append_multibyte_terminator (struct string_t *dest, mbstate_t *state)
{
  const wchar_t nul_wide[1] = { L'\0' };

  assert_valid_string (dest);
  string_append_multibyte (dest, nul_wide, 1, state);
}
268 * DEST is the string to which the escape code will be added
269 * TO_ESC is the (not necessarily null terminated) string to escape
270 * LEN is the length of the string to escape
/*
 * do_escape: append to DEST the escaped form of LEN bytes of TO_ESC.  Every
 * byte becomes a backslash followed by exactly three octal digits.
 *
 * DEST    string that receives the escape sequences
 * TO_ESC  bytes to escape (not necessarily null terminated), must not be
 *         NULL (asserted)
 * LEN     number of bytes to escape; 0 makes the call a no-op
 * STATE   conversion state forwarded to string_append_multibyte, must not
 *         be NULL (asserted)
 *
 * NOTE(review): the line carrying the return type and linkage was not
 * visible in the reviewed extract; `static void' is assumed -- confirm.
 */
static void
do_escape (struct string_t *dest, const char *to_esc, unsigned int len, mbstate_t *state)
{
  /* one escape sequence needs 5 wide chars:
   *  - one for the leading backslash
   *  - three for the octal representation
   *  - one for the trailing zero
   * the array is a little larger than that */
  wchar_t buf[8] = L"";
  const size_t buf_elems = sizeof (buf) / sizeof (buf[0]);
  unsigned int i;

  assert_valid_string (dest);
  assert (to_esc != NULL);
  assert (state != NULL);

  if (len == 0) return;

  /* len is unsigned, so it is printed with %u */
  DEBUG_PRINTF (("do_escape: len = %u\n", len));
  string_dump (dest, stdout);

  for (i = 0; i < len; ++i) {
    /* go through unsigned char so that bytes above 127 are not sign
     * extended; %o expects an unsigned int argument */
    const unsigned int byte = (unsigned char) to_esc[i];
    const int cc = swprintf (buf, buf_elems, L"\\%03o", byte);

    assert (cc > 0 && (size_t) cc < buf_elems);
    (void) cc; /* cc is only read by the assert above */
    DEBUG_PRINTF (("do_escape (loop): escaping \\%03o\n", byte));
    buf[buf_elems - 1] = L'\0';
    assert (wcslen (buf) == 4);
    string_append_multibyte (dest, buf, 4, state);
  }
}
/*
 * string_escape: rebuild STR as an escaped version of its previous
 * contents.
 *
 * The old contents are read through a local named src, STR is given a new
 * buffer with string_init, and the old bytes are decoded one multibyte
 * char at a time with mbrtowc.  Each decoded item is then appended to STR:
 *  - a byte that mbrtowc rejects ((size_t)(-1)) is written as an escape
 *    sequence by do_escape;
 *  - a line delimiter is replaced by the delimiter that
 *    string_append_multibyte_newline appends;
 *  - a printable or blank char is copied through;
 *  - anything else is written as escape sequences by do_escape.
 * Finally a null wide char is appended and src is destroyed.
 *
 * NOTE(review): many original lines (306-308, 310, 312-314, 319-320,
 * 338-339, 343, 346, 351, 356-358, 361-364, ...) are missing from this
 * extract, among them the declarations of src, c, i, ret and delim_size,
 * the initialization of src, and whatever advances i or ends the loop in
 * each branch.  Everything said here about those parts is an assumption to
 * be confirmed against the full file.
 */
305 string_escape (struct string_t *str)
309 mbstate_t state1, state2;
311 unsigned int to_read;
315 assert_valid_string (str);
/* state1 drives the decoding (mbrtowc), state2 the re-encoding */
317 memset (&state1, '\0', sizeof (state1));
318 memset (&state2, '\0', sizeof (state2));
/* the used chars plus the terminating zero */
321 to_read = src.used + 1;
323 /* this value is completely arbitrary */
324 string_init (str, 4 * to_read);
326 DEBUG_PRINTF (("string_escape: dumping string src"));
327 string_dump (&src, stdout);
328 DEBUG_PRINTF (("string_escape: dumping string str"));
329 string_dump (str, stdout);
/* mbrtowc returns 0 once it decodes the null char, which ends the loop.
 * NOTE(review): to_read is passed unchanged as the byte limit while the
 * start moves to src.sz + i; no visible line shrinks it -- confirm that
 * the limit cannot reach past the end of the source buffer. */
332 while ((ret = mbrtowc (&c, src.sz + i, to_read, &state1)) != 0) {
333 DEBUG_PRINTF (("string_escape (loop): ret = %d\n", ret));
334 if (ret == (size_t)(-2)) {
335 DEBUG_PRINTF (("string_escape (loop): handling ret == -2"));
336 /* mauro: should we just return the portion of the string already
337 * processed and print an error message? */
340 } else if (ret == (size_t)(-1)) {
341 DEBUG_PRINTF (("string_escape (loop): handling ret == -1"));
342 do_escape (str, src.sz + i, 1, &state2);
/* NOTE(review): is_line_delim may read the element after the one it is
 * given; &c looks like the address of a single wide char -- confirm. */
344 } else if ((delim_size = is_line_delim(&c))) {
345 DEBUG_PRINTF (("string_escape (loop): handling ret == line_delim"));
347 string_append_multibyte_newline (str, &state2);
348 } else if (iswprint(c) || iswblank(c)) {
349 DEBUG_PRINTF (("string_escape (loop): handling ret == blank | printable"));
350 string_append_multibyte (str, &c, 1, &state2);
/* NOTE(review): the condition guarding the second append below (original
 * line 351) is not visible; presumably it tests for a backslash, which
 * would double it -- confirm. */
352 /* since the backslash character is used to escape unprintable data,
353 * in order to avoid ambiguities in the escaped string we have to
354 * escape backslashes as well */
355 string_append_multibyte (str, &c, 1, &state2);
359 DEBUG_PRINTF (("string_escape (loop): handling ret == toescape"));
/* escape all ret bytes that made up the decoded char */
360 do_escape (str, src.sz + i, ret, &state2);
365 string_append_multibyte_terminator (str, &state2);
/* the old contents are no longer needed */
367 string_destroy (&src);
/*
 * escape_buffer: entry point that takes a plain char buffer instead of a
 * struct string_t.
 *
 * BUF  address of the caller's buffer pointer; both BUF and *BUF must be
 *      non-NULL (asserted)
 * LEN  size of the string in *BUF; 0 makes the call a no-op
 *
 * NOTE(review): original lines 376-378, 380-381, 383 and 385-394 are
 * missing from this extract, so what the function does after the debug
 * trace is not visible here.  BUF is passed by address, so presumably the
 * caller's pointer is replaced with the escaped string -- confirm.
 * NOTE(review): the debug trace prints the size_t value LEN with %u.
 */
371 * BUF must be a null-terminated dynamically allocated string
372 * LEN is the size of the string BUF
375 escape_buffer (char **buf, size_t len)
379 assert (buf != NULL && *buf != NULL);
382 if (len == 0) return;
384 DEBUG_PRINTF (("escape_buffer processing: %s (len %u)\n", *buf, len));
/*
 * Interactive test driver body: reads lines from stdin, prints each line
 * before and after it has gone through escape_buffer, and frames the whole
 * session with two ruler lines.
 *
 * NOTE(review): the lines that open this definition (original lines
 * 395-398) and those that close it (after 414) are missing from this
 * extract, so the function name, the declaration of buf and any final
 * cleanup are not visible here.
 */
399 const size_t buflen = 512;
400 buf = (char *) xmalloc(buflen);
/* xmalloc may be plain malloc, which returns NULL on failure */
401 assert (buf != NULL);
403 puts ("--------------------------------------------------------------------------------");
/* fgets stores at most buflen - 2 chars plus its own terminator */
405 while (fgets (buf, buflen - 1, stdin) != NULL)
407 /* just in case... */
408 buf[buflen - 1] = '\0';
409 printf ("before escape: %s", buf);
/* NOTE(review): escape_buffer receives the address of buf and may
 * therefore replace the buffer; the next fgets still assumes buflen bytes
 * are available -- confirm that a replacement is never smaller. */
410 escape_buffer (&buf, strlen(buf));
411 printf ("after escape: %s", buf);
414 puts ("--------------------------------------------------------------------------------");