1 /* $Id: string-utf.cc,v 1.6 2004/09/11 23:26:30 atterer Exp $ -*- C++ -*-
3 |_) /| Copyright (C) 2003 | richard@
4 | \/¯| Richard Atterer | atterer.org
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2. See
8 the file COPYING for details.
10 Helper functions for dealing with UTF-8 strings
20 #include <string-utf.hh>
21 //______________________________________________________________________
24 const int BUF_LEN = 40; // Enough room for 128-bit integers. :-)
26 const char* const PAD = " ";
27 const char* const PAD_END = PAD + 40;
30 string& append(string& s, double x) {
31 snprintf(buf, BUF_LEN, "%.1f", x);
32 buf[BUF_LEN - 1] = '\0';
35 string& append(string& s, int x) {
36 snprintf(buf, BUF_LEN, "%d", x);
37 buf[BUF_LEN - 1] = '\0';
40 string& append(string& s, unsigned x) {
41 snprintf(buf, BUF_LEN, "%u", x);
42 buf[BUF_LEN - 1] = '\0';
45 string& append(string& s, unsigned x, int width) {
46 Assert(*PAD_END == '\0' && width < PAD_END - PAD);
47 int written = snprintf(buf, BUF_LEN, "%u", x);
48 if (written < width) s += PAD_END - width + written;
49 buf[BUF_LEN - 1] = '\0';
52 string& append(string& s, long x) {
53 snprintf(buf, BUF_LEN, "%ld", x);
54 buf[BUF_LEN - 1] = '\0';
57 string& append(string& s, unsigned long x) {
58 snprintf(buf, BUF_LEN, "%lu", x);
59 buf[BUF_LEN - 1] = '\0';
62 string& append(string& s, unsigned long x, int width) {
63 Assert(*PAD_END == '\0' && width < PAD_END - PAD);
64 int written = snprintf(buf, BUF_LEN, "%lu", x);
65 if (written < width) s += PAD_END - width + written;
66 buf[BUF_LEN - 1] = '\0';
69 #if HAVE_UNSIGNED_LONG_LONG
70 string& append(string& s, unsigned long long x) {
71 snprintf(buf, BUF_LEN, "%llu", x);
72 buf[BUF_LEN - 1] = '\0';
75 string& append(string& s, unsigned long long x, int width) {
76 Assert(*PAD_END == '\0' && width < PAD_END - PAD);
77 int written = snprintf(buf, BUF_LEN, "%llu", x);
78 if (written < width) s += PAD_END - width + written;
79 buf[BUF_LEN - 1] = '\0';
83 //______________________________________________________________________
88 static const int F = 1 << 0; // fast substitution
89 static const int L = 1 << 1; // locale-format string arg, not UTF-8
90 static const int E = 1 << 2; // escape: turn < into &lt; etc
91 //____________________
93 // Convert input to UTF-8. Returned string must be freed.
94 inline gchar* localeToUTF8(const char* input) {
97 int len = strlen(input);
98 gchar* s = g_locale_to_utf8(input, len, NULL, &written, &error);
99 if (error == NULL) return s;
100 if (s != NULL) g_free(s);
102 /* Maybe this is just me, but for me, glib always thinks that my charset is
103 "ANSI_X3.4-1968" - ?! Fall back to ISO-8859-1 if glib failed above.
104 Users can override this by setting the CHARSET variable. */
105 g_clear_error(&error);
106 s = g_convert(input, len, "UTF-8", "ISO-8859-1", NULL, &written, &error);
107 if (error == NULL || s != NULL) return s;
108 g_clear_error(&error);
109 return g_strdup("[UTF8ConvFail]");
111 //____________________
113 /* Append s to result according to flags */
114 void strSubst(string& result, const char* s, int flags) {
120 Paranoid(false); // Already handled in doSubst() below
125 // Convert locale-format to UTF
133 // Convert locale-format to UTF and turn < into &lt;
137 if (*s == '&') result += "&";
138 else if (*s == '<') result += "<";
139 else if (*s == '>') result += ">";
148 if (*s == '&') result += "&";
149 else if (*s == '<') result += "<";
150 else if (*s == '>') result += ">";
157 // Verify UTF-8, only append up to first invalid character
158 g_utf8_validate(s, -1, &e);
159 result.append(s, e - s);
163 // Verify UTF-8 data, turn < into &lt;
164 g_utf8_validate(s, -1, &e);
166 if (*s == '&') result += "&";
167 else if (*s == '<') result += "<";
168 else if (*s == '>') result += ">";
180 //____________________
183 /* Look at arg[n] and append it to result according to flags */
184 inline void Subst::doSubst(string& result, const Subst arg[], int n,
186 switch (arg[n].type) {
188 snprintf(buf, BUF_LEN, "%d", arg[n].val.intVal);
189 buf[BUF_LEN - 1] = '\0'; result += buf; break;
191 snprintf(buf, BUF_LEN, "%u", arg[n].val.unsignedVal);
192 buf[BUF_LEN - 1] = '\0'; result += buf; break;
194 snprintf(buf, BUF_LEN, "%ld", arg[n].val.longVal);
195 buf[BUF_LEN - 1] = '\0'; result += buf; break;
197 snprintf(buf, BUF_LEN, "%lu", arg[n].val.ulongVal);
198 buf[BUF_LEN - 1] = '\0'; result += buf; break;
199 # if HAVE_UNSIGNED_LONG_LONG
201 snprintf(buf, BUF_LEN, "%llu", arg[n].val.ulonglongVal);
202 buf[BUF_LEN - 1] = '\0'; result += buf; break;
205 snprintf(buf, BUF_LEN, "%f", arg[n].val.doubleVal);
206 buf[BUF_LEN - 1] = '\0'; result += buf; break;
208 result += arg[n].val.charVal;
212 result += arg[n].val.charPtr;
214 strSubst(result, arg[n].val.charPtr, flags);
218 result += *arg[n].val.stringPtr;
220 strSubst(result, arg[n].val.stringPtr->c_str(), flags);
223 snprintf(buf, BUF_LEN, "%p", arg[n].val.pointerVal);
224 buf[BUF_LEN - 1] = '\0'; result += buf; break;
228 //____________________
230 string Subst::subst(const char* format, int args, const Subst arg[]) {
233 const char* i = format;
234 unsigned max = '1' + args;
237 // Search through string until either '%' or '\0' found
240 x = g_utf8_get_char(j);
241 if (x == 0 || x == '%') break;
242 j = g_utf8_next_char(j);
244 // x == '%' or x == 0, normal string between [i;j)
245 result.append(i, j - i);
246 if (x == 0) return result;
248 // '%' escape detected
251 j = g_utf8_next_char(j);
252 x = g_utf8_get_char(j);
253 // Handle special flags between % and digit
254 if (x == 0) return result;
255 else if (x == '%') { result += '%'; break; }
256 else if (x == 'F') flags |= F;
257 else if (x == 'L') flags |= L;
258 else if (x == 'E') flags |= E;
259 // Ignore other characters, loop until digit found
260 else if (x >= '1' && x <= '9') {
261 if (x < max) doSubst(result, arg, x - '1', flags); // Arg subst
265 // Now j points to digit in "%1" or second '%' in "%%"
267 i = ++j; // i and j point after digit/%