-
Notifications
You must be signed in to change notification settings - Fork 1
/
convsamp.cpp
253 lines (207 loc) · 6.11 KB
/
convsamp.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
/**************************************************************************
*
* Copyright (C) 2000-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
***************************************************************************
* file name: convsamp.c
* encoding: ASCII (7-bit)
*
* created on: 2000may30
* created by: Steven R. Loomis
*
* Sample code for the ICU conversion routines.
*
* Note: Nothing special is needed to build this sample. Link with
* the icu UC and icu I18N libraries.
*
* I use 'assert' for error checking, you probably will want
* something more flexible. '***BEGIN SAMPLE***' and
* '***END SAMPLE***' mark pieces suitable for stand alone
* code snippets.
*
*
* Each test can define its own BUFFERSIZE
*
*/
#define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
#include <stdio.h>
#include <ctype.h> /* for isspace, etc. */
#include <assert.h>
#include <string.h>
#include <stdlib.h> /* malloc */
#include "unicode/utypes.h" /* Basic ICU data types */
#include "unicode/ucnv.h" /* C Converter API */
#include "unicode/ustring.h" /* some more string fcns*/
#include "unicode/uchar.h" /* char names */
#include "unicode/uloc.h"
#include "unicode/unistr.h"
#include "unicode/putil.h" // u_setDataDirectory
#include "flagcb.h"
#include "layout/LETypes.h" // Test include, do not remove
/* Some utility functions */
/* Empty UChar string (just the terminating NUL); used as the default
   `uch` argument of printUChars(). */
static const UChar kNone[] = { 0x0000 };
/**
 * Print one UChar to stdout as a fixed-width cell of exactly seven
 * characters, so that the output lines up under the seven-character
 * index and \uXXXX columns written by printUChars().
 *
 *  - Printable ASCII (isgraph):   the character in single quotes.
 *  - Above U+007F:                the first six characters of the name
 *                                 returned by u_charName(), right-aligned;
 *                                 " ??????" if the name lookup fails.
 *  - Space, tab, newline:         ' ', \t and \n respectively.
 *  - Any other ASCII control:     an underscore placeholder.
 *
 * @param c  the UChar to display
 */
void prettyPrintUChar(UChar c)
{
    /* Width of one output cell; every branch below must emit exactly this
       many characters or the table rows drift out of alignment. */
    static const int kCellWidth = 7;

    if (c > 0x007F) {
        char charName[1000];
        UErrorCode status = U_ZERO_ERROR;
        int32_t nameLen = u_charName(c, U_EXTENDED_CHAR_NAME,
                                     charName, (int32_t)sizeof(charName), &status);
        if (U_SUCCESS(status) && (nameLen > 0)) {
            charName[6] = 0;          /* keep only the first six characters */
            printf("%7s", charName);  /* right-aligned: one leading blank   */
        } else {
            printf(" ??????");
        }
        return;
    }

    /* From here on c is 7-bit, so it is a valid argument for <ctype.h>. */
    if (isgraph(c)) {
        const char quoted[] = { ' ', '\'', (char)(0x00FF & c), '\'', 0 };
        printf("%-*s", kCellWidth, quoted);
        return;
    }

    const char *cell;
    switch ((char)(c & 0x007F)) {
    case ' ':
        cell = " ' '";
        break;
    case '\t':
        cell = " \\t";
        break;
    case '\n':
        cell = " \\n";
        break;
    default:
        cell = " _";
        break;
    }
    /* Left-justify and pad with blanks up to the full cell width. */
    printf("%-*s", kCellWidth, cell);
}
/**
 * Dump a UChar string to stdout as a three-row table:
 *   row 1 - the caller's label followed by the index of each code unit,
 *   row 2 - "uni" followed by each code unit as \uXXXX,
 *   row 3 - "ch" followed by prettyPrintUChar()'s rendering of each unit.
 *
 * @param name  label printed at the start of the first row
 * @param uch   the code units to print
 * @param len   number of code units; -1 means "measure with u_strlen()",
 *              which is only done when uch is non-null
 */
void printUChars(const char *name = "?",
                 const UChar *uch = kNone,
                 int32_t len = -1 )
{
    if ((uch != NULL) && (len == -1)) {
        len = u_strlen(uch);
    }

    /* Row 1: indices */
    printf("%5s: ", name);
    for (int32_t pos = 0; pos < len; ++pos) {
        printf("%-6d ", pos);
    }
    printf("\n");

    /* Row 2: hexadecimal code unit values */
    printf("%5s: ", "uni");
    for (int32_t pos = 0; pos < len; ++pos) {
        const int codeUnit = (int)uch[pos];
        printf("\\u%04X ", codeUnit);
    }
    printf("\n");

    /* Row 3: human-readable rendering of each code unit */
    printf("%5s:", "ch");
    for (int32_t pos = 0; pos < len; ++pos) {
        prettyPrintUChar(uch[pos]);
    }
    printf("\n");
}
/**
 * Dump a byte string to stdout as a three-row table of five-character
 * cells:
 *   row 1 - the caller's label followed by the index of each byte,
 *   row 2 - "uni" followed by each byte as \xXX,
 *   row 3 - "ch" followed by each printable byte in single quotes; a
 *           non-printable byte gets a blank cell of the same width so the
 *           columns stay aligned with the rows above.
 *
 * @param name  label printed at the start of the first row
 * @param uch   the bytes to print
 * @param len   number of bytes; -1 means "measure with strlen()", which is
 *              only done when uch is non-null
 */
void printBytes(const char *name = "?",
                const char *uch = "",
                int32_t len = -1 )
{
    int32_t i;

    if ((len == -1) && (uch)) {
        len = (int32_t)strlen(uch);
    }

    printf("%5s: ", name);
    for (i = 0; i < len; i++) {
        printf("%-4d ", i);
    }
    printf("\n");

    printf("%5s: ", "uni");
    for (i = 0; i < len; i++) {
        printf("\\x%02X ", 0x00FF & (int)uch[i]);
    }
    printf("\n");

    printf("%5s:", "ch");
    for (i = 0; i < len; i++) {
        /* Mask to 0..255 first: plain char may be signed, and a negative
           value is not a valid argument for isgraph(). */
        const int byteValue = 0x00FF & (int)uch[i];
        if (isgraph(byteValue)) {
            printf(" '%c' ", (char)uch[i]);
        } else {
            /* Blank cell, padded to the same five columns as " 'c' ". */
            printf("%5s", "");
        }
    }
    printf("\n");
}
/*******************************************************************
Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
followed by an exclamation mark (!) into the KOI8-R Russian code page.
This example first creates a UChar String out of the Unicode chars.
targetSize must be set to the amount of space available in the target
buffer. After fromUChars is called,
len will contain the number of bytes in target[] which were
used in the resulting codepage. In this case, there is a 1:1 mapping
between the input and output characters. The exclamation mark has the
same value in both KOI8-R and Unicode.
src: 0 1 2 3 4 5 6
uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
targ: 0 1 2 3 4 5 6
uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
ch: '!'
Converting FROM unicode
to koi8-r.
You must call ucnv_close to clean up the memory used by the
converter.
'len' returns the number of OUTPUT bytes resulting from the
conversion.
*/
/**
 * Sample 02: convert a short NUL-terminated UChar string to KOI8-R with
 * ucnv_fromUChars() and print both the source code units and the
 * resulting bytes.
 *
 * Errors are checked with assert(), as everywhere in this sample file.
 * When assertions are compiled out (NDEBUG) a failure is reported through
 * the return value instead, and nothing is printed from the target buffer.
 *
 * @return U_ZERO_ERROR on success, otherwise the failing ICU error code
 */
UErrorCode convsample_02()
{
    printf("\n\n==============================================\n"
           "Sample 02: C: simple Unicode -> koi8-r conversion\n");

    // **************************** START SAMPLE *******************
    // Six Cyrillic letters followed by '!' and the terminating NUL
    UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
                       0x0430, 0x0021, 0x0000 };
    char target[100];
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv;
    int32_t len;

    // set up the converter
    //! [ucnv_open]
    conv = ucnv_open("koi8-r", &status);
    //! [ucnv_open]
    assert(U_SUCCESS(status));

    // convert to koi8-r; the capacity is taken from the buffer itself so
    // the two can never disagree
    len = ucnv_fromUChars(conv, target, (int32_t)sizeof(target), source, -1, &status);
    assert(U_SUCCESS(status));

    // close the converter
    ucnv_close(conv);
    // ***************************** END SAMPLE ********************

    // Only reachable with a failure status when assert() is disabled:
    // target/len are not trustworthy then, so report instead of printing.
    if (U_FAILURE(status)) {
        return status;
    }

    // Print it out
    printUChars("src", source);
    printf("\n");
    printBytes("targ", target, len);

    return U_ZERO_ERROR;
}
/**
 * Program entry point for the converter samples.
 *
 * When built with ARCHIVE_MODE, exactly one argument is expected: the path
 * of the ICU data file. Its parent directory is handed to
 * u_setDataDirectory() before any converter is used. The function then
 * prints the default converter name, runs convsample_02() and flushes the
 * standard streams.
 *
 * @param argc  argument count (must be 2 in ARCHIVE_MODE)
 * @param argv  argument vector; argv[1] is the data file path in ARCHIVE_MODE
 * @return 0 on success, 1 if the ARCHIVE_MODE argument is missing or
 *         contains no '/' separator
 */
int gauze_main(int argc, char** argv)
{
#if defined(ARCHIVE_MODE)
    if (argc != 2) {
        printf("Expected 2 arguments, %d given\n", argc);
        return 1;
    }

    // The path is supplied in CMake format, so '/' is the directory
    // separator on every platform.
    const std::string archivePath(argv[1]);
    const std::string::size_type lastSlash = archivePath.rfind('/');
    if (lastSlash == std::string::npos) {
        printf("Symbol not found");
        return 1;
    }

    // Everything before the last separator is the data directory.
    const std::string parentDir(archivePath, 0, lastSlash);
    u_setDataDirectory(parentDir.c_str());
#endif

    printf("Default Converter=%s\n", ucnv_getDefaultName() );

    convsample_02(); // C , u->koi8r, conv

    printf("End of converter samples.\n");

    fflush(stdout);
    fflush(stderr);

    return 0;
}