Ruby 2.7.1p83 (2020-03-31 revision a0c7c23c9cec0d0ffcba012279cd652d28ad5bf3)
nkf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3  * Copyright (c) 1996-2018, The nkf Project.
4  *
5  * This software is provided 'as-is', without any express or implied
6  * warranty. In no event will the authors be held liable for any damages
7  * arising from the use of this software.
8  *
9  * Permission is granted to anyone to use this software for any purpose,
10  * including commercial applications, and to alter it and redistribute it
11  * freely, subject to the following restrictions:
12  *
13  * 1. The origin of this software must not be misrepresented; you must not
14  * claim that you wrote the original software. If you use this software
15  * in a product, an acknowledgment in the product documentation would be
16  * appreciated but is not required.
17  *
18  * 2. Altered source versions must be plainly marked as such, and must not be
19  * misrepresented as being the original software.
20  *
21  * 3. This notice may not be removed or altered from any source distribution.
22  */
/* Release identification: version string, release date and the two-line
 * copyright banner.  version() prints all three. */
23 #define NKF_VERSION "2.1.5"
24 #define NKF_RELEASE_DATE "2018-12-15"
25 #define COPY_RIGHT \
26  "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27  "Copyright (C) 1996-2018, The nkf Project."
28 
29 #include "config.h"
30 #include "nkf.h"
31 #include "utf8tbl.h"
32 #ifdef __WIN32__
33 #include <windows.h>
34 #include <locale.h>
35 #endif
36 #if defined(__OS2__)
37 # define INCL_DOS
38 # define INCL_DOSERRORS
39 # include <os2.h>
40 #endif
41 #include <assert.h>
42 
43 
44 /* state of output_mode and input_mode
45 
46  c2 0 means ASCII
47  JIS_X_0201_1976_K
48  ISO_8859_1
49  JIS_X_0208
50  EOF all termination
51  c1 32bit data
52 
53  */
54 
55 /* MIME ENCODE */
56 
/* MIME mode selector values.
 * NOTE(review): the code that consumes them is outside this excerpt. */
57 #define FIXED_MIME 7
58 #define STRICT_MIME 8
59 
60 /* byte order */
/* NOTE(review): the enumerator list is absent from this listing (listing
 * lines 62-65 are missing).  ENDIAN_BIG, used further down as the initial
 * value of input_endian/output_endian, is presumably declared here --
 * confirm against the original file. */
61 enum byte_order {
66 };
67 
68 /* ASCII CODE */
69 
/* Byte values of the control and delimiter characters used throughout the
 * file.  CRLF packs the CR byte and the LF byte into one 16-bit value. */
70 #define BS 0x08
71 #define TAB 0x09
72 #define LF 0x0a
73 #define CR 0x0d
74 #define ESC 0x1b
75 #define SP 0x20
76 #define DEL 0x7f
77 #define SI 0x0f
78 #define SO 0x0e
79 #define SS2 0x8e
80 #define SS3 0x8f
81 #define CRLF 0x0D0A
82 
83 
84 /* encodings */
85 
124  JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125  /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126  /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127  /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128  JIS_X_0208 = 0x1168, /* @B */
129  JIS_X_0212 = 0x1159, /* D */
130  /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131  JIS_X_0213_2 = 0x1229, /* P */
132  JIS_X_0213_1 = 0x1233 /* Q */
133 };
134 
/* Forward declarations of the per-encoding converters.  The *_iconv
 * functions take three nkf_char arguments (c2, c1, c0) and return an
 * nkf_char; the *_oconv functions take (c2, c1) and return nothing.  They
 * are stored in the nkf_native_encoding instances defined below. */
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
146 
147 typedef struct {
148  const char *name;
149  nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150  void (*oconv)(nkf_char c2, nkf_char c1);
152 
/* The seven native encodings.  Each initialiser is { name, iconv, oconv }.
 * ASCII and EUC-JP share the same converter pair, and ISO-2022-JP uses
 * e_iconv for input with j_oconv for output. */
153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
160 
161 typedef struct {
162  const int id;
163  const char *name;
165 } nkf_encoding;
166 
168  {ASCII, "US-ASCII", &NkfEncodingASCII},
169  {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170  {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171  {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172  {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173  {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174  {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175  {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176  {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177  {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178  {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179  {CP10001, "CP10001", &NkfEncodingShift_JIS},
180  {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181  {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182  {CP51932, "CP51932", &NkfEncodingEUC_JP},
183  {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184  {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185  {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186  {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187  {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188  {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189  {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190  {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191  {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192  {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193  {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194  {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195  {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196  {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197  {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198  {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199  {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200  {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201  {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202  {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203  {BINARY, "BINARY", &NkfEncodingASCII},
204  {-1, NULL, NULL}
205 };
206 
207 struct {
208  const char *name;
209  const int id;
211  {"US-ASCII", ASCII},
212  {"ASCII", ASCII},
213  {"646", ASCII},
214  {"ROMAN8", ASCII},
215  {"ISO-2022-JP", ISO_2022_JP},
216  {"ISO2022JP-CP932", CP50220},
217  {"CP50220", CP50220},
218  {"CP50221", CP50221},
219  {"CSISO2022JP", CP50221},
220  {"CP50222", CP50222},
221  {"ISO-2022-JP-1", ISO_2022_JP_1},
222  {"ISO-2022-JP-3", ISO_2022_JP_3},
223  {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224  {"SHIFT_JIS", SHIFT_JIS},
225  {"SJIS", SHIFT_JIS},
226  {"MS_Kanji", SHIFT_JIS},
227  {"PCK", SHIFT_JIS},
228  {"WINDOWS-31J", WINDOWS_31J},
229  {"CSWINDOWS31J", WINDOWS_31J},
230  {"CP932", WINDOWS_31J},
231  {"MS932", WINDOWS_31J},
232  {"CP10001", CP10001},
233  {"EUCJP", EUC_JP},
234  {"EUC-JP", EUC_JP},
235  {"EUCJP-NKF", EUCJP_NKF},
236  {"CP51932", CP51932},
237  {"EUC-JP-MS", EUCJP_MS},
238  {"EUCJP-MS", EUCJP_MS},
239  {"EUCJPMS", EUCJP_MS},
240  {"EUC-JP-ASCII", EUCJP_ASCII},
241  {"EUCJP-ASCII", EUCJP_ASCII},
242  {"SHIFT_JISX0213", SHIFT_JISX0213},
243  {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244  {"EUC-JISX0213", EUC_JISX0213},
245  {"EUC-JIS-2004", EUC_JIS_2004},
246  {"UTF-8", UTF_8},
247  {"UTF-8N", UTF_8N},
248  {"UTF-8-BOM", UTF_8_BOM},
249  {"UTF8-MAC", UTF8_MAC},
250  {"UTF-8-MAC", UTF8_MAC},
251  {"UTF-16", UTF_16},
252  {"UTF-16BE", UTF_16BE},
253  {"UTF-16BE-BOM", UTF_16BE_BOM},
254  {"UTF-16LE", UTF_16LE},
255  {"UTF-16LE-BOM", UTF_16LE_BOM},
256  {"UTF-32", UTF_32},
257  {"UTF-32BE", UTF_32BE},
258  {"UTF-32BE-BOM", UTF_32BE_BOM},
259  {"UTF-32LE", UTF_32LE},
260  {"UTF-32LE-BOM", UTF_32LE_BOM},
261  {"BINARY", BINARY},
262  {NULL, -1}
263 };
264 
/* Map the build-time DEFAULT_CODE_* switch to the encoding id used by
 * nkf_default_encoding().  When none of the switches is defined,
 * DEFAULT_ENCIDX stays undefined. */
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
275 #endif
276 
277 
/*
 * Locale-independent character classification and conversion helpers.
 *
 * Every macro argument is parenthesised so that an expression argument
 * (for example bin2hex(a | b)) is treated as a unit.  Arguments are still
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; any other character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and the characters between SP and DEL other than
 * '?', '=', '_', '(', ')', '.' and '"'. */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
    && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303 
/* Buffer sizes and built-in defaults.  IOBUF_SIZE sizes the static
 * stdibuf/stdobuf arrays declared further down and is reduced when
 * INT_IS_SHORT is defined.  DEFAULT_J and DEFAULT_R are the initial values
 * of kanji_intro and ascii_intro.
 * NOTE(review): HOLD_SIZE, GETA1 and GETA2 are consumed outside this
 * excerpt. */
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
307 #else
308 #define IOBUF_SIZE 16384
309 #endif
310 
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
313 
314 
315 #define GETA1 0x22
316 #define GETA2 0x2e
317 
318 
319 /* MIME preprocessor */
320 
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
323 #endif
324 
/* Per-candidate state for an input encoding.
 * NOTE(review): only the first member survives in this listing (listing
 * lines 327-333 are missing).  The initialisers further down supply eight
 * values per entry, including a status callback and an iconv callback, so
 * the remaining members must be restored from the original file. */
325 struct input_code{
326  const char *name;
334 };
335 
/* Currently selected input/output encodings and Unicode-related settings.
 * All start out unset (NULL/FALSE) except the byte order, which starts as
 * ENDIAN_BIG, and the substitution character, which starts as '?'. */
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337 static nkf_encoding *input_encoding = NULL;
338 static nkf_encoding *output_encoding = NULL;
339 
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341 /* UCS Mapping
342  * 0: Shift_JIS, eucJP-ascii
343  * 1: eucJP-ms
344  * 2: CP932, CP51932
345  * 3: CP10001
346  */
347 #define UCS_MAP_ASCII 0
348 #define UCS_MAP_MS 1
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
351 static int ms_ucs_map_f = UCS_MAP_ASCII;
352 #endif
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static int no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
357 static int no_best_fit_chars_f = FALSE;
358 static int input_endian = ENDIAN_BIG;
359 static int input_bom_f = FALSE;
360 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
/* Callback invoked with a character that cannot be encoded; NULL means none
 * is installed.  The encode_fallback_* functions below match this signature. */
361 static void (*encode_fallback)(nkf_char c) = NULL;
362 static void w_status(struct input_code *, nkf_char);
363 #endif
364 #ifdef UTF8_OUTPUT_ENABLE
365 static int output_bom_f = FALSE;
366 static int output_endian = ENDIAN_BIG;
367 #endif
/* Forward declarations of the character I/O primitives.  They are installed
 * as the initial values of the o_putc / i_getc / i_ungetc family of function
 * pointers defined later in the file. */
369 static void std_putc(nkf_char c);
370 static nkf_char std_getc(FILE *f);
371 static nkf_char std_ungetc(nkf_char c,FILE *f);
372 
373 static nkf_char broken_getc(FILE *f);
374 static nkf_char broken_ungetc(nkf_char c,FILE *f);
375 
376 static nkf_char mime_getc(FILE *f);
377 
378 static void mime_putc(nkf_char c);
379 
380 /* buffers */
381 
/* Static input/output buffers of IOBUF_SIZE bytes each; compiled out for
 * the PERL_XS and WIN32DLL builds. */
382 #if !defined(PERL_XS) && !defined(WIN32DLL)
383 static unsigned char stdibuf[IOBUF_SIZE];
384 static unsigned char stdobuf[IOBUF_SIZE];
385 #endif
386 
/* Marker for "option not given"; it is the negation of TRUE and is used
 * below as the initial value of x0201_f. */
387 #define NKF_UNSPECIFIED (-TRUE)
388 
389 /* flags */
390 static int unbuf_f = FALSE;
391 static int estab_f = FALSE;
392 static int nop_f = FALSE;
393 static int binmode_f = TRUE; /* binary mode */
394 static int rot_f = FALSE; /* rot14/43 mode */
395 static int hira_f = FALSE; /* hira/kata henkan (hiragana/katakana conversion) */
396 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
397 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
398 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
399 static int mimebuf_f = FALSE; /* MIME buffered input */
400 static int broken_f = FALSE; /* convert ESC-less broken JIS */
401 static int iso8859_f = FALSE; /* ISO8859 through */
402 static int mimeout_f = FALSE; /* base64 mode */
403 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
404 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
405 
/* Each optional input stage below has an enable flag plus a getc/ungetc
 * pointer pair, both initialised to the plain std_getc/std_ungetc. */
406 #ifdef UNICODE_NORMALIZATION
407 static int nfc_f = FALSE;
408 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
409 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
410 #endif
411 
412 #ifdef INPUT_OPTION
413 static int cap_f = FALSE;
414 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
415 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
416 
417 static int url_f = FALSE;
418 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
419 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
420 #endif
421 
/*
 * Layout of an nkf_char value: the top byte (CLASS_MASK) is a class tag and
 * the low 24 bits (VALUE_MASK) carry the value.  CLASS_UNICODE tags a value
 * as a Unicode code point; PREFIX_EUCG3 is OR-ed in by nkf_char_euc3_new().
 *
 * Macro arguments are parenthesised so expression arguments are safe; they
 * may still be evaluated more than once.
 */
#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
#define CLASS_MASK NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE NKF_INT32_C(0x01000000)
#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)

/* Combine a UTF-16 surrogate pair into a code point:
 * (lead << 10) + trail - ((0xD800 << 10) + 0xDC00 - 0x10000). */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
435 
/* Option flags and hooks that exist only when the matching build-time
 * feature macro is defined, followed by a few unconditional ones. */
436 #ifdef NUMCHAR_OPTION
437 static int numchar_f = FALSE;
438 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
439 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
440 #endif
441 
442 #ifdef CHECK_OPTION
443 static int noout_f = FALSE;
444 static void no_putc(nkf_char c);
445 static int debug_f = FALSE;
446 static void debug(const char *str);
447 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
448 #endif
449 
450 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
451 static void set_input_codename(const char *codename);
452 
453 #ifdef EXEC_IO
454 static int exec_f = 0;
455 #endif
456 
457 #ifdef SHIFTJIS_CP932
458 /* invert IBM extended characters to others */
459 static int cp51932_f = FALSE;
460 
461 /* invert NEC-selected IBM extended characters to IBM extended characters */
462 static int cp932inv_f = TRUE;
463 
464 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
465 #endif /* SHIFTJIS_CP932 */
466 
467 static int x0212_f = FALSE;
468 static int x0213_f = FALSE;
469 
/* NOTE(review): 256-entry byte table; how it is filled and read is outside
 * this excerpt. */
470 static unsigned char prefix_table[256];
471 
472 static void e_status(struct input_code *, nkf_char);
473 static void s_status(struct input_code *, nkf_char);
474 
/* NOTE(review): the declarator that opens this initialiser list is missing
 * from the listing (listing line 475).  Each entry supplies eight values: a
 * name, three zeros, a three-element zero array, a status callback, an
 * iconv callback and a trailing zero -- presumably an array of
 * struct input_code; confirm against the original file.  The list ends with
 * an all-NULL sentinel entry. */
476  {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477  {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478 #ifdef UTF8_INPUT_ENABLE
479  {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480  {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
481  {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
482 #endif
483  {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
484 };
485 
/* MIME output state, line-folding parameters and the escape-sequence
 * "intro" characters, all at their start-up defaults. */
486 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487 static int base64_count = 0;
488 
489 /* X0208 -> ASCII converter */
490 
491 /* fold parameter */
492 static int f_line = 0; /* chars in line */
493 static int f_prev = 0;
494 static int fold_preserve_f = FALSE; /* preserve new lines */
495 static int fold_f = FALSE;
496 static int fold_len = 0;
497 
498 /* options */
499 static unsigned char kanji_intro = DEFAULT_J;
500 static unsigned char ascii_intro = DEFAULT_R;
501 
502 /* Folding */
503 
504 #define FOLD_MARGIN 10
505 #define DEFAULT_FOLD 60
506 
507 static int fold_margin = FOLD_MARGIN;
508 
509 /* process default */
510 
511 static nkf_char
512 no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
513 {
514  fprintf(stderr,"nkf internal module connection failure.\n");
516  return 0; /* LINT */
517 }
518 
519 static void
520 no_connection(nkf_char c2, nkf_char c1)
521 {
522  no_connection2(c2,c1,0);
523 }
524 
/* Conversion hooks.  The converter pointers start out at the
 * no_connection placeholders; the character I/O pointers start out at the
 * plain std_getc / std_ungetc / std_putc primitives. */
525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
527 
528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
535 
536 /* static redirections */
537 
538 static void (*o_putc)(nkf_char c) = std_putc;
539 
540 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
541 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
542 
543 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
544 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
545 
546 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
547 
548 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
550 
551 /* for strict mime */
552 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
554 
555 /* Global states */
556 static int output_mode = ASCII; /* output kanji mode */
557 static int input_mode = ASCII; /* input kanji mode */
558 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
559 
560 /* X0201 / X0208 conversion tables */
561 
562 /* X0201 kana conversion table */
563 /* 90-9F A0-DF */
/* Stored as consecutive byte pairs; the final pair is 0x00,0x00.
 * NOTE(review): the pairs look like two-byte JIS X 0208 codes (0x21xx
 * symbols, 0x25xx katakana); the code that indexes this table is outside
 * this excerpt -- confirm the indexing there. */
564 static const unsigned char cv[]= {
565  0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566  0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567  0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568  0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569  0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570  0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571  0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572  0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573  0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574  0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575  0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576  0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577  0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578  0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579  0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580  0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581  0x00,0x00};
582 
583 
584 /* X0201 kana conversion table for dakuten (voiced sound mark) */
585 /* 90-9F A0-DF */
/* Same pair layout and length as cv; most pairs are 0x00,0x00.
 * NOTE(review): a zero pair presumably means "no voiced form" -- confirm
 * in the lookup code, which is outside this excerpt. */
586 static const unsigned char dv[]= {
587  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591  0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592  0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593  0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594  0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595  0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596  0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597  0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598  0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603  0x00,0x00};
604 
605 /* X0201 kana conversion table for handakuten (semi-voiced sound mark) */
606 /* 90-9F A0-DF */
/* Same pair layout and length as cv; only five pairs are non-zero. */
607 static const unsigned char ev[]= {
608  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618  0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619  0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624  0x00,0x00};
625 
626 /* X0201 kana to X0213 conversion table for handakuten (semi-voiced sound mark) */
627 /* 90-9F A0-DF */
/* Same pair layout and length as ev, with a different set of non-zero
 * pairs (0x25,0x77 through 0x25,0x7e). */
628 static const unsigned char ev_x0213[]= {
629  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634  0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635  0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636  0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637  0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638  0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645  0x00,0x00};
646 
647 
648 /* X0208 kigou (symbol) conversion table */
649 /* 0x8140 - 0x819e */
/* One byte per entry, 96 entries.  Non-zero entries are printable ASCII
 * codes (for example 0x2c ',' and 0x2e '.').
 * NOTE(review): a zero entry presumably means "no ASCII equivalent" --
 * confirm in the lookup code, which is outside this excerpt. */
650 static const unsigned char fv[] = {
651 
652  0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653  0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654  0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655  0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656  0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657  0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658  0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659  0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660  0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662  0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
664 } ;
665 
666 
667 
/* Miscellaneous run-time state: option parsing mode, file-output and
 * overwrite/backup settings, and end-of-line tracking. */
668 static int option_mode = 0;
669 static int file_out_f = FALSE;
670 #ifdef OVERWRITE
671 static int overwrite_f = FALSE;
672 static int preserve_time_f = FALSE;
673 static int backup_f = FALSE;
674 static char *backup_suffix = "";
675 #endif
676 
677 static int eolmode_f = 0; /* CR, LF, CRLF */
678 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
679 static nkf_char prev_cr = 0; /* CR or 0 */
680 #ifdef EASYWIN /*Easy Win */
681 static int end_check;
682 #endif /*Easy Win */
683 
/*
 * malloc() wrapper that never returns NULL.
 *
 * A zero-byte request is rounded up to one byte, because malloc(0) is
 * allowed to return NULL even when memory is available.  If the allocation
 * fails, the error is reported with perror() and the process exits; callers
 * such as nkf_buf_new() dereference the result without checking it.
 *
 * Returns a pointer to at least `size` bytes; release it with nkf_xfree().
 */
static void *
nkf_xmalloc(size_t size)
{
    void *ptr;

    if (size == 0) size = 1;

    ptr = malloc(size);
    if (ptr == NULL) {
        perror("can't malloc");
        exit(EXIT_FAILURE);
    }

    return ptr;
}
699 
/*
 * realloc() wrapper that never returns NULL.
 *
 * A zero-byte request is rounded up to one byte so the call cannot act as a
 * free.  The result is kept in a separate variable so the original block is
 * not lost track of before the failure check.  On failure the error is
 * reported with perror() and the process exits.
 *
 * Returns the (possibly moved) block of at least `size` bytes.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized;

    if (size == 0) size = 1;

    resized = realloc(ptr, size);
    if (resized == NULL) {
        perror("can't realloc");
        exit(EXIT_FAILURE);
    }

    return resized;
}
713 
/* Counterpart of nkf_xmalloc()/nkf_xrealloc(); expands directly to free(). */
714 #define nkf_xfree(ptr) free(ptr)
715 
716 static int
717 nkf_str_caseeql(const char *src, const char *target)
718 {
719  int i;
720  for (i = 0; src[i] && target[i]; i++) {
721  if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
722  }
723  if (src[i] || target[i]) return FALSE;
724  else return TRUE;
725 }
726 
727 static nkf_encoding*
728 nkf_enc_from_index(int idx)
729 {
730  if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
731  return 0;
732  }
733  return &nkf_encoding_table[idx];
734 }
735 
736 static int
737 nkf_enc_find_index(const char *name)
738 {
739  int i;
740  if (name[0] == 'X' && *(name+1) == '-') name += 2;
741  for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
742  if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
743  return encoding_name_to_id_table[i].id;
744  }
745  }
746  return -1;
747 }
748 
749 static nkf_encoding*
750 nkf_enc_find(const char *name)
751 {
752  int idx = -1;
753  idx = nkf_enc_find_index(name);
754  if (idx < 0) return 0;
755  return nkf_enc_from_index(idx);
756 }
757 
/* Accessors and predicates for nkf_encoding pointers.  Each macro may
 * evaluate `enc` more than once, so pass a side-effect-free expression. */
758 #define nkf_enc_name(enc) (enc)->name
759 #define nkf_enc_to_index(enc) (enc)->id
760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is ASCII or ISO-2022-JP. */
763 #define nkf_enc_asciicompat(enc) (\
764  nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
765  nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is UTF-8, UTF-16 or UTF-32. */
766 #define nkf_enc_unicode_p(enc) (\
767  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
768  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
769  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
770 #define nkf_enc_cp5022x_p(enc) (\
771  nkf_enc_to_index(enc) == CP50220 ||\
772  nkf_enc_to_index(enc) == CP50221 ||\
773  nkf_enc_to_index(enc) == CP50222)
774 
775 #ifdef DEFAULT_CODE_LOCALE
/*
 * Name of the character encoding used by the current locale/platform.
 *
 *  - With <langinfo.h>: the string returned by nl_langinfo(CODESET).
 *  - On Windows: "CP<n>", where n is the ANSI code page from GetACP().
 *  - On 32-bit OS/2: "Shift_JIS" for code pages 932 and 943, otherwise
 *    "CP<n>".
 *  - Otherwise: NULL.
 *
 * The Windows and OS/2 results live in a static buffer that is overwritten
 * by the next call.  GetACP() returns an unsigned value, so it is printed
 * with %u rather than %d.
 */
static const char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    sprintf(buf, "CP%u", (unsigned int)GetACP());
    return buf;
#elif defined(__OS2__)
# if defined(INT_IS_SHORT)
    /* OS/2 1.x */
    return NULL;
# else
    /* OS/2 32bit */
    static char buf[16];
    ULONG ulCP[1], ulncp;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
# endif
#endif
    return NULL;
}
803 
804 static nkf_encoding*
805 nkf_locale_encoding(void)
806 {
807  nkf_encoding *enc = 0;
808  const char *encname = nkf_locale_charmap();
809  if (encname)
810  enc = nkf_enc_find(encname);
811  return enc;
812 }
813 #endif /* DEFAULT_CODE_LOCALE */
814 
815 static nkf_encoding*
816 nkf_utf8_encoding(void)
817 {
818  return &nkf_encoding_table[UTF_8];
819 }
820 
821 static nkf_encoding*
822 nkf_default_encoding(void)
823 {
824  nkf_encoding *enc = 0;
825 #ifdef DEFAULT_CODE_LOCALE
826  enc = nkf_locale_encoding();
827 #elif defined(DEFAULT_ENCIDX)
828  enc = nkf_enc_from_index(DEFAULT_ENCIDX);
829 #endif
830  if (!enc) enc = nkf_utf8_encoding();
831  return enc;
832 }
833 
834 typedef struct {
835  long capa;
836  long len;
838 } nkf_buf_t;
839 
840 static nkf_buf_t *
841 nkf_buf_new(int length)
842 {
843  nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
844  buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
845  buf->capa = length;
846  buf->len = 0;
847  return buf;
848 }
849 
/* Disabled code (#if 0): would release a buffer created by nkf_buf_new(),
 * first the element storage and then the descriptor. */
850 #if 0
851 static void
852 nkf_buf_dispose(nkf_buf_t *buf)
853 {
854  nkf_xfree(buf->ptr);
855  nkf_xfree(buf);
856 }
857 #endif
858 
/* Number of stored elements, and a test for "no elements stored". */
859 #define nkf_buf_length(buf) ((buf)->len)
860 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
861 
862 static nkf_char
863 nkf_buf_at(nkf_buf_t *buf, int index)
864 {
865  assert(index <= buf->len);
866  return buf->ptr[index];
867 }
868 
869 static void
870 nkf_buf_clear(nkf_buf_t *buf)
871 {
872  buf->len = 0;
873 }
874 
875 static void
876 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
877 {
878  if (buf->capa <= buf->len) {
880  }
881  buf->ptr[buf->len++] = c;
882 }
883 
884 static nkf_char
885 nkf_buf_pop(nkf_buf_t *buf)
886 {
888  return buf->ptr[--buf->len];
889 }
890 
891 /* Normalization Form C */
/* NOTE(review): the heading above does not describe the code that follows
 * in this listing; whatever it introduced appears to be missing here.
 * The block below opens the non-PERL_XS section (closed after
 * show_configuration) and, for WIN32DLL builds, redirects fprintf to
 * dllprintf. */
892 #ifndef PERL_XS
893 #ifdef WIN32DLL
894 #define fprintf dllprintf
895 #endif
896 
897 static void
898 version(void)
899 {
900  fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
901 }
902 
903 static void
904 usage(void)
905 {
907  "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
908 #ifdef UTF8_OUTPUT_ENABLE
909  " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910  " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
911 #else
912 #endif
913 #ifdef UTF8_INPUT_ENABLE
914  " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915  " UTF option is -W[8,[16,32][B,L]]\n"
916 #else
917  " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
918 #endif
919  );
921  " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922  " M[BQ] MIME encode [B:base64 Q:quoted]\n"
923  " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
924  );
926  " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
927  " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
928  " 4: JISX0208 Katakana to JISX0201 Katakana\n"
929  " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
930  );
932  " O Output to File (DEFAULT 'nkf.out')\n"
933  " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
934  );
936  " --ic=<encoding> Specify the input encoding\n"
937  " --oc=<encoding> Specify the output encoding\n"
938  " --hiragana --katakana Hiragana/Katakana Conversion\n"
939  " --katakana-hiragana Converts each other\n"
940  );
942 #ifdef INPUT_OPTION
943  " --{cap, url}-input Convert hex after ':' or '%%'\n"
944 #endif
945 #ifdef NUMCHAR_OPTION
946  " --numchar-input Convert Unicode Character Reference\n"
947 #endif
948 #ifdef UTF8_INPUT_ENABLE
949  " --fb-{skip, html, xml, perl, java, subchar}\n"
950  " Specify unassigned character's replacement\n"
951 #endif
952  );
954 #ifdef OVERWRITE
955  " --in-place[=SUF] Overwrite original files\n"
956  " --overwrite[=SUF] Preserve timestamp of original files\n"
957 #endif
958  " -g --guess Guess the input code\n"
959  " -v --version Print the version\n"
960  " --help/-V Print this help / configuration\n"
961  );
962  version();
963 }
964 
965 static void
966 show_configuration(void)
967 {
969  "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
970  " Compile-time options:\n"
971  " Compiled at: " __DATE__ " " __TIME__ "\n"
972  );
974  " Default output encoding: "
975 #ifdef DEFAULT_CODE_LOCALE
976  "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
977 #elif defined(DEFAULT_ENCIDX)
978  "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
979 #else
980  "NONE\n"
981 #endif
982  );
984  " Default output end of line: "
985 #if DEFAULT_NEWLINE == CR
986  "CR"
987 #elif DEFAULT_NEWLINE == CRLF
988  "CRLF"
989 #else
990  "LF"
991 #endif
992  "\n"
993  " Decode MIME encoded string: "
995  "ON"
996 #else
997  "OFF"
998 #endif
999  "\n"
1000  " Convert JIS X 0201 Katakana: "
1001 #if X0201_DEFAULT
1002  "ON"
1003 #else
1004  "OFF"
1005 #endif
1006  "\n"
1007  " --help, --version output: "
1008 #if HELP_OUTPUT_HELP_OUTPUT
1009  "HELP_OUTPUT"
1010 #else
1011  "STDOUT"
1012 #endif
1013  "\n");
1014 }
1015 #endif /*PERL_XS*/
1016 
1017 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from `suffix`.
 *
 * If the suffix contains one or more '*', the result is the suffix with
 * every '*' replaced by the file name.  Otherwise the result is the file
 * name followed by the suffix.
 *
 * Lengths are kept in size_t (the old code stored strlen() results in int),
 * the allocation size no longer depends on (filename_length - 1) wrapping
 * around for an empty file name, and pieces are copied with memcpy() rather
 * than strncat(), which rescanned the output for every '*'.
 *
 * Returns a NUL-terminated string from nkf_xmalloc(); the caller owns it.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    const size_t suffix_length = strlen(suffix);
    const size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t i;
    size_t j = 0;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Each one-byte '*' is replaced by filename_length bytes. */
        backup_filename = nkf_xmalloc((suffix_length - asterisk_count)
                                      + asterisk_count * filename_length + 1);
        for (i = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
    } else {
        backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length);
        j = filename_length + suffix_length;
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1052 #endif
1053 
1054 #ifdef UTF8_INPUT_ENABLE
1055 static void
1056 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1057 {
1058  int shift = 20;
1059  c &= VALUE_MASK;
1060  while(shift >= 0){
1061  if(c >= NKF_INT32_C(1)<<shift){
1062  while(shift >= 0){
1063  (*f)(0, bin2hex(c>>shift));
1064  shift -= 4;
1065  }
1066  }else{
1067  shift -= 4;
1068  }
1069  }
1070  return;
1071 }
1072 
1073 static void
1074 encode_fallback_html(nkf_char c)
1075 {
1076  (*oconv)(0, '&');
1077  (*oconv)(0, '#');
1078  c &= VALUE_MASK;
1079  if(c >= NKF_INT32_C(1000000))
1080  (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1081  if(c >= NKF_INT32_C(100000))
1082  (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1083  if(c >= 10000)
1084  (*oconv)(0, 0x30+(c/10000 )%10);
1085  if(c >= 1000)
1086  (*oconv)(0, 0x30+(c/1000 )%10);
1087  if(c >= 100)
1088  (*oconv)(0, 0x30+(c/100 )%10);
1089  if(c >= 10)
1090  (*oconv)(0, 0x30+(c/10 )%10);
1091  if(c >= 0)
1092  (*oconv)(0, 0x30+ c %10);
1093  (*oconv)(0, ';');
1094  return;
1095 }
1096 
1097 static void
1098 encode_fallback_xml(nkf_char c)
1099 {
1100  (*oconv)(0, '&');
1101  (*oconv)(0, '#');
1102  (*oconv)(0, 'x');
1103  nkf_each_char_to_hex(oconv, c);
1104  (*oconv)(0, ';');
1105  return;
1106 }
1107 
1108 static void
1109 encode_fallback_java(nkf_char c)
1110 {
1111  (*oconv)(0, '\\');
1112  c &= VALUE_MASK;
1113  if(!nkf_char_unicode_bmp_p(c)){
1114  int high = (c >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
1115  int low = (c & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
1116  (*oconv)(0, 'u');
1117  (*oconv)(0, bin2hex(high>>12));
1118  (*oconv)(0, bin2hex(high>> 8));
1119  (*oconv)(0, bin2hex(high>> 4));
1120  (*oconv)(0, bin2hex(high ));
1121  (*oconv)(0, '\\');
1122  (*oconv)(0, 'u');
1123  (*oconv)(0, bin2hex(low>>12));
1124  (*oconv)(0, bin2hex(low>> 8));
1125  (*oconv)(0, bin2hex(low>> 4));
1126  (*oconv)(0, bin2hex(low ));
1127  }else{
1128  (*oconv)(0, 'u');
1129  (*oconv)(0, bin2hex(c>>12));
1130  (*oconv)(0, bin2hex(c>> 8));
1131  (*oconv)(0, bin2hex(c>> 4));
1132  (*oconv)(0, bin2hex(c ));
1133  }
1134  return;
1135 }
1136 
1137 static void
1138 encode_fallback_perl(nkf_char c)
1139 {
1140  (*oconv)(0, '\\');
1141  (*oconv)(0, 'x');
1142  (*oconv)(0, '{');
1143  nkf_each_char_to_hex(oconv, c);
1144  (*oconv)(0, '}');
1145  return;
1146 }
1147 
1148 static void
1149 encode_fallback_subchar(nkf_char c)
1150 {
1151  c = unicode_subchar;
1152  (*oconv)((c>>8)&0xFF, c&0xFF);
1153  return;
1154 }
1155 #endif
1156 
/*
 * Table of long option names.
 *
 * Each entry pairs a long option `name` with an `alias` string; many aliases
 * are the empty string.  Which entries exist depends on the feature macros
 * tested below (X0212_ENABLE, UTF8_OUTPUT_ENABLE, UTF8_INPUT_ENABLE, ...).
 *
 * NOTE(review): the code that consumes this table is outside this chunk.
 * Presumably a non-empty alias is the equivalent short-option string, an empty
 * alias means the option is handled by name, and a name ending in '=' takes
 * an argument -- confirm against the option parser.
 */
1157 static const struct {
1158  const char *name;
1159  const char *alias;
1160 } long_option[] = {
1161  {"ic=", ""},
1162  {"oc=", ""},
1163  {"base64","jMB"},
1164  {"euc","e"},
1165  {"euc-input","E"},
1166  {"fj","jm"},
1167  {"help",""},
1168  {"jis","j"},
1169  {"jis-input","J"},
1170  {"mac","sLm"},
1171  {"mime","jM"},
1172  {"mime-input","m"},
1173  {"msdos","sLw"},
1174  {"sjis","s"},
1175  {"sjis-input","S"},
1176  {"unix","eLu"},
1177  {"version","v"},
1178  {"windows","sLw"},
1179  {"hiragana","h1"},
1180  {"katakana","h2"},
1181  {"katakana-hiragana","h3"},
1182  {"guess=", ""},
1183  {"guess", "g2"},
1184  {"cp932", ""},
1185  {"no-cp932", ""},
1186 #ifdef X0212_ENABLE
1187  {"x0212", ""},
1188 #endif
1189 #ifdef UTF8_OUTPUT_ENABLE
1190  {"utf8", "w"},
1191  {"utf16", "w16"},
1192  {"ms-ucs-map", ""},
/* fallback selectors; a matching encode_fallback_* function exists for html, xml, perl, java and subchar */
1193  {"fb-skip", ""},
1194  {"fb-html", ""},
1195  {"fb-xml", ""},
1196  {"fb-perl", ""},
1197  {"fb-java", ""},
1198  {"fb-subchar", ""},
1199  {"fb-subchar=", ""},
1200 #endif
1201 #ifdef UTF8_INPUT_ENABLE
1202  {"utf8-input", "W"},
1203  {"utf16-input", "W16"},
1204  {"no-cp932ext", ""},
1205  {"no-best-fit-chars",""},
1206 #endif
1207 #ifdef UNICODE_NORMALIZATION
1208  {"utf8mac-input", ""},
1209 #endif
1210 #ifdef OVERWRITE
1211  {"overwrite", ""},
1212  {"overwrite=", ""},
1213  {"in-place", ""},
1214  {"in-place=", ""},
1215 #endif
1216 #ifdef INPUT_OPTION
1217  {"cap-input", ""},
1218  {"url-input", ""},
1219 #endif
1220 #ifdef NUMCHAR_OPTION
1221  {"numchar-input", ""},
1222 #endif
1223 #ifdef CHECK_OPTION
1224  {"no-output", ""},
1225  {"debug", ""},
1226 #endif
1227 #ifdef SHIFTJIS_CP932
1228  {"cp932inv", ""},
1229 #endif
1230 #ifdef EXEC_IO
1231  {"exec-in", ""},
1232  {"exec-out", ""},
1233 #endif
1234  {"prefix=", ""},
1235 };
1236 
1237 static void
1238 set_input_encoding(nkf_encoding *enc)
1239 {
1240  switch (nkf_enc_to_index(enc)) {
1241  case ISO_8859_1:
1242  iso8859_f = TRUE;
1243  break;
1244  case CP50221:
1245  case CP50222:
1246  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1247  case CP50220:
1248 #ifdef SHIFTJIS_CP932
1249  cp51932_f = TRUE;
1250 #endif
1251 #ifdef UTF8_OUTPUT_ENABLE
1252  ms_ucs_map_f = UCS_MAP_CP932;
1253 #endif
1254  break;
1255  case ISO_2022_JP_1:
1256  x0212_f = TRUE;
1257  break;
1258  case ISO_2022_JP_3:
1259  x0212_f = TRUE;
1260  x0213_f = TRUE;
1261  break;
1262  case ISO_2022_JP_2004:
1263  x0212_f = TRUE;
1264  x0213_f = TRUE;
1265  break;
1266  case SHIFT_JIS:
1267  break;
1268  case WINDOWS_31J:
1269  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1270 #ifdef SHIFTJIS_CP932
1271  cp51932_f = TRUE;
1272 #endif
1273 #ifdef UTF8_OUTPUT_ENABLE
1274  ms_ucs_map_f = UCS_MAP_CP932;
1275 #endif
1276  break;
1277  break;
1278  case CP10001:
1279 #ifdef SHIFTJIS_CP932
1280  cp51932_f = TRUE;
1281 #endif
1282 #ifdef UTF8_OUTPUT_ENABLE
1283  ms_ucs_map_f = UCS_MAP_CP10001;
1284 #endif
1285  break;
1286  case EUC_JP:
1287  break;
1288  case EUCJP_NKF:
1289  break;
1290  case CP51932:
1291  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1292 #ifdef SHIFTJIS_CP932
1293  cp51932_f = TRUE;
1294 #endif
1295 #ifdef UTF8_OUTPUT_ENABLE
1296  ms_ucs_map_f = UCS_MAP_CP932;
1297 #endif
1298  break;
1299  case EUCJP_MS:
1300  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1301 #ifdef SHIFTJIS_CP932
1302  cp51932_f = FALSE;
1303 #endif
1304 #ifdef UTF8_OUTPUT_ENABLE
1305  ms_ucs_map_f = UCS_MAP_MS;
1306 #endif
1307  break;
1308  case EUCJP_ASCII:
1309  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1310 #ifdef SHIFTJIS_CP932
1311  cp51932_f = FALSE;
1312 #endif
1313 #ifdef UTF8_OUTPUT_ENABLE
1314  ms_ucs_map_f = UCS_MAP_ASCII;
1315 #endif
1316  break;
1317  case SHIFT_JISX0213:
1318  case SHIFT_JIS_2004:
1319  x0213_f = TRUE;
1320 #ifdef SHIFTJIS_CP932
1321  cp51932_f = FALSE;
1322  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1323 #endif
1324  break;
1325  case EUC_JISX0213:
1326  case EUC_JIS_2004:
1327  x0213_f = TRUE;
1328 #ifdef SHIFTJIS_CP932
1329  cp51932_f = FALSE;
1330 #endif
1331  break;
1332 #ifdef UTF8_INPUT_ENABLE
1333 #ifdef UNICODE_NORMALIZATION
1334  case UTF8_MAC:
1335  nfc_f = TRUE;
1336  break;
1337 #endif
1338  case UTF_16:
1339  case UTF_16BE:
1340  case UTF_16BE_BOM:
1341  input_endian = ENDIAN_BIG;
1342  break;
1343  case UTF_16LE:
1344  case UTF_16LE_BOM:
1345  input_endian = ENDIAN_LITTLE;
1346  break;
1347  case UTF_32:
1348  case UTF_32BE:
1349  case UTF_32BE_BOM:
1350  input_endian = ENDIAN_BIG;
1351  break;
1352  case UTF_32LE:
1353  case UTF_32LE_BOM:
1354  input_endian = ENDIAN_LITTLE;
1355  break;
1356 #endif
1357  }
1358 }
1359 
1360 static void
1361 set_output_encoding(nkf_encoding *enc)
1362 {
1363  switch (nkf_enc_to_index(enc)) {
1364  case CP50220:
1365 #ifdef SHIFTJIS_CP932
1366  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1367 #endif
1368 #ifdef UTF8_OUTPUT_ENABLE
1369  ms_ucs_map_f = UCS_MAP_CP932;
1370 #endif
1371  break;
1372  case CP50221:
1373  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1374 #ifdef SHIFTJIS_CP932
1375  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1376 #endif
1377 #ifdef UTF8_OUTPUT_ENABLE
1378  ms_ucs_map_f = UCS_MAP_CP932;
1379 #endif
1380  break;
1381  case ISO_2022_JP:
1382 #ifdef SHIFTJIS_CP932
1383  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1384 #endif
1385  break;
1386  case ISO_2022_JP_1:
1387  x0212_f = TRUE;
1388 #ifdef SHIFTJIS_CP932
1389  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1390 #endif
1391  break;
1392  case ISO_2022_JP_3:
1393  case ISO_2022_JP_2004:
1394  x0212_f = TRUE;
1395  x0213_f = TRUE;
1396 #ifdef SHIFTJIS_CP932
1397  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1398 #endif
1399  break;
1400  case SHIFT_JIS:
1401  break;
1402  case WINDOWS_31J:
1403  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1404 #ifdef UTF8_OUTPUT_ENABLE
1405  ms_ucs_map_f = UCS_MAP_CP932;
1406 #endif
1407  break;
1408  case CP10001:
1409 #ifdef UTF8_OUTPUT_ENABLE
1410  ms_ucs_map_f = UCS_MAP_CP10001;
1411 #endif
1412  break;
1413  case EUC_JP:
1414  x0212_f = TRUE;
1415 #ifdef SHIFTJIS_CP932
1416  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1417 #endif
1418 #ifdef UTF8_OUTPUT_ENABLE
1419  ms_ucs_map_f = UCS_MAP_ASCII;
1420 #endif
1421  break;
1422  case EUCJP_NKF:
1423  x0212_f = FALSE;
1424 #ifdef SHIFTJIS_CP932
1425  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1426 #endif
1427 #ifdef UTF8_OUTPUT_ENABLE
1428  ms_ucs_map_f = UCS_MAP_ASCII;
1429 #endif
1430  break;
1431  case CP51932:
1432  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1433 #ifdef SHIFTJIS_CP932
1434  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1435 #endif
1436 #ifdef UTF8_OUTPUT_ENABLE
1437  ms_ucs_map_f = UCS_MAP_CP932;
1438 #endif
1439  break;
1440  case EUCJP_MS:
1441  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1442  x0212_f = TRUE;
1443 #ifdef UTF8_OUTPUT_ENABLE
1444  ms_ucs_map_f = UCS_MAP_MS;
1445 #endif
1446  break;
1447  case EUCJP_ASCII:
1448  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1449  x0212_f = TRUE;
1450 #ifdef UTF8_OUTPUT_ENABLE
1451  ms_ucs_map_f = UCS_MAP_ASCII;
1452 #endif
1453  break;
1454  case SHIFT_JISX0213:
1455  case SHIFT_JIS_2004:
1456  x0213_f = TRUE;
1457 #ifdef SHIFTJIS_CP932
1458  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1459 #endif
1460  break;
1461  case EUC_JISX0213:
1462  case EUC_JIS_2004:
1463  x0212_f = TRUE;
1464  x0213_f = TRUE;
1465 #ifdef SHIFTJIS_CP932
1466  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1467 #endif
1468  break;
1469 #ifdef UTF8_OUTPUT_ENABLE
1470  case UTF_8_BOM:
1471  output_bom_f = TRUE;
1472  break;
1473  case UTF_16:
1474  case UTF_16BE_BOM:
1475  output_bom_f = TRUE;
1476  break;
1477  case UTF_16LE:
1478  output_endian = ENDIAN_LITTLE;
1479  output_bom_f = FALSE;
1480  break;
1481  case UTF_16LE_BOM:
1482  output_endian = ENDIAN_LITTLE;
1483  output_bom_f = TRUE;
1484  break;
1485  case UTF_32:
1486  case UTF_32BE_BOM:
1487  output_bom_f = TRUE;
1488  break;
1489  case UTF_32LE:
1490  output_endian = ENDIAN_LITTLE;
1491  output_bom_f = FALSE;
1492  break;
1493  case UTF_32LE_BOM:
1494  output_endian = ENDIAN_LITTLE;
1495  output_bom_f = TRUE;
1496  break;
1497 #endif
1498  }
1499 }
1500 
1501 static struct input_code*
1502 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1503 {
1504  if (iconv_func){
1505  struct input_code *p = input_code_list;
1506  while (p->name){
1507  if (iconv_func == p->iconv_func){
1508  return p;
1509  }
1510  p++;
1511  }
1512  }
1513  return 0;
1514 }
1515 
1516 static void
1517 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1518 {
1519 #ifdef INPUT_CODE_FIX
1520  if (f || !input_encoding)
1521 #endif
1522  if (estab_f != f){
1523  estab_f = f;
1524  }
1525 
1526  if (iconv_func
1527 #ifdef INPUT_CODE_FIX
1528  && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1529 #endif
1530  ){
1531  iconv = iconv_func;
1532  }
1533 #ifdef CHECK_OPTION
1534  if (estab_f && iconv_for_check != iconv){
1535  struct input_code *p = find_inputcode_byfunc(iconv);
1536  if (p){
1537  set_input_codename(p->name);
1538  debug(p->name);
1539  }
1540  iconv_for_check = iconv;
1541  }
1542 #endif
1543 }
1544 
1545 #ifdef X0212_ENABLE
1546 static nkf_char
1547 x0212_shift(nkf_char c)
1548 {
1549  nkf_char ret = c;
1550  c &= 0x7f;
1551  if (is_eucg3(ret)){
1552  if (0x75 <= c && c <= 0x7f){
1553  ret = c + (0x109 - 0x75);
1554  }
1555  }else{
1556  if (0x75 <= c && c <= 0x7f){
1557  ret = c + (0x113 - 0x75);
1558  }
1559  }
1560  return ret;
1561 }
1562 
1563 
1564 static nkf_char
1565 x0212_unshift(nkf_char c)
1566 {
1567  nkf_char ret = c;
1568  if (0x7f <= c && c <= 0x88){
1569  ret = c + (0x75 - 0x7f);
1570  }else if (0x89 <= c && c <= 0x92){
1571  ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1572  }
1573  return ret;
1574 }
1575 #endif /* X0212_ENABLE */
1576 
1577 static int
1578 is_x0213_2_in_x0212(nkf_char c1)
1579 {
1580  static const char x0213_2_table[] =
1581  {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1582  int ku = c1 - 0x20;
1583  if (ku <= 15)
1584  return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
1585  if (78 <= ku && ku <= 94)
1586  return 1;
1587  return 0;
1588 }
1589 
/*
 * Convert the pair (c2, c1) and store the resulting two values through p2 and
 * p1 (each is written only when the pointer is non-null).
 * NOTE(review): judging by the name this is the EUC-JP -> Shift_JIS direction;
 * the callers are only partly visible in this chunk -- confirm.
 *
 * Returns 0 when a result was stored, 1 when the input could not be
 * converted.
 */
1590 static nkf_char
1591 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1592 {
1593  nkf_char ndx;
1594  if (is_eucg3(c2)){
1595  ndx = c2 & 0x7f;
/* with x0213_f set, rows accepted by is_x0213_2_in_x0212() are computed arithmetically */
1596  if (x0213_f && is_x0213_2_in_x0212(ndx)){
1597  if((0x21 <= ndx && ndx <= 0x2F)){
1598  if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1599  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1600  return 0;
1601  }else if(0x6E <= ndx && ndx <= 0x7E){
1602  if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1603  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1604  return 0;
1605  }
/* accepted row outside both ranges: report failure */
1606  return 1;
1607  }
1608 #ifdef X0212_ENABLE
1609  else if(nkf_isgraph(ndx)){
1610  nkf_char val = 0;
1611  const unsigned short *ptr;
/* table lookup: a null row pointer or a zero entry means "no direct mapping" */
1612  ptr = x0212_shiftjis[ndx - 0x21];
1613  if (ptr){
1614  val = ptr[(c1 & 0x7f) - 0x21];
1615  }
1616  if (val){
1617  c2 = val >> 8;
1618  c1 = val & 0xff;
1619  if (p2) *p2 = c2;
1620  if (p1) *p1 = c1;
1621  return 0;
1622  }
/* no table entry: remap c2 with x0212_shift() and continue with the generic formula below */
1623  c2 = x0212_shift(c2);
1624  }
1625 #endif /* X0212_ENABLE */
1626  }
/* generic formula; values of c2 above 0x7F cannot be converted */
1627  if(0x7F < c2) return 1;
1628  if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1629  if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1630  return 0;
1631 }
1632 
/*
 * Convert the pair (c2, c1) and store the resulting two values through p2 and
 * p1 (each is written only when the pointer is non-null).
 * NOTE(review): judging by the name this is the Shift_JIS -> EUC-JP direction
 * -- confirm.
 *
 * Returns 1 without storing anything when c1 is greater than 0xFC, otherwise
 * 0.
 * NOTE(review): the table lookups below index with (c1 - 0x40); no lower
 * bound on c1 is checked in this function -- confirm the callers guarantee
 * c1 >= 0x40.
 */
1633 static nkf_char
1634 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1635 {
1636 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1637  nkf_char val;
1638 #endif
1639  static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1640  if (0xFC < c1) return 1;
1641 #ifdef SHIFTJIS_CP932
/* table substitution of (c2, c1) when cp932inv_f and x0213_f are both off */
1642  if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
1643  val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1644  if (val){
1645  c2 = val >> 8;
1646  c1 = val & 0xff;
1647  }
1648  }
/* table substitution of (c2, c1) when cp932inv_f is on */
1649  if (cp932inv_f
1650  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1651  val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1652  if (val){
1653  c2 = val >> 8;
1654  c1 = val & 0xff;
1655  }
1656  }
1657 #endif /* SHIFTJIS_CP932 */
1658 #ifdef X0212_ENABLE
/* direct table mapping; entries above 0x7FFF are stored with the PREFIX_EUCG3 marker in c2 */
1659  if (!x0213_f && is_ibmext_in_sjis(c2)){
1660  val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1661  if (val){
1662  if (val > 0x7FFF){
1663  c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1664  c1 = val & 0xff;
1665  }else{
1666  c2 = val >> 8;
1667  c1 = val & 0xff;
1668  }
1669  if (p2) *p2 = c2;
1670  if (p1) *p1 = c1;
1671  return 0;
1672  }
1673  }
1674 #endif
/* arithmetic conversion for lead values of 0x80 and above */
1675  if(c2 >= 0x80){
1676  if(x0213_f && c2 >= 0xF0){
1677  if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1678  c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1679  }else{ /* 78<=k<=94 */
1680  c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1681  if (0x9E < c1) c2++;
1682  }
1683  }else{
1684 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1685 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1686  c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1687  if (0x9E < c1) c2++;
1688  }
/* second value: c1 below 0x9F and c1 from 0x9F upwards use different offsets */
1689  if (c1 < 0x9F)
1690  c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1691  else {
1692  c1 = c1 - 0x7E;
1693  }
1694  }
1695 
1696 #ifdef X0212_ENABLE
1697  c2 = x0212_unshift(c2);
1698 #endif
1699  if (p2) *p2 = c2;
1700  if (p1) *p1 = c1;
1701  return 0;
1702 }
1703 
1704 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1705 static void
1706 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1707 {
1708  val &= VALUE_MASK;
1709  if (val < 0x80){
1710  *p1 = val;
1711  *p2 = 0;
1712  *p3 = 0;
1713  *p4 = 0;
1714  }else if (val < 0x800){
1715  *p1 = 0xc0 | (val >> 6);
1716  *p2 = 0x80 | (val & 0x3f);
1717  *p3 = 0;
1718  *p4 = 0;
1719  } else if (nkf_char_unicode_bmp_p(val)) {
1720  *p1 = 0xe0 | (val >> 12);
1721  *p2 = 0x80 | ((val >> 6) & 0x3f);
1722  *p3 = 0x80 | ( val & 0x3f);
1723  *p4 = 0;
1724  } else if (nkf_char_unicode_value_p(val)) {
1725  *p1 = 0xf0 | (val >> 18);
1726  *p2 = 0x80 | ((val >> 12) & 0x3f);
1727  *p3 = 0x80 | ((val >> 6) & 0x3f);
1728  *p4 = 0x80 | ( val & 0x3f);
1729  } else {
1730  *p1 = 0;
1731  *p2 = 0;
1732  *p3 = 0;
1733  *p4 = 0;
1734  }
1735 }
1736 
1737 static nkf_char
1738 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1739 {
1740  nkf_char wc;
1741  if (c1 <= 0x7F) {
1742  /* single byte */
1743  wc = c1;
1744  }
1745  else if (c1 <= 0xC1) {
1746  /* trail byte or invalid */
1747  return -1;
1748  }
1749  else if (c1 <= 0xDF) {
1750  /* 2 bytes */
1751  wc = (c1 & 0x1F) << 6;
1752  wc |= (c2 & 0x3F);
1753  }
1754  else if (c1 <= 0xEF) {
1755  /* 3 bytes */
1756  wc = (c1 & 0x0F) << 12;
1757  wc |= (c2 & 0x3F) << 6;
1758  wc |= (c3 & 0x3F);
1759  }
1760  else if (c2 <= 0xF4) {
1761  /* 4 bytes */
1762  wc = (c1 & 0x0F) << 18;
1763  wc |= (c2 & 0x3F) << 12;
1764  wc |= (c3 & 0x3F) << 6;
1765  wc |= (c4 & 0x3F);
1766  }
1767  else {
1768  return -1;
1769  }
1770  return wc;
1771 }
1772 #endif
1773 
1774 #ifdef UTF8_INPUT_ENABLE
1775 static int
1776 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1777  const unsigned short *const *pp, nkf_char psize,
1778  nkf_char *p2, nkf_char *p1)
1779 {
1780  nkf_char c2;
1781  const unsigned short *p;
1782  unsigned short val;
1783 
1784  if (pp == 0) return 1;
1785 
1786  c1 -= 0x80;
1787  if (c1 < 0 || psize <= c1) return 1;
1788  p = pp[c1];
1789  if (p == 0) return 1;
1790 
1791  c0 -= 0x80;
1792  if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1793  val = p[c0];
1794  if (val == 0) return 1;
1795  if (no_cp932ext_f && (
1796  (val>>8) == 0x2D || /* NEC special characters */
1797  val > NKF_INT32_C(0xF300) /* IBM extended characters */
1798  )) return 1;
1799 
1800  c2 = val >> 8;
1801  if (val > 0x7FFF){
1802  c2 &= 0x7f;
1803  c2 |= PREFIX_EUCG3;
1804  }
1805  if (c2 == SO) c2 = JIS_X_0201_1976_K;
1806  c1 = val & 0xFF;
1807  if (p2) *p2 = c2;
1808  if (p1) *p1 = c1;
1809  return 0;
1810 }
1811 
/*
 * Convert a UTF-8 sequence of up to three bytes (c2 = first byte, c1 = second,
 * c0 = third) and store the resulting pair through p2 / p1.
 *
 * - c2 below 0x80 is stored as (0, c2).
 * - c2 below 0xe0 is looked up in a 2-byte table chosen from ms_ucs_map_f and
 *   x0213_f.
 * - otherwise, when c0 is below 0xF0, a 3-byte table is used; if not, -1 is
 *   returned.
 * When no_best_fit_chars_f is set, a number of sequences are rejected
 * (return 1) before the table lookup.
 *
 * Returns 0 on success, 1 when the character is rejected or has no mapping,
 * and -1 for the unsupported case above.
 */
1812 static int
1813 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1814 {
1815  const unsigned short *const *pp;
1816  const unsigned short *const *const *ppp;
/* rejection tables indexed by (c1 & 0x3F); a non-zero entry makes the lookup return 1 */
1817  static const char no_best_fit_chars_table_C2[] =
1818  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1819  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1820  1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1821  0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1822  static const char no_best_fit_chars_table_C2_ms[] =
1823  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1824  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1825  1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1826  0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1827  static const char no_best_fit_chars_table_932_C2[] =
1828  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1829  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1830  1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1831  0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1832  static const char no_best_fit_chars_table_932_C3[] =
1833  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1834  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1835  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1836  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1837  nkf_char ret = 0;
1838 
1839  if(c2 < 0x80){
1840  *p2 = 0;
1841  *p1 = c2;
1842  }else if(c2 < 0xe0){
/* two-byte sequence: optional rejection first, then table lookup */
1843  if(no_best_fit_chars_f){
1844  if(ms_ucs_map_f == UCS_MAP_CP932){
1845  switch(c2){
1846  case 0xC2:
1847  if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1848  break;
1849  case 0xC3:
1850  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1851  break;
1852  }
1853  }else if(!cp932inv_f){
1854  switch(c2){
1855  case 0xC2:
1856  if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1857  break;
1858  case 0xC3:
1859  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1860  break;
1861  }
1862  }else if(ms_ucs_map_f == UCS_MAP_MS){
1863  if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1864  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1865  switch(c2){
1866  case 0xC2:
1867  switch(c1){
1868  case 0xA2:
1869  case 0xA3:
1870  case 0xA5:
1871  case 0xA6:
1872  case 0xAC:
1873  case 0xAF:
1874  case 0xB8:
1875  return 1;
1876  }
1877  break;
1878  }
1879  }
1880  }
1881  pp =
1882  ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1883  ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1884  ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1885  x0213_f ? utf8_to_euc_2bytes_x0213 :
/* NOTE(review): the last operand of this conditional chain and its ';' are
 * absent from this copy of the source (presumably the default 2-byte table)
 * -- restore from the upstream file. */
1887  ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1888  }else if(c0 < 0xF0){
/* three-byte sequence: optional rejection first, then table lookup */
1889  if(no_best_fit_chars_f){
1890  if(ms_ucs_map_f == UCS_MAP_CP932){
1891  if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1892  }else if(ms_ucs_map_f == UCS_MAP_MS){
1893  switch(c2){
1894  case 0xE2:
1895  switch(c1){
1896  case 0x80:
1897  if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1898  break;
1899  case 0x88:
1900  if(c0 == 0x92) return 1;
1901  break;
1902  }
1903  break;
1904  case 0xE3:
/* NOTE(review): every sibling check matches c1 and c0 together; the `||`
 * here rejects all sequences with c1 == 0x80 as well as all with
 * c0 == 0x9C -- confirm whether `&&` was intended. */
1905  if(c1 == 0x80 || c0 == 0x9C) return 1;
1906  break;
1907  }
1908  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1909  switch(c2){
1910  case 0xE3:
1911  switch(c1){
1912  case 0x82:
1913  if(c0 == 0x94) return 1;
1914  break;
1915  case 0x83:
1916  if(c0 == 0xBB) return 1;
1917  break;
1918  }
1919  break;
1920  }
1921  }else{
1922  switch(c2){
1923  case 0xE2:
1924  switch(c1){
1925  case 0x80:
1926  if(c0 == 0x95) return 1;
1927  break;
1928  case 0x88:
1929  if(c0 == 0xA5) return 1;
1930  break;
1931  }
1932  break;
1933  case 0xEF:
1934  switch(c1){
1935  case 0xBC:
1936  if(c0 == 0x8D) return 1;
1937  break;
1938  case 0xBD:
1939  if(c0 == 0x9E && !cp932inv_f) return 1;
1940  break;
1941  case 0xBF:
1942  if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1943  break;
1944  }
1945  break;
1946  }
1947  }
1948  }
1949  ppp =
1950  ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1951  ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1952  ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1953  x0213_f ? utf8_to_euc_3bytes_x0213 :
/* NOTE(review): the last operand of this conditional chain and its ';' are
 * absent from this copy of the source (presumably the default 3-byte table)
 * -- restore from the upstream file. */
1955  ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1956  }else return -1;
1957 #ifdef SHIFTJIS_CP932
/* a successful result carrying the is_eucg3() marker is either rejected
 * (cp932inv_f with encode_fallback set) or passed through e2s_conv() and
 * s2e_conv() */
1958  if (!ret&& is_eucg3(*p2)) {
1959  if (cp932inv_f) {
1960  if (encode_fallback) ret = 1;
1961  }
1962  else {
1963  nkf_char s2, s1;
1964  if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1965  s2e_conv(s2, s1, p2, p1);
1966  }else{
1967  ret = 1;
1968  }
1969  }
1970  }
1971 #endif
1972  return ret;
1973 }
1974 
1975 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Search the first `size` rows of `tbl` for a row whose element [0] equals
 * `euc`; on the first match, assign that row's element [2] to the variable
 * `low`, which must exist in the enclosing scope.  `low` is left untouched
 * when no row matches.
 *
 * All arguments are parenthesized so that expressions such as `a | b` or
 * `n + 1` can be passed safely.
 */
#define X0213_SURROGATE_FIND(tbl, size, euc) do { \
        int i; \
        for (i = 0; i < (size); i++) \
            if ((tbl)[i][0] == (euc)) { \
                low = (tbl)[i][2]; \
                break; \
            } \
    } while (0)
1984 
/*
 * Convert the pair (c2, c1) into a single value taken from the
 * euc_to_utf8 / x0212_to_utf8 tables.
 * NOTE(review): judging by the table names the result is a Unicode code
 * point -- confirm.
 *
 * Returns 0 when an index is out of range, the selected row is null, or a
 * needed surrogate partner is not found.  With x0213_f set, a table value in
 * 0xD800..0xDBFF is combined with a second value from one of the x0213
 * surrogate tables via UTF16_TO_UTF32().
 */
1985 static nkf_char
1986 e2w_conv(nkf_char c2, nkf_char c1)
1987 {
1988  const unsigned short *p;
1989 
1990  if (c2 == JIS_X_0201_1976_K) {
/* two values are special-cased when ms_ucs_map_f is UCS_MAP_CP10001 */
1991  if (ms_ucs_map_f == UCS_MAP_CP10001) {
1992  switch (c1) {
1993  case 0x20:
1994  return 0xA0;
1995  case 0x7D:
1996  return 0xA9;
1997  }
1998  }
1999  p = euc_to_utf8_1byte;
2000 #ifdef X0212_ENABLE
2001  } else if (is_eucg3(c2)){
2002  if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
2003  return 0xA6;
2004  }
2005  c2 = (c2&0x7f) - 0x21;
2006  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2007  p =
2008  x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
/* NOTE(review): the last operand of this conditional and its ';' are absent
 * from this copy of the source -- restore from the upstream file. */
2010  else
2011  return 0;
2012 #endif
2013  } else {
2014  c2 &= 0x7f;
2015  c2 = (c2&0x7f) - 0x21;
2016  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2017  p =
2018  x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
2019  ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
2020  ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
/* NOTE(review): the last operand of this conditional chain and its ';' are
 * absent from this copy of the source -- restore from the upstream file. */
2022  else
2023  return 0;
2024  }
2025  if (!p) return 0;
2026  c1 = (c1 & 0x7f) - 0x21;
2027  if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2028  nkf_char val = p[c1];
/* a value in 0xD800..0xDBFF needs its partner from a surrogate table */
2029  if (x0213_f && 0xD800<=val && val<=0xDBFF) {
2030  nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2031  nkf_char low = 0;
2032  if (p==x0212_to_utf8_2bytes_x0213[c2]) {
2033  X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
2034  } else {
2035  X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
2036  }
2037  if (!low) return 0;
2038  return UTF16_TO_UTF32(val, low);
2039  } else {
2040  return val;
2041  }
2042  }
2043  return 0;
2044 }
2045 
2046 static nkf_char
2047 e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
2048 {
2049  nkf_char euc;
2050  int i;
2051  for (i = 0; i < sizeof_x0213_combining_chars; i++)
2052  if (x0213_combining_chars[i] == comb)
2053  break;
2054  if (i >= sizeof_x0213_combining_chars)
2055  return 0;
2056  euc = (c2&0x7f)<<8 | (c1&0x7f);
2057  for (i = 0; i < sizeof_x0213_combining_table; i++)
2058  if (x0213_combining_table[i][0] == euc)
2059  return x0213_combining_table[i][1];
2060  return 0;
2061 }
2062 #endif
2063 
2064 static nkf_char
2065 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2066 {
2067  nkf_char ret = 0;
2068 
2069  if (!c1){
2070  *p2 = 0;
2071  *p1 = c2;
2072  }else if (0xc0 <= c2 && c2 <= 0xef) {
2073  ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2074 #ifdef NUMCHAR_OPTION
2075  if (ret > 0){
2076  if (p2) *p2 = 0;
2077  if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
2078  ret = 0;
2079  }
2080 #endif
2081  }
2082  return ret;
2083 }
2084 
2085 #ifdef UTF8_INPUT_ENABLE
2086 static nkf_char
2087 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
2088 {
2089  nkf_char c1, c2, c3, c4;
2090  nkf_char ret = 0;
2091  val &= VALUE_MASK;
2092  if (val < 0x80) {
2093  *p2 = 0;
2094  *p1 = val;
2095  }
2096  else if (nkf_char_unicode_bmp_p(val)){
2097  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2098  ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2099  if (ret > 0){
2100  *p2 = 0;
2101  *p1 = nkf_char_unicode_new(val);
2102  ret = 0;
2103  }
2104  }
2105  else {
2106  int i;
2107  if (x0213_f) {
2108  c1 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2109  c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2110  for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2111  if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
2112  val = x0213_1_surrogate_table[i][0];
2113  *p2 = val >> 8;
2114  *p1 = val & 0xFF;
2115  return 0;
2116  }
2117  for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2118  if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
2119  val = x0213_2_surrogate_table[i][0];
2120  *p2 = PREFIX_EUCG3 | (val >> 8);
2121  *p1 = val & 0xFF;
2122  return 0;
2123  }
2124  }
2125  *p2 = 0;
2126  *p1 = nkf_char_unicode_new(val);
2127  }
2128  return ret;
2129 }
2130 #endif
2131 
/*
 * Input converter: normalizes one character given as (c2, c1, c0) and passes
 * the result to oconv.
 * NOTE(review): judging by the name this handles EUC-JP input -- confirm.
 *
 * Returns -1 when c2 is 0x8f and c0 is 0 (nothing is output in that case;
 * presumably the caller retries with a third byte -- confirm), otherwise 0
 * after calling oconv.
 */
2132 static nkf_char
2133 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2134 {
2135  if (c2 == JIS_X_0201_1976_K || c2 == SS2){
/* replaced by the GETA1/GETA2 pair when iso2022jp_f is set and x0201_f is not */
2136  if (iso2022jp_f && !x0201_f) {
2137  c2 = GETA1; c1 = GETA2;
2138  } else {
2139  c2 = JIS_X_0201_1976_K;
2140  c1 &= 0x7f;
2141  }
2142 #ifdef X0212_ENABLE
2143  }else if (c2 == 0x8f){
2144  if (c0 == 0){
2145  return -1;
2146  }
2147  if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2148  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2149  c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2150  c2 = 0;
2151  } else {
/* fold the 0x8f prefix and the second byte into c2; the third byte becomes c1 */
2152  c2 = (c2 << 8) | (c1 & 0x7f);
2153  c1 = c0 & 0x7f;
2154 #ifdef SHIFTJIS_CP932
2155  if (cp51932_f){
2156  nkf_char s2, s1;
/* round trip through e2s_conv()/s2e_conv(); results below 0x100 are masked to 7 bits */
2157  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2158  s2e_conv(s2, s1, &c2, &c1);
2159  if (c2 < 0x100){
2160  c1 &= 0x7f;
2161  c2 &= 0x7f;
2162  }
2163  }
2164  }
2165 #endif /* SHIFTJIS_CP932 */
2166  }
2167 #endif /* X0212_ENABLE */
2168  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2169  /* NOP */
2170  } else {
2171  if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2172  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2173  c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2174  c2 = 0;
2175  } else {
2176  c1 &= 0x7f;
2177  c2 &= 0x7f;
2178 #ifdef SHIFTJIS_CP932
/* only c2 values 0x79..0x7c take the round trip when cp51932_f is set */
2179  if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2180  nkf_char s2, s1;
2181  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2182  s2e_conv(s2, s1, &c2, &c1);
2183  if (c2 < 0x100){
2184  c1 &= 0x7f;
2185  c2 &= 0x7f;
2186  }
2187  }
2188  }
2189 #endif /* SHIFTJIS_CP932 */
2190  }
2191  }
2192  (*oconv)(c2, c1);
2193  return 0;
2194 }
2195 
2196 static nkf_char
2197 s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2198 {
2199  if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2200  if (iso2022jp_f && !x0201_f) {
2201  c2 = GETA1; c1 = GETA2;
2202  } else {
2203  c1 &= 0x7f;
2204  }
2205  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2206  /* NOP */
2207  } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2208  /* CP932 UDC */
2209  if(c1 == 0x7F) return 0;
2210  c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2211  c2 = 0;
2212  } else {
2213  nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2214  if (ret) return ret;
2215  }
2216  (*oconv)(c2, c1);
2217  return 0;
2218 }
2219 
2220 static int
2221 x0213_wait_combining_p(nkf_char wc)
2222 {
2223  int i;
2224  for (i = 0; i < sizeof_x0213_combining_table; i++) {
2225  if (x0213_combining_table[i][1] == wc) {
2226  return TRUE;
2227  }
2228  }
2229  return FALSE;
2230 }
2231 
2232 static int
2233 x0213_combining_p(nkf_char wc)
2234 {
2235  int i;
2236  for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2237  if (x0213_combining_chars[i] == wc) {
2238  return TRUE;
2239  }
2240  }
2241  return FALSE;
2242 }
2243 
/*
 * Input converter for UTF-8: validates the sequence (c1 = first byte,
 * c2 = second, c3 = third; a fourth byte may be packed into the low 8 bits of
 * c3 when c3 exceeds 0xFF), converts it and passes the result to oconv.
 *
 * Return values visible in this function:
 *    0  the sequence was output, or was invalid and silently dropped
 *   -1  a three-byte lead was seen but c3 is 0
 *   -2  a four-byte lead was seen but c3 is 0
 *   -3  x0213_f is set and the character is one that
 *       x0213_wait_combining_p() accepts
 * plus any non-zero result of w2e_conv().
 * NOTE(review): the negative values presumably ask the caller for more input
 * -- confirm against the caller.
 */
2244 static nkf_char
2245 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2246 {
2247  nkf_char ret = 0, c4 = 0;
/* classification of the first byte; the switch below applies per-class range checks to the following bytes */
2248  static const char w_iconv_utf8_1st_byte[] =
2249  { /* 0xC0 - 0xFF */
2250  20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2251  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2252  30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2253  40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2254 
/* unpack a fourth byte carried in the low 8 bits of c3 */
2255  if (c3 > 0xFF) {
2256  c4 = c3 & 0xFF;
2257  c3 >>= 8;
2258  }
2259 
2260  if (c1 < 0 || 0xff < c1) {
2261  }else if (c1 == 0) { /* 0 : 1 byte*/
2262  c3 = 0;
2263  } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2264  return 0;
2265  } else{
/* NOTE(review): this branch is also reached for c1 in 0x01..0x7F, where
 * (c1 - 0xC0) is negative and the table is read out of bounds -- confirm
 * that callers never pass such a c1. */
2266  switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2267  case 21:
2268  if (c2 < 0x80 || 0xBF < c2) return 0;
2269  break;
2270  case 30:
2271  if (c3 == 0) return -1;
2272  if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2273  return 0;
2274  break;
2275  case 31:
2276  case 33:
2277  if (c3 == 0) return -1;
2278  if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2279  return 0;
2280  break;
2281  case 32:
2282  if (c3 == 0) return -1;
2283  if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2284  return 0;
2285  break;
2286  case 40:
2287  if (c3 == 0) return -2;
2288  if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2289  return 0;
2290  break;
2291  case 41:
2292  if (c3 == 0) return -2;
2293  if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2294  return 0;
2295  break;
2296  case 42:
2297  if (c3 == 0) return -2;
2298  if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2299  return 0;
2300  break;
/* every other class (20, 43, 50, 60, 70) is dropped */
2301  default:
2302  return 0;
2303  break;
2304  }
2305  }
2306  if (c1 == 0 || c1 == EOF){
2307  } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2308  c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2309  c1 = 0;
2310  } else {
2311  if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
2312  return -3;
2313  ret = w2e_conv(c1, c2, c3, &c1, &c2);
2314  }
2315  if (ret == 0){
2316  (*oconv)(c1, c2);
2317  }
2318  return ret;
2319 }
2320 
2321 static nkf_char
2322 w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
2323 {
2324  /* continue from the line below 'return -3;' in w_iconv() */
2325  nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
2326  if (ret == 0){
2327  (*oconv)(c1, c2);
2328  }
2329  return ret;
2330 }
2331 
2332 #define NKF_ICONV_INVALID_CODE_RANGE -13
2333 #define NKF_ICONV_WAIT_COMBINING_CHAR -14
2334 #define NKF_ICONV_NOT_COMBINED -15
2335 static size_t
2336 unicode_iconv(nkf_char wc, int nocombine)
2337 {
2338  nkf_char c1, c2;
2339  int ret = 0;
2340 
2341  if (wc < 0x80) {
2342  c2 = 0;
2343  c1 = wc;
2344  }else if ((wc>>11) == 27) {
2345  /* unpaired surrogate */
2347  }else if (wc < 0xFFFF) {
2348  if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
2350  ret = w16e_conv(wc, &c2, &c1);
2351  if (ret) return ret;
2352  }else if (wc < 0x10FFFF) {
2353  c2 = 0;
2354  c1 = nkf_char_unicode_new(wc);
2355  } else {
2357  }
2358  (*oconv)(c2, c1);
2359  return 0;
2360 }
2361 
2362 static nkf_char
2363 unicode_iconv_combine(nkf_char wc, nkf_char wc2)
2364 {
2365  nkf_char c1, c2;
2366  int i;
2367 
2368  if (wc2 < 0x80) {
2369  return NKF_ICONV_NOT_COMBINED;
2370  }else if ((wc2>>11) == 27) {
2371  /* unpaired surrogate */
2373  }else if (wc2 < 0xFFFF) {
2374  if (!x0213_combining_p(wc2))
2375  return NKF_ICONV_NOT_COMBINED;
2376  for (i = 0; i < sizeof_x0213_combining_table; i++) {
2377  if (x0213_combining_table[i][1] == wc &&
2378  x0213_combining_table[i][2] == wc2) {
2379  c2 = x0213_combining_table[i][0] >> 8;
2380  c1 = x0213_combining_table[i][0] & 0x7f;
2381  (*oconv)(c2, c1);
2382  return 0;
2383  }
2384  }
2385  }else if (wc2 < 0x10FFFF) {
2386  return NKF_ICONV_NOT_COMBINED;
2387  } else {
2389  }
2390  return NKF_ICONV_NOT_COMBINED;
2391 }
2392 
2393 static nkf_char
2394 w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
2395 {
2396  nkf_char wc, wc2;
2397  wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
2398  wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
2399  if (wc2 < 0)
2400  return wc2;
2401  return unicode_iconv_combine(wc, wc2);
2402 }
2403 
2404 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
2405 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
2406 static size_t
2407 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2408 {
2409  nkf_char wc;
2410 
2411  if (c1 == EOF) {
2412  (*oconv)(EOF, 0);
2413  return 0;
2414  }
2415 
2416  if (input_endian == ENDIAN_BIG) {
2417  if (0xD8 <= c1 && c1 <= 0xDB) {
2418  if (0xDC <= c3 && c3 <= 0xDF) {
2419  wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2420  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2421  } else {
2422  wc = c1 << 8 | c2;
2423  }
2424  } else {
2425  if (0xD8 <= c2 && c2 <= 0xDB) {
2426  if (0xDC <= c4 && c4 <= 0xDF) {
2427  wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2428  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2429  } else {
2430  wc = c2 << 8 | c1;
2431  }
2432  }
2433 
2434  return (*unicode_iconv)(wc, FALSE);
2435 }
2436 
2437 static size_t
2438 nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2439 {
2440  nkf_char wc, wc2;
2441 
2442  if (input_endian == ENDIAN_BIG) {
2443  if (0xD8 <= c3 && c3 <= 0xDB) {
2444  return NKF_ICONV_NOT_COMBINED;
2445  } else {
2446  wc = c1 << 8 | c2;
2447  wc2 = c3 << 8 | c4;
2448  }
2449  } else {
2450  if (0xD8 <= c2 && c2 <= 0xDB) {
2451  return NKF_ICONV_NOT_COMBINED;
2452  } else {
2453  wc = c2 << 8 | c1;
2454  wc2 = c4 << 8 | c3;
2455  }
2456  }
2457 
2458  return unicode_iconv_combine(wc, wc2);
2459 }
2460 
2461 static size_t
2462 nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
2463 {
2464  nkf_char wc;
2465  if (input_endian == ENDIAN_BIG)
2466  wc = c1 << 8 | c2;
2467  else
2468  wc = c2 << 8 | c1;
2469  return (*unicode_iconv)(wc, TRUE);
2470 }
2471 
2472 static nkf_char
2473 w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2474 {
2475  (*oconv)(c2, c1);
2476  return 16; /* different from w_iconv32 */
2477 }
2478 
2479 static nkf_char
2480 w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2481 {
2482  (*oconv)(c2, c1);
2483  return 32; /* different from w_iconv16 */
2484 }
2485 
2486 static nkf_char
2487 utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2488 {
2489  nkf_char wc;
2490 
2491  switch(input_endian){
2492  case ENDIAN_BIG:
2493  wc = c2 << 16 | c3 << 8 | c4;
2494  break;
2495  case ENDIAN_LITTLE:
2496  wc = c3 << 16 | c2 << 8 | c1;
2497  break;
2498  case ENDIAN_2143:
2499  wc = c1 << 16 | c4 << 8 | c3;
2500  break;
2501  case ENDIAN_3412:
2502  wc = c4 << 16 | c1 << 8 | c2;
2503  break;
2504  default:
2506  }
2507  return wc;
2508 }
2509 
2510 static size_t
2511 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2512 {
2513  nkf_char wc;
2514 
2515  if (c1 == EOF) {
2516  (*oconv)(EOF, 0);
2517  return 0;
2518  }
2519 
2520  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2521  if (wc < 0)
2522  return wc;
2523 
2524  return (*unicode_iconv)(wc, FALSE);
2525 }
2526 
2527 static nkf_char
2528 nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
2529 {
2530  nkf_char wc, wc2;
2531 
2532  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2533  if (wc < 0)
2534  return wc;
2535  wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
2536  if (wc2 < 0)
2537  return wc2;
2538 
2539  return unicode_iconv_combine(wc, wc2);
2540 }
2541 
2542 static size_t
2543 nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2544 {
2545  nkf_char wc;
2546 
2547  wc = utf32_to_nkf_char(c1, c2, c3, c4);
2548  return (*unicode_iconv)(wc, TRUE);
2549 }
2550 #endif
2551 
/*
 * Unless output_mode is already ASCII or ISO_8859_1, write the three
 * characters ESC, '(' and ascii_intro through o_putc and set output_mode
 * to `mode`.  When output_mode is already one of those two, nothing is
 * written and output_mode is left as it is.
 */
2552 #define output_ascii_escape_sequence(mode) do { \
2553  if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2554  (*o_putc)(ESC); \
2555  (*o_putc)('('); \
2556  (*o_putc)(ascii_intro); \
2557  output_mode = mode; \
2558  } \
2559  } while (0)
2560 
2561 static void
2562 output_escape_sequence(int mode)
2563 {
2564  if (output_mode == mode)
2565  return;
2566  switch(mode) {
2567  case ISO_8859_1:
2568  (*o_putc)(ESC);
2569  (*o_putc)('.');
2570  (*o_putc)('A');
2571  break;
2572  case JIS_X_0201_1976_K:
2573  (*o_putc)(ESC);
2574  (*o_putc)('(');
2575  (*o_putc)('I');
2576  break;
2577  case JIS_X_0208:
2578  (*o_putc)(ESC);
2579  (*o_putc)('$');
2580  (*o_putc)(kanji_intro);
2581  break;
2582  case JIS_X_0212:
2583  (*o_putc)(ESC);
2584  (*o_putc)('$');
2585  (*o_putc)('(');
2586  (*o_putc)('D');
2587  break;
2588  case JIS_X_0213_1:
2589  (*o_putc)(ESC);
2590  (*o_putc)('$');
2591  (*o_putc)('(');
2592  (*o_putc)('Q');
2593  break;
2594  case JIS_X_0213_2:
2595  (*o_putc)(ESC);
2596  (*o_putc)('$');
2597  (*o_putc)('(');
2598  (*o_putc)('P');
2599  break;
2600  }
2601  output_mode = mode;
2602 }
2603 
2604 static void
2605 j_oconv(nkf_char c2, nkf_char c1)
2606 {
2607 #ifdef NUMCHAR_OPTION
2608  if (c2 == 0 && nkf_char_unicode_p(c1)){
2609  w16e_conv(c1, &c2, &c1);
2610  if (c2 == 0 && nkf_char_unicode_p(c1)){
2611  c2 = c1 & VALUE_MASK;
2612  if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2613  /* CP5022x UDC */
2614  c1 &= 0xFFF;
2615  c2 = 0x7F + c1 / 94;
2616  c1 = 0x21 + c1 % 94;
2617  } else {
2618  if (encode_fallback) (*encode_fallback)(c1);
2619  return;
2620  }
2621  }
2622  }
2623 #endif
2624  if (c2 == 0) {
2626  (*o_putc)(c1);
2627  }
2628  else if (c2 == EOF) {
2630  (*o_putc)(EOF);
2631  }
2632  else if (c2 == ISO_8859_1) {
2634  (*o_putc)(c1|0x80);
2635  }
2636  else if (c2 == JIS_X_0201_1976_K) {
2637  output_escape_sequence(JIS_X_0201_1976_K);
2638  (*o_putc)(c1);
2639 #ifdef X0212_ENABLE
2640  } else if (is_eucg3(c2)){
2641  output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2642  (*o_putc)(c2 & 0x7f);
2643  (*o_putc)(c1);
2644 #endif
2645  } else {
2646  if(ms_ucs_map_f
2647  ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2648  : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2649  output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2650  (*o_putc)(c2);
2651  (*o_putc)(c1);
2652  }
2653 }
2654 
/*
 * EUC-JP output converter: writes the character (c2, c1) through o_putc
 * and records the kind of character written in output_mode.
 *
 * c2 is 0 for a single byte in c1, EOF for end of output, ISO_8859_1,
 * JIS_X_0201_1976_K, a value for which is_eucg3() holds, or otherwise the
 * first byte of a two-byte character.
 */
2655 static void
2656 e_oconv(nkf_char c2, nkf_char c1)
2657 {
/* Tagged Unicode value: try the table conversion first. */
2658  if (c2 == 0 && nkf_char_unicode_p(c1)){
2659  w16e_conv(c1, &c2, &c1);
2660  if (c2 == 0 && nkf_char_unicode_p(c1)){
2661  c2 = c1 & VALUE_MASK;
2662  if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2663  /* eucJP-ms UDC */
2664  c1 &= 0xFFF;
2665  c2 = c1 / 94;
2666  c2 += c2 < 10 ? 0x75 : 0x8FEB;
2667  c1 = 0x21 + c1 % 94;
/* is_eucg3 values are written as three bytes with a 0x8f prefix. */
2668  if (is_eucg3(c2)){
2669  (*o_putc)(0x8f);
2670  (*o_putc)((c2 & 0x7f) | 0x080);
2671  (*o_putc)(c1 | 0x080);
2672  }else{
2673  (*o_putc)((c2 & 0x7f) | 0x080);
2674  (*o_putc)(c1 | 0x080);
2675  }
2676  return;
2677  } else {
/* Still unconverted: hand it to encode_fallback, if one is set. */
2678  if (encode_fallback) (*encode_fallback)(c1);
2679  return;
2680  }
2681  }
2682  }
2683 
2684  if (c2 == EOF) {
2685  (*o_putc)(EOF);
2686  } else if (c2 == 0) {
2687  output_mode = ASCII;
2688  (*o_putc)(c1);
2689  } else if (c2 == JIS_X_0201_1976_K) {
2690  output_mode = EUC_JP;
2691  (*o_putc)(SS2); (*o_putc)(c1|0x80);
2692  } else if (c2 == ISO_8859_1) {
2693  output_mode = ISO_8859_1;
2694  (*o_putc)(c1 | 0x080);
2695 #ifdef X0212_ENABLE
2696  } else if (is_eucg3(c2)){
2697  output_mode = EUC_JP;
2698 #ifdef SHIFTJIS_CP932
/* Unless cp932inv_f is set, round-trip through e2s_conv/s2e_conv;
 * this may rewrite (c2, c1) before it is written below. */
2699  if (!cp932inv_f){
2700  nkf_char s2, s1;
2701  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2702  s2e_conv(s2, s1, &c2, &c1);
2703  }
2704  }
2705 #endif
2706  if (c2 == 0) {
2707  output_mode = ASCII;
2708  (*o_putc)(c1);
2709  }else if (is_eucg3(c2)){
/* Three-byte form is written only when x0212_f is set; otherwise
 * nothing is output for this character. */
2710  if (x0212_f){
2711  (*o_putc)(0x8f);
2712  (*o_putc)((c2 & 0x7f) | 0x080);
2713  (*o_putc)(c1 | 0x080);
2714  }
2715  }else{
2716  (*o_putc)((c2 & 0x7f) | 0x080);
2717  (*o_putc)(c1 | 0x080);
2718  }
2719 #endif
2720  } else {
/* Both bytes must satisfy nkf_isgraph(); otherwise the input
 * converter is reset with set_iconv(FALSE, 0) and nothing is written. */
2721  if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2722  set_iconv(FALSE, 0);
2723  return; /* too late to rescue this char */
2724  }
2725  output_mode = EUC_JP;
2726  (*o_putc)(c2 | 0x080);
2727  (*o_putc)(c1 | 0x080);
2728  }
2729 }
2730 
/*
 * Shift_JIS output converter: writes the character (c2, c1) through
 * o_putc and records the kind of character written in output_mode.
 *
 * c2 is 0 for a single byte in c1, EOF for end of output, ISO_8859_1,
 * JIS_X_0201_1976_K, a value for which is_eucg3() holds, or otherwise the
 * first byte of a two-byte character.
 */
2731 static void
2732 s_oconv(nkf_char c2, nkf_char c1)
2733 {
2734 #ifdef NUMCHAR_OPTION
/* Tagged Unicode value: try the table conversion first. */
2735  if (c2 == 0 && nkf_char_unicode_p(c1)){
2736  w16e_conv(c1, &c2, &c1);
2737  if (c2 == 0 && nkf_char_unicode_p(c1)){
2738  c2 = c1 & VALUE_MASK;
2739  if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2740  /* CP932 UDC */
2741  c1 &= 0xFFF;
/* 188 code positions per lead byte; the lead-byte base depends on
 * cp932inv_f. */
2742  c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2743  c1 = c1 % 188;
/* Trail bytes start at 0x40; offsets above 0x3e get one extra,
 * which skips 0x7f. */
2744  c1 += 0x40 + (c1 > 0x3e);
2745  (*o_putc)(c2);
2746  (*o_putc)(c1);
2747  return;
2748  } else {
/* Still unconverted: hand it to encode_fallback, if one is set. */
2749  if(encode_fallback)(*encode_fallback)(c1);
2750  return;
2751  }
2752  }
2753  }
2754 #endif
2755  if (c2 == EOF) {
2756  (*o_putc)(EOF);
2757  return;
2758  } else if (c2 == 0) {
2759  output_mode = ASCII;
2760  (*o_putc)(c1);
2761  } else if (c2 == JIS_X_0201_1976_K) {
2762  output_mode = SHIFT_JIS;
2763  (*o_putc)(c1|0x80);
2764  } else if (c2 == ISO_8859_1) {
2765  output_mode = ISO_8859_1;
2766  (*o_putc)(c1 | 0x080);
2767 #ifdef X0212_ENABLE
2768  } else if (is_eucg3(c2)){
2769  output_mode = SHIFT_JIS;
/* Written only when e2s_conv() reports success (returns 0). */
2770  if (e2s_conv(c2, c1, &c2, &c1) == 0){
2771  (*o_putc)(c2);
2772  (*o_putc)(c1);
2773  }
2774 #endif
2775  } else {
/* Both bytes must satisfy nkf_isprint(); otherwise the input
 * converter is reset with set_iconv(FALSE, 0) and nothing is written. */
2776  if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2777  set_iconv(FALSE, 0);
2778  return; /* too late to rescue this char */
2779  }
2780  output_mode = SHIFT_JIS;
2781  e2s_conv(c2, c1, &c2, &c1);
2782 
2783 #ifdef SHIFTJIS_CP932
/* With cp932inv_f set, lead bytes inside the cp932inv table range are
 * remapped through that table when the entry is non-zero. */
2784  if (cp932inv_f
2785  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2786  nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2787  if (c){
2788  c2 = c >> 8;
2789  c1 = c & 0xff;
2790  }
2791  }
2792 #endif /* SHIFTJIS_CP932 */
2793 
2794  (*o_putc)(c2);
/* A non-zero prefix_table entry for the trail byte is written just
 * before the trail byte itself. */
2795  if (prefix_table[(unsigned char)c1]){
2796  (*o_putc)(prefix_table[(unsigned char)c1]);
2797  }
2798  (*o_putc)(c1);
2799  }
2800 }
2801 
2802 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Encode `val` with nkf_unicode_to_utf8() and write the resulting bytes
 * through o_putc.  The first byte is always written; the second to fourth
 * only when non-zero.  The macro stores into variables named c1, c2, c3
 * and c4, which must exist in the calling scope and are overwritten.
 */
2803 #define OUTPUT_UTF8(val) do { \
2804  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
2805  (*o_putc)(c1); \
2806  if (c2) (*o_putc)(c2); \
2807  if (c3) (*o_putc)(c3); \
2808  if (c4) (*o_putc)(c4); \
2809  } while (0)
2810 
2811 static void
2812 w_oconv(nkf_char c2, nkf_char c1)
2813 {
2814  nkf_char c3, c4;
2815  nkf_char val, val2;
2816 
2817  if (output_bom_f) {
2818  output_bom_f = FALSE;
2819  (*o_putc)('\357');
2820  (*o_putc)('\273');
2821  (*o_putc)('\277');
2822  }
2823 
2824  if (c2 == EOF) {
2825  (*o_putc)(EOF);
2826  return;
2827  }
2828 
2829  if (c2 == 0 && nkf_char_unicode_p(c1)){
2830  val = c1 & VALUE_MASK;
2831  OUTPUT_UTF8(val);
2832  return;
2833  }
2834 
2835  if (c2 == 0) {
2836  (*o_putc)(c1);
2837  } else {
2838  val = e2w_conv(c2, c1);
2839  if (val){
2840  val2 = e2w_combining(val, c2, c1);
2841  if (val2)
2842  OUTPUT_UTF8(val2);
2843  OUTPUT_UTF8(val);
2844  }
2845  }
2846 }
2847 
/*
 * OUTPUT_UTF16_BYTES(c1, c2): write one 16-bit unit through o_putc, where
 * the first argument is the low byte and the second the high byte.  The
 * low byte goes first when output_endian is ENDIAN_LITTLE, otherwise the
 * high byte goes first.
 */
2848 #define OUTPUT_UTF16_BYTES(c1, c2) do { \
2849  if (output_endian == ENDIAN_LITTLE){ \
2850  (*o_putc)(c1); \
2851  (*o_putc)(c2); \
2852  }else{ \
2853  (*o_putc)(c2); \
2854  (*o_putc)(c1); \
2855  } \
2856  } while (0)
2857 
/*
 * OUTPUT_UTF16(val): write `val` as UTF-16.  When nkf_char_unicode_bmp_p()
 * holds it is written as a single unit; otherwise it is masked with
 * VALUE_MASK and, if not above UNICODE_MAX, written as a surrogate pair
 * (values above UNICODE_MAX produce no output).  `val` must be a
 * modifiable lvalue, and the macro assigns to variables named c1 and c2 in
 * the calling scope.
 */
2858 #define OUTPUT_UTF16(val) do { \
2859  if (nkf_char_unicode_bmp_p(val)) { \
2860  c2 = (val >> 8) & 0xff; \
2861  c1 = val & 0xff; \
2862  OUTPUT_UTF16_BYTES(c1, c2); \
2863  } else { \
2864  val &= VALUE_MASK; \
2865  if (val <= UNICODE_MAX) { \
2866  c2 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */ \
2867  c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \
2868  OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
2869  OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
2870  } \
2871  } \
2872  } while (0)
2873 
2874 static void
2875 w_oconv16(nkf_char c2, nkf_char c1)
2876 {
2877  if (output_bom_f) {
2878  output_bom_f = FALSE;
2879  OUTPUT_UTF16_BYTES(0xFF, 0xFE);
2880  }
2881 
2882  if (c2 == EOF) {
2883  (*o_putc)(EOF);
2884  return;
2885  }
2886 
2887  if (c2 == 0 && nkf_char_unicode_p(c1)) {
2888  OUTPUT_UTF16(c1);
2889  } else if (c2) {
2890  nkf_char val, val2;
2891  val = e2w_conv(c2, c1);
2892  if (!val) return;
2893  val2 = e2w_combining(val, c2, c1);
2894  if (val2)
2895  OUTPUT_UTF16(val2);
2896  OUTPUT_UTF16(val);
2897  } else {
2898  OUTPUT_UTF16_BYTES(c1, c2);
2899  }
2900 }
2901 
/*
 * Write the low 24 bits of `c` as one four-byte UTF-32 unit through
 * o_putc; the most significant byte written is always 0.  The least
 * significant byte goes first when output_endian is ENDIAN_LITTLE,
 * otherwise the most significant byte goes first.  The argument is
 * evaluated several times.
 */
2902 #define OUTPUT_UTF32(c) do { \
2903  if (output_endian == ENDIAN_LITTLE){ \
2904  (*o_putc)( (c) & 0xFF); \
2905  (*o_putc)(((c) >> 8) & 0xFF); \
2906  (*o_putc)(((c) >> 16) & 0xFF); \
2907  (*o_putc)(0); \
2908  }else{ \
2909  (*o_putc)(0); \
2910  (*o_putc)(((c) >> 16) & 0xFF); \
2911  (*o_putc)(((c) >> 8) & 0xFF); \
2912  (*o_putc)( (c) & 0xFF); \
2913  } \
2914  } while (0)
2915 
2916 static void
2917 w_oconv32(nkf_char c2, nkf_char c1)
2918 {
2919  if (output_bom_f) {
2920  output_bom_f = FALSE;
2921  if (output_endian == ENDIAN_LITTLE){
2922  (*o_putc)(0xFF);
2923  (*o_putc)(0xFE);
2924  (*o_putc)(0);
2925  (*o_putc)(0);
2926  }else{
2927  (*o_putc)(0);
2928  (*o_putc)(0);
2929  (*o_putc)(0xFE);
2930  (*o_putc)(0xFF);
2931  }
2932  }
2933 
2934  if (c2 == EOF) {
2935  (*o_putc)(EOF);
2936  return;
2937  }
2938 
2939  if (c2 == ISO_8859_1) {
2940  c1 |= 0x80;
2941  } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2942  c1 &= VALUE_MASK;
2943  } else if (c2) {
2944  nkf_char val, val2;
2945  val = e2w_conv(c2, c1);
2946  if (!val) return;
2947  val2 = e2w_combining(val, c2, c1);
2948  if (val2)
2949  OUTPUT_UTF32(val2);
2950  c1 = val;
2951  }
2952  OUTPUT_UTF32(c1);
2953 }
2954 #endif
2955 
/*
 * Score bits accumulated in input_code.score by set_code_score().  Each
 * constant is the previous one shifted left by one, so every score is a
 * distinct bit and several can be OR-ed together.
 */
2956 #define SCORE_L2 (1) /* Kanji Level 2 */
2957 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2958 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2959 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2960 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2961 #define SCORE_X0213 (SCORE_X0212 << 1) /* JIS X 0213 */
2962 #define SCORE_NO_EXIST (SCORE_X0213 << 1) /* Undefined Characters */
2963 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2964 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
2965 
/* Score assigned by status_reset(). */
2966 #define SCORE_INIT (SCORE_iMIME)
2967 
/*
 * Score lookup tables used by code_score(), which indexes each of them
 * with the low four bits of a byte (value & 0x0f), i.e. with 0..15.
 * NOTE(review): the initializer rows of these tables appear to be missing
 * from this listing (only eight entries of score_table_A0 are visible and
 * the other tables are empty) -- restore them from the original nkf.c
 * before building.
 */
2968 static const nkf_char score_table_A0[] = {
2969  0, 0, 0, 0,
2970  0, 0, 0, 0,
2973 };
2974 
2975 static const nkf_char score_table_F0[] = {
2980 };
2981 
2982 static const nkf_char score_table_8FA0[] = {
2987 };
2988 
2989 static const nkf_char score_table_8FE0[] = {
2994 };
2995 
2996 static const nkf_char score_table_8FF0[] = {
3001 };
3002 
3003 static void
3004 set_code_score(struct input_code *ptr, nkf_char score)
3005 {
3006  if (ptr){
3007  ptr->score |= score;
3008  }
3009 }
3010 
3011 static void
3012 clr_code_score(struct input_code *ptr, nkf_char score)
3013 {
3014  if (ptr){
3015  ptr->score &= ~score;
3016  }
3017 }
3018 
3019 static void
3020 code_score(struct input_code *ptr)
3021 {
3022  nkf_char c2 = ptr->buf[0];
3023  nkf_char c1 = ptr->buf[1];
3024  if (c2 < 0){
3025  set_code_score(ptr, SCORE_ERROR);
3026  }else if (c2 == SS2){
3027  set_code_score(ptr, SCORE_KANA);
3028  }else if (c2 == 0x8f){
3029  if ((c1 & 0x70) == 0x20){
3030  set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
3031  }else if ((c1 & 0x70) == 0x60){
3032  set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
3033  }else if ((c1 & 0x70) == 0x70){
3034  set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
3035  }else{
3036  set_code_score(ptr, SCORE_X0212);
3037  }
3038 #ifdef UTF8_OUTPUT_ENABLE
3039  }else if (!e2w_conv(c2, c1)){
3040  set_code_score(ptr, SCORE_NO_EXIST);
3041 #endif
3042  }else if ((c2 & 0x70) == 0x20){
3043  set_code_score(ptr, score_table_A0[c2 & 0x0f]);
3044  }else if ((c2 & 0x70) == 0x70){
3045  set_code_score(ptr, score_table_F0[c2 & 0x0f]);
3046  }else if ((c2 & 0x70) >= 0x50){
3047  set_code_score(ptr, SCORE_L2);
3048  }
3049 }
3050 
3051 static void
3052 status_disable(struct input_code *ptr)
3053 {
3054  ptr->stat = -1;
3055  ptr->buf[0] = -1;
3056  code_score(ptr);
3057  if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
3058 }
3059 
3060 static void
3061 status_push_ch(struct input_code *ptr, nkf_char c)
3062 {
3063  ptr->buf[ptr->index++] = c;
3064 }
3065 
3066 static void
3067 status_clear(struct input_code *ptr)
3068 {
3069  ptr->stat = 0;
3070  ptr->index = 0;
3071 }
3072 
3073 static void
3074 status_reset(struct input_code *ptr)
3075 {
3076  status_clear(ptr);
3077  ptr->score = SCORE_INIT;
3078 }
3079 
3080 static void
3081 status_reinit(struct input_code *ptr)
3082 {
3083  status_reset(ptr);
3084  ptr->_file_stat = 0;
3085 }
3086 
3087 static void
3088 status_check(struct input_code *ptr, nkf_char c)
3089 {
3090  if (c <= DEL && estab_f){
3091  status_reset(ptr);
3092  }
3093 }
3094 
/*
 * Shift_JIS candidate state machine: feeds one input byte `c` to `ptr`.
 *
 * ptr->stat values used here:
 *   -1  candidate disabled (only status_check() is run);
 *    0  between characters;
 *    1  lead byte stored, second byte expected;
 *    2  lead byte accepted by is_ibmext_in_sjis() stored, second byte
 *       expected;
 *    3  lead byte 0xed-0xee stored, second byte expected.
 * A byte that does not fit the current state disables the candidate.
 */
3095 static void
3096 s_status(struct input_code *ptr, nkf_char c)
3097 {
3098  switch(ptr->stat){
3099  case -1:
3100  status_check(ptr, c);
3101  break;
3102  case 0:
/* Bytes up to DEL and tagged Unicode values are ignored. */
3103  if (c <= DEL){
3104  break;
3105  }else if (nkf_char_unicode_p(c)){
3106  break;
3107  }else if (0xa1 <= c && c <= 0xdf){
/* Complete single-byte character: scored as an (SS2, c) pair. */
3108  status_push_ch(ptr, SS2);
3109  status_push_ch(ptr, c);
3110  code_score(ptr);
3111  status_clear(ptr);
3112  }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3113  ptr->stat = 1;
3114  status_push_ch(ptr, c);
3115  }else if (0xed <= c && c <= 0xee){
3116  ptr->stat = 3;
3117  status_push_ch(ptr, c);
3118 #ifdef SHIFTJIS_CP932
3119  }else if (is_ibmext_in_sjis(c)){
3120  ptr->stat = 2;
3121  status_push_ch(ptr, c);
3122 #endif /* SHIFTJIS_CP932 */
3123 #ifdef X0212_ENABLE
3124  }else if (0xf0 <= c && c <= 0xfc){
3125  ptr->stat = 1;
3126  status_push_ch(ptr, c);
3127 #endif /* X0212_ENABLE */
3128  }else{
3129  status_disable(ptr);
3130  }
3131  break;
3132  case 1:
3133  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3134  status_push_ch(ptr, c);
/* Convert the pair in place with s2e_conv() so that code_score()
 * sees the converted bytes. */
3135  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3136  code_score(ptr);
3137  status_clear(ptr);
3138  }else{
3139  status_disable(ptr);
3140  }
3141  break;
3142  case 2:
3143 #ifdef SHIFTJIS_CP932
3144  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3145  status_push_ch(ptr, c);
/* Accepted only if s2e_conv() succeeds (returns 0); any other case
 * falls through to status_disable() below. */
3146  if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
3147  set_code_score(ptr, SCORE_CP932);
3148  status_clear(ptr);
3149  break;
3150  }
3151  }
3152 #endif /* SHIFTJIS_CP932 */
3153  status_disable(ptr);
3154  break;
3155  case 3:
3156  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3157  status_push_ch(ptr, c);
/* Unlike state 2, the result of s2e_conv() is not checked here. */
3158  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3159  set_code_score(ptr, SCORE_CP932);
3160  status_clear(ptr);
3161  }else{
3162  status_disable(ptr);
3163  }
3164  break;
3165  }
3166 }
3167 
/*
 * EUC-JP candidate state machine: feeds one input byte `c` to `ptr`.
 *
 * ptr->stat values used here:
 *   -1  candidate disabled (only status_check() is run);
 *    0  between characters;
 *    1  one more byte in 0xa1-0xfe expected to finish the character;
 *    2  0x8f prefix stored, two more bytes expected (X0212_ENABLE only).
 * A byte that does not fit the current state disables the candidate.
 */
3168 static void
3169 e_status(struct input_code *ptr, nkf_char c)
3170 {
3171  switch (ptr->stat){
3172  case -1:
3173  status_check(ptr, c);
3174  break;
3175  case 0:
/* Bytes up to DEL and tagged Unicode values are ignored. */
3176  if (c <= DEL){
3177  break;
3178  }else if (nkf_char_unicode_p(c)){
3179  break;
3180  }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
3181  ptr->stat = 1;
3182  status_push_ch(ptr, c);
3183 #ifdef X0212_ENABLE
3184  }else if (0x8f == c){
3185  ptr->stat = 2;
3186  status_push_ch(ptr, c);
3187 #endif /* X0212_ENABLE */
3188  }else{
3189  status_disable(ptr);
3190  }
3191  break;
3192  case 1:
3193  if (0xa1 <= c && c <= 0xfe){
/* Character complete: score it and return to state 0. */
3194  status_push_ch(ptr, c);
3195  code_score(ptr);
3196  status_clear(ptr);
3197  }else{
3198  status_disable(ptr);
3199  }
3200  break;
3201 #ifdef X0212_ENABLE
3202  case 2:
3203  if (0xa1 <= c && c <= 0xfe){
/* Middle byte after 0x8f: the final byte is handled by state 1. */
3204  ptr->stat = 1;
3205  status_push_ch(ptr, c);
3206  }else{
3207  status_disable(ptr);
3208  }
/* No break needed: this is the last case of the switch. */
3209 #endif /* X0212_ENABLE */
3210  }
3211 }
3212 
3213 #ifdef UTF8_INPUT_ENABLE
3214 static void
3215 w_status(struct input_code *ptr, nkf_char c)
3216 {
3217  switch (ptr->stat){
3218  case -1:
3219  status_check(ptr, c);
3220  break;
3221  case 0:
3222  if (c <= DEL){
3223  break;
3224  }else if (nkf_char_unicode_p(c)){
3225  break;
3226  }else if (0xc0 <= c && c <= 0xdf){
3227  ptr->stat = 1;
3228  status_push_ch(ptr, c);
3229  }else if (0xe0 <= c && c <= 0xef){
3230  ptr->stat = 2;
3231  status_push_ch(ptr, c);
3232  }else if (0xf0 <= c && c <= 0xf4){
3233  ptr->stat = 3;
3234  status_push_ch(ptr, c);
3235  }else{
3236  status_disable(ptr);
3237  }
3238  break;
3239  case 1:
3240  case 2:
3241  if (0x80 <= c && c <= 0xbf){
3242  status_push_ch(ptr, c);
3243  if (ptr->index > ptr->stat){
3244  int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
3245  && ptr->buf[2] == 0xbf);
3246  w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
3247  &ptr->buf[0], &ptr->buf[1]);
3248  if (!bom){
3249  code_score(ptr);
3250  }
3251  status_clear(ptr);
3252  }
3253  }else{
3254  status_disable(ptr);
3255  }
3256  break;
3257  case 3:
3258  if (0x80 <= c && c <= 0xbf){
3259  if (ptr->index < ptr->stat){
3260  status_push_ch(ptr, c);
3261  } else {
3262  status_clear(ptr);
3263  }
3264  }else{
3265  status_disable(ptr);
3266  }
3267  break;
3268  }
3269 }
3270 #endif
3271 
3272 static void
3273 code_status(nkf_char c)
3274 {
3275  int action_flag = 1;
3276  struct input_code *result = 0;
3277  struct input_code *p = input_code_list;
3278  while (p->name){
3279  if (!p->status_func) {
3280  ++p;
3281  continue;
3282  }
3283  if (!p->status_func)
3284  continue;
3285  (p->status_func)(p, c);
3286  if (p->stat > 0){
3287  action_flag = 0;
3288  }else if(p->stat == 0){
3289  if (result){
3290  action_flag = 0;
3291  }else{
3292  result = p;
3293  }
3294  }
3295  ++p;
3296  }
3297 
3298  if (action_flag){
3299  if (result && !estab_f){
3300  set_iconv(TRUE, result->iconv_func);
3301  }else if (c <= DEL){
3302  struct input_code *ptr = input_code_list;
3303  while (ptr->name){
3304  status_reset(ptr);
3305  ++ptr;
3306  }
3307  }
3308  }
3309 }
3310 
3311 typedef struct {
3317 } nkf_state_t;
3318 
3319 static nkf_state_t *nkf_state = NULL;
3320 
3321 #define STD_GC_BUFSIZE (256)
3322 
3323 static void
3324 nkf_state_init(void)
3325 {
3326  if (nkf_state) {
3327  nkf_buf_clear(nkf_state->std_gc_buf);
3328  nkf_buf_clear(nkf_state->broken_buf);
3329  nkf_buf_clear(nkf_state->nfc_buf);
3330  }
3331  else {
3332  nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3333  nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3334  nkf_state->broken_buf = nkf_buf_new(3);
3335  nkf_state->nfc_buf = nkf_buf_new(9);
3336  }
3337  nkf_state->broken_state = 0;
3338  nkf_state->mimeout_state = 0;
3339 }
3340 
3341 #ifndef WIN32DLL
3342 static nkf_char
3343 std_getc(FILE *f)
3344 {
3345  if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3346  return nkf_buf_pop(nkf_state->std_gc_buf);
3347  }
3348  return getc(f);
3349 }
3350 #endif /*WIN32DLL*/
3351 
3352 static nkf_char
3353 std_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3354 {
3355  nkf_buf_push(nkf_state->std_gc_buf, c);
3356  return c;
3357 }
3358 
3359 #ifndef WIN32DLL
3360 static void
3361 std_putc(nkf_char c)
3362 {
3363  if(c!=EOF)
3364  putchar(c);
3365 }
3366 #endif /*WIN32DLL*/
3367 
3368 static nkf_char hold_buf[HOLD_SIZE*2];
3369 static int hold_count = 0;
3370 static nkf_char
3371 push_hold_buf(nkf_char c2)
3372 {
3373  if (hold_count >= HOLD_SIZE*2)
3374  return (EOF);
3375  hold_buf[hold_count++] = c2;
3376  return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3377 }
3378 
/*
 * Convert input whose encoding is not established yet.
 *
 * c1 and c2 are the first two pending bytes.  They and the following
 * input bytes are collected in hold_buf while code_status() examines
 * each byte, until the encoding is established (estab_f), the hold buffer
 * fills up, an ESC is read (it is pushed back) or EOF is reached.  If the
 * encoding is still not established, the candidate with the lowest score
 * is selected.  The held bytes are then replayed through the selected
 * input converter.
 *
 * Returns the last value read by the collecting loop, or EOF when input
 * ran out in the middle of a multi-byte character during the replay.
 */
3379 static int
3380 h_conv(FILE *f, nkf_char c1, nkf_char c2)
3381 {
3382  int ret;
3383  int hold_index;
/* Number of bytes of the current character that were taken from
 * hold_buf rather than from the input stream; used to decide whether
 * a look-ahead byte is returned by rewinding hold_index or by
 * i_ungetc. */
3384  int fromhold_count;
3385  nkf_char c3, c4;
3386 
3391  hold_count = 0;
3392  push_hold_buf(c1);
3393  push_hold_buf(c2);
3394 
/* Collect bytes until a decision can be made. */
3395  while ((c2 = (*i_getc)(f)) != EOF) {
3396  if (c2 == ESC){
3397  (*i_ungetc)(c2,f);
3398  break;
3399  }
3400  code_status(c2);
3401  if (push_hold_buf(c2) == EOF || estab_f) {
3402  break;
3403  }
3404  }
3405 
3406  if (!estab_f) {
/* Not established: take the candidate with the lowest score among
 * those that have a status function. */
3407  struct input_code *p = input_code_list;
3408  struct input_code *result = p;
3409  if (c2 == EOF) {
3410  code_status(c2);
3411  }
3412  while (p->name) {
3413  if (p->status_func && p->score < result->score) {
3414  result = p;
3415  }
3416  p++;
3417  }
3418  set_iconv(TRUE, result->iconv_func);
3419  }
3420 
3421 
3431  ret = c2;
3432  hold_index = 0;
/* Replay the held bytes through the selected converter. */
3433  while (hold_index < hold_count){
3434  c1 = hold_buf[hold_index++];
3435  if (nkf_char_unicode_p(c1)) {
3436  (*oconv)(0, c1);
3437  continue;
3438  }
3439  else if (c1 <= DEL){
3440  (*iconv)(0, c1, 0);
3441  continue;
3442  }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3443  (*iconv)(JIS_X_0201_1976_K, c1, 0);
3444  continue;
3445  }
/* Multi-byte character: fetch the second byte, from the hold buffer
 * if any is left, otherwise from the input. */
3446  fromhold_count = 1;
3447  if (hold_index < hold_count){
3448  c2 = hold_buf[hold_index++];
3449  fromhold_count++;
3450  }else{
3451  c2 = (*i_getc)(f);
3452  if (c2 == EOF){
3453  c4 = EOF;
3454  break;
3455  }
3456  code_status(c2);
3457  }
3458  c3 = 0;
/* A negative result asks for more bytes: -1 one more, -2 two more,
 * -3 a look-ahead at the next character for a combining pair. */
3459  switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3460  case -2:
3461  /* 4 bytes UTF-8 */
3462  if (hold_index < hold_count){
3463  c3 = hold_buf[hold_index++];
3464  } else if ((c3 = (*i_getc)(f)) == EOF) {
3465  ret = EOF;
3466  break;
3467  }
3468  code_status(c3);
3469  if (hold_index < hold_count){
3470  c4 = hold_buf[hold_index++];
3471  } else if ((c4 = (*i_getc)(f)) == EOF) {
3472  c3 = ret = EOF;
3473  break;
3474  }
3475  code_status(c4);
/* Third and fourth bytes are passed packed into one argument. */
3476  (*iconv)(c1, c2, (c3<<8)|c4);
3477  break;
3478  case -3:
3479  /* 4 bytes UTF-8 (check combining character) */
3480  if (hold_index < hold_count){
3481  c3 = hold_buf[hold_index++];
3482  fromhold_count++;
3483  } else if ((c3 = (*i_getc)(f)) == EOF) {
3484  w_iconv_nocombine(c1, c2, 0);
3485  break;
3486  }
3487  if (hold_index < hold_count){
3488  c4 = hold_buf[hold_index++];
3489  fromhold_count++;
3490  } else if ((c4 = (*i_getc)(f)) == EOF) {
3491  w_iconv_nocombine(c1, c2, 0);
3492  if (fromhold_count <= 2)
3493  (*i_ungetc)(c3,f);
3494  else
3495  hold_index--;
3496  continue;
3497  }
/* A non-zero result means the pair did not combine: convert the
 * first character alone and give the look-ahead bytes back. */
3498  if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
3499  w_iconv_nocombine(c1, c2, 0);
3500  if (fromhold_count <= 2) {
3501  (*i_ungetc)(c4,f);
3502  (*i_ungetc)(c3,f);
3503  } else if (fromhold_count == 3) {
3504  (*i_ungetc)(c4,f);
3505  hold_index--;
3506  } else {
3507  hold_index -= 2;
3508  }
3509  }
3510  break;
3511  case -1:
3512  /* 3 bytes EUC or UTF-8 */
3513  if (hold_index < hold_count){
3514  c3 = hold_buf[hold_index++];
3515  fromhold_count++;
3516  } else if ((c3 = (*i_getc)(f)) == EOF) {
3517  ret = EOF;
3518  break;
3519  } else {
3520  code_status(c3);
3521  }
3522  if ((*iconv)(c1, c2, c3) == -3) {
3523  /* 6 bytes UTF-8 (check combining character) */
3524  nkf_char c5, c6;
/* Fetch the three look-ahead bytes one at a time; on EOF convert
 * the first character alone and give back what was already read. */
3525  if (hold_index < hold_count){
3526  c4 = hold_buf[hold_index++];
3527  fromhold_count++;
3528  } else if ((c4 = (*i_getc)(f)) == EOF) {
3529  w_iconv_nocombine(c1, c2, c3);
3530  continue;
3531  }
3532  if (hold_index < hold_count){
3533  c5 = hold_buf[hold_index++];
3534  fromhold_count++;
3535  } else if ((c5 = (*i_getc)(f)) == EOF) {
3536  w_iconv_nocombine(c1, c2, c3);
3537  if (fromhold_count == 4)
3538  hold_index--;
3539  else
3540  (*i_ungetc)(c4,f);
3541  continue;
3542  }
3543  if (hold_index < hold_count){
3544  c6 = hold_buf[hold_index++];
3545  fromhold_count++;
3546  } else if ((c6 = (*i_getc)(f)) == EOF) {
3547  w_iconv_nocombine(c1, c2, c3);
3548  if (fromhold_count == 5) {
3549  hold_index -= 2;
3550  } else if (fromhold_count == 4) {
3551  hold_index--;
3552  (*i_ungetc)(c5,f);
3553  } else {
3554  (*i_ungetc)(c5,f);
3555  (*i_ungetc)(c4,f);
3556  }
3557  continue;
3558  }
/* Not combined: convert the first character alone and return the
 * three look-ahead bytes, last-read first. */
3559  if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
3560  w_iconv_nocombine(c1, c2, c3);
3561  if (fromhold_count == 6) {
3562  hold_index -= 3;
3563  } else if (fromhold_count == 5) {
3564  hold_index -= 2;
3565  (*i_ungetc)(c6,f);
3566  } else if (fromhold_count == 4) {
3567  hold_index--;
3568  (*i_ungetc)(c6,f);
3569  (*i_ungetc)(c5,f);
3570  } else {
3571  (*i_ungetc)(c6,f);
3572  (*i_ungetc)(c5,f);
3573  (*i_ungetc)(c4,f);
3574  }
3575  }
3576  }
3577  break;
3578  }
/* c3 holds EOF when the input ended inside a character. */
3579  if (c3 == EOF) break;
3580  }
3581  return ret;
3582 }
3583 
3584 /*
3585  * Check and Ignore BOM
3586  */
3587 static void
3588 check_bom(FILE *f)
3589 {
3590  int c2;
3591  input_bom_f = FALSE;
3592  switch(c2 = (*i_getc)(f)){
3593  case 0x00:
3594  if((c2 = (*i_getc)(f)) == 0x00){
3595  if((c2 = (*i_getc)(f)) == 0xFE){
3596  if((c2 = (*i_getc)(f)) == 0xFF){
3597  if(!input_encoding){
3598  set_iconv(TRUE, w_iconv32);
3599  }
3600  if (iconv == w_iconv32) {
3601  input_bom_f = TRUE;
3602  input_endian = ENDIAN_BIG;
3603  return;
3604  }
3605  (*i_ungetc)(0xFF,f);
3606  }else (*i_ungetc)(c2,f);
3607  (*i_ungetc)(0xFE,f);
3608  }else if(c2 == 0xFF){
3609  if((c2 = (*i_getc)(f)) == 0xFE){
3610  if(!input_encoding){
3611  set_iconv(TRUE, w_iconv32);
3612  }
3613  if (iconv == w_iconv32) {
3614  input_endian = ENDIAN_2143;
3615  return;
3616  }
3617  (*i_ungetc)(0xFF,f);
3618  }else (*i_ungetc)(c2,f);
3619  (*i_ungetc)(0xFF,f);
3620  }else (*i_ungetc)(c2,f);
3621  (*i_ungetc)(0x00,f);
3622  }else (*i_ungetc)(c2,f);
3623  (*i_ungetc)(0x00,f);
3624  break;
3625  case 0xEF:
3626  if((c2 = (*i_getc)(f)) == 0xBB){
3627  if((c2 = (*i_getc)(f)) == 0xBF){
3628  if(!input_encoding){
3629  set_iconv(TRUE, w_iconv);
3630  }
3631  if (iconv == w_iconv) {
3632  input_bom_f = TRUE;
3633  return;
3634  }
3635  (*i_ungetc)(0xBF,f);
3636  }else (*i_ungetc)(c2,f);
3637  (*i_ungetc)(0xBB,f);
3638  }else (*i_ungetc)(c2,f);
3639  (*i_ungetc)(0xEF,f);
3640  break;
3641  case 0xFE:
3642  if((c2 = (*i_getc)(f)) == 0xFF){
3643  if((c2 = (*i_getc)(f)) == 0x00){
3644  if((c2 = (*i_getc)(f)) == 0x00){
3645  if(!input_encoding){
3646  set_iconv(TRUE, w_iconv32);
3647  }
3648  if (iconv == w_iconv32) {
3649  input_endian = ENDIAN_3412;
3650  return;
3651  }
3652  (*i_ungetc)(0x00,f);
3653  }else (*i_ungetc)(c2,f);
3654  (*i_ungetc)(0x00,f);
3655  }else (*i_ungetc)(c2,f);
3656  if(!input_encoding){
3657  set_iconv(TRUE, w_iconv16);
3658  }
3659  if (iconv == w_iconv16) {
3660  input_endian = ENDIAN_BIG;
3661  input_bom_f = TRUE;
3662  return;
3663  }
3664  (*i_ungetc)(0xFF,f);
3665  }else (*i_ungetc)(c2,f);
3666  (*i_ungetc)(0xFE,f);
3667  break;
3668  case 0xFF:
3669  if((c2 = (*i_getc)(f)) == 0xFE){
3670  if((c2 = (*i_getc)(f)) == 0x00){
3671  if((c2 = (*i_getc)(f)) == 0x00){
3672  if(!input_encoding){
3673  set_iconv(TRUE, w_iconv32);
3674  }
3675  if (iconv == w_iconv32) {
3676  input_endian = ENDIAN_LITTLE;
3677  input_bom_f = TRUE;
3678  return;
3679  }
3680  (*i_ungetc)(0x00,f);
3681  }else (*i_ungetc)(c2,f);
3682  (*i_ungetc)(0x00,f);
3683  }else (*i_ungetc)(c2,f);
3684  if(!input_encoding){
3685  set_iconv(TRUE, w_iconv16);
3686  }
3687  if (iconv == w_iconv16) {
3688  input_endian = ENDIAN_LITTLE;
3689  input_bom_f = TRUE;
3690  return;
3691  }
3692  (*i_ungetc)(0xFE,f);
3693  }else (*i_ungetc)(c2,f);
3694  (*i_ungetc)(0xFF,f);
3695  break;
3696  default:
3697  (*i_ungetc)(c2,f);
3698  break;
3699  }
3700 }
3701 
3702 static nkf_char
3703 broken_getc(FILE *f)
3704 {
3705  nkf_char c, c1;
3706 
3707  if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3708  return nkf_buf_pop(nkf_state->broken_buf);
3709  }
3710  c = (*i_bgetc)(f);
3711  if (c=='$' && nkf_state->broken_state != ESC
3712  && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3713  c1= (*i_bgetc)(f);
3714  nkf_state->broken_state = 0;
3715  if (c1=='@'|| c1=='B') {
3716  nkf_buf_push(nkf_state->broken_buf, c1);
3717  nkf_buf_push(nkf_state->broken_buf, c);
3718  return ESC;
3719  } else {
3720  (*i_bungetc)(c1,f);
3721  return c;
3722  }
3723  } else if (c=='(' && nkf_state->broken_state != ESC
3724  && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3725  c1= (*i_bgetc)(f);
3726  nkf_state->broken_state = 0;
3727  if (c1=='J'|| c1=='B') {
3728  nkf_buf_push(nkf_state->broken_buf, c1);
3729  nkf_buf_push(nkf_state->broken_buf, c);
3730  return ESC;
3731  } else {
3732  (*i_bungetc)(c1,f);
3733  return c;
3734  }
3735  } else {
3736  nkf_state->broken_state = c;
3737  return c;
3738  }
3739 }
3740 
3741 static nkf_char
3742 broken_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3743 {
3744  if (nkf_buf_length(nkf_state->broken_buf) < 2)
3745  nkf_buf_push(nkf_state->broken_buf, c);
3746  return c;
3747 }
3748 
3749 static void
3750 eol_conv(nkf_char c2, nkf_char c1)
3751 {
3752  if (guess_f && input_eol != EOF) {
3753  if (c2 == 0 && c1 == LF) {
3754  if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3755  else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3756  } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3757  else if (!prev_cr);
3758  else if (!input_eol) input_eol = CR;
3759  else if (input_eol != CR) input_eol = EOF;
3760  }
3761  if (prev_cr || (c2 == 0 && c1 == LF)) {
3762  prev_cr = 0;
3763  if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3764  if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3765  }
3766  if (c2 == 0 && c1 == CR) prev_cr = CR;
3767  else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3768 }
3769 
3770 static void
3771 put_newline(void (*func)(nkf_char))
3772 {
3773  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3774  case CRLF:
3775  (*func)(0x0D);
3776  (*func)(0x0A);
3777  break;
3778  case CR:
3779  (*func)(0x0D);
3780  break;
3781  case LF:
3782  (*func)(0x0A);
3783  break;
3784  }
3785 }
3786 
3787 static void
3788 oconv_newline(void (*func)(nkf_char, nkf_char))
3789 {
3790  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3791  case CRLF:
3792  (*func)(0, 0x0D);
3793  (*func)(0, 0x0A);
3794  break;
3795  case CR:
3796  (*func)(0, 0x0D);
3797  break;
3798  case LF:
3799  (*func)(0, 0x0A);
3800  break;
3801  }
3802 }
3803 
3804 /*
3805  Return value of fold_conv()
3806 
3807  LF add newline and output char
3808  CR add newline and output nothing
3809  SP space
3810  0 skip
3811  1 (or else) normal output
3812 
3813  fold state in prev (previous character)
3814 
3815  >0x80 Japanese (X0208/X0201)
3816  <0x80 ASCII
3817  LF new line
3818  SP space
3819 
3820  This fold algorithm does not preserve leading spaces in a line.
3821  This is the main difference from fmt.
3822  */
3823 
/* Column width used by fold_conv(): 2 when c2 is non-zero, otherwise 1.
 * c1 is accepted for symmetry with the (c2, c1) call sites but is not used.
 * The argument is parenthesized so that expression arguments parse as one
 * operand. */
#define char_size(c2,c1) ((c2) ? 2 : 1)
3825 
/*
 * fold_conv: line-folding stage of the output filter chain.
 *
 * For each incoming character (c2, c1) it updates the file-scope state
 * f_prev (class of the previous character; bit 0x80 marks Japanese) and
 * f_line (current column count), chooses a fold_state value, and then acts
 * on it in the switch at the end:
 *   LF       emit a line break, then the character
 *   CR       emit a line break only
 *   SP / TAB emit a single ASCII space
 *   0        emit nothing
 *   other    emit the character unchanged
 * Output goes to (*o_fconv); line breaks are produced by oconv_newline().
 * fold_len is the target width and fold_margin the extra columns tolerated
 * while kinsoku rules keep a character on the current line.
 */
3826 static void
3827 fold_conv(nkf_char c2, nkf_char c1)
3828 {
3829  nkf_char prev0;
3830  nkf_char fold_state;
3831 
3832  if (c1== CR && !fold_preserve_f) {
3833  fold_state=0; /* ignore cr */
3834  }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3835  f_prev = LF;
3836  fold_state=0; /* ignore cr */
3837  } else if (c1== BS) {
3838  if (f_line>0) f_line--;
3839  fold_state = 1;
3840  } else if (c2==EOF && f_line != 0) { /* close open last line */
3841  fold_state = LF;
3842  } else if ((c1==LF && !fold_preserve_f)
3843  || ((c1==CR||(c1==LF&&f_prev!=CR))
3844  && fold_preserve_f)) {
3845  /* new line */
3846  if (fold_preserve_f) {
3847  f_prev = c1;
3848  f_line = 0;
3849  fold_state = CR;
3850  } else if ((f_prev == c1)
3851  || (f_prev == LF)
3852  ) { /* duplicate newline */
3853  if (f_line) {
3854  f_line = 0;
3855  fold_state = LF; /* output two newline */
3856  } else {
3857  f_line = 0;
3858  fold_state = 1;
3859  }
3860  } else {
3861  if (f_prev&0x80) { /* Japanese? */
3862  f_prev = c1;
3863  fold_state = 0; /* ignore given single newline */
3864  } else if (f_prev==SP) {
3865  fold_state = 0;
3866  } else {
3867  f_prev = c1;
3868  if (++f_line<=fold_len)
3869  fold_state = SP;
3870  else {
3871  f_line = 0;
3872  fold_state = CR; /* fold and output nothing */
3873  }
3874  }
3875  }
3876  } else if (c1=='\f') {
3877  f_prev = LF;
3878  f_line = 0;
3879  fold_state = LF; /* output newline and clear */
3880  } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3881  /* X0208 space (0x2121) or ASCII blank */
3882  if (f_prev == SP) {
3883  fold_state = 0; /* remove duplicate spaces */
3884  } else {
3885  f_prev = SP;
3886  if (++f_line<=fold_len)
3887  fold_state = SP; /* output ASCII space only */
3888  else {
3889  f_prev = SP; f_line = 0;
3890  fold_state = CR; /* fold and output nothing */
3891  }
3892  }
3893  } else {
3894  prev0 = f_prev; /* we still need this one... , but almost done */
3895  f_prev = c1;
3896  if (c2 || c2 == JIS_X_0201_1976_K)
3897  f_prev |= 0x80; /* this is Japanese */
3898  f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
3899  if (f_line<=fold_len) { /* normal case */
3900  fold_state = 1;
3901  } else {
3902  if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3903  f_line = char_size(c2,c1);
3904  fold_state = LF; /* We can't wait, do fold now */
3905  } else if (c2 == JIS_X_0201_1976_K) {
3906  /* simple kinsoku rules return 1 means no folding */
3907  if (c1==(0xde&0x7f)) fold_state = 1; /* 0xDE: halfwidth voiced sound mark */
3908  else if (c1==(0xdf&0x7f)) fold_state = 1; /* 0xDF: halfwidth semi-voiced sound mark */
3909  else if (c1==(0xa4&0x7f)) fold_state = 1; /* 0xA4: halfwidth ideographic comma */
3910  else if (c1==(0xa3&0x7f)) fold_state = 1; /* 0xA3: halfwidth right corner bracket */
3911  else if (c1==(0xa1&0x7f)) fold_state = 1; /* 0xA1: halfwidth ideographic full stop */
3912  else if (c1==(0xb0&0x7f)) fold_state = 1; /* 0xB0: halfwidth prolonged sound mark */
3913  else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3914  f_line = 1;
3915  fold_state = LF;/* add one new f_line before this character */
3916  } else {
3917  f_line = 1;
3918  fold_state = LF;/* add one new f_line before this character */
3919  }
3920  } else if (c2==0) {
3921  /* kinsoku point in ASCII */
3922  if ( c1==')'|| /* { [ ( */
3923  c1==']'||
3924  c1=='}'||
3925  c1=='.'||
3926  c1==','||
3927  c1=='!'||
3928  c1=='?'||
3929  c1=='/'||
3930  c1==':'||
3931  c1==';') {
3932  fold_state = 1;
3933  /* just after special */
3934  } else if (!is_alnum(prev0)) {
3935  f_line = char_size(c2,c1);
3936  fold_state = LF;
3937  } else if ((prev0==SP) || /* ignored new f_line */
3938  (prev0==LF)|| /* ignored new f_line */
3939  (prev0&0x80)) { /* X0208 - ASCII */
3940  f_line = char_size(c2,c1);
3941  fold_state = LF;/* add one new f_line before this character */
3942  } else {
3943  fold_state = 1; /* default no fold in ASCII */
3944  }
3945  } else {
3946  if (c2=='!') {
3947  if (c1=='"') fold_state = 1; /* JIS X 0208 0x2122: ideographic comma */
3948  else if (c1=='#') fold_state = 1; /* JIS X 0208 0x2123: ideographic full stop */
3949  else if (c1=='W') fold_state = 1; /* JIS X 0208 0x2157: right corner bracket */
3950  else if (c1=='K') fold_state = 1; /* JIS X 0208 0x214B */
3951  else if (c1=='$') fold_state = 1; /* JIS X 0208 0x2124 */
3952  else if (c1=='%') fold_state = 1; /* JIS X 0208 0x2125 */
3953  else if (c1=='\'') fold_state = 1; /* JIS X 0208 0x2127 */
3954  else if (c1=='(') fold_state = 1; /* JIS X 0208 0x2128 */
3955  else if (c1==')') fold_state = 1; /* JIS X 0208 0x2129 */
3956  else if (c1=='*') fold_state = 1; /* JIS X 0208 0x212A */
3957  else if (c1=='+') fold_state = 1; /* JIS X 0208 0x212B: voiced sound mark */
3958  else if (c1==',') fold_state = 1; /* JIS X 0208 0x212C: semi-voiced sound mark */
3959  /* default no fold in kinsoku */
3960  else {
3961  fold_state = LF;
3962  f_line = char_size(c2,c1);
3963  /* add one new f_line before this character */
3964  }
3965  } else {
3966  f_line = char_size(c2,c1);
3967  fold_state = LF;
3968  /* add one new f_line before this character */
3969  }
3970  }
3971  }
3972  }
3973  /* terminator process */
3974  switch(fold_state) {
3975  case LF:
3976  oconv_newline(o_fconv);
3977  (*o_fconv)(c2,c1);
3978  break;
3979  case 0:
3980  return;
3981  case CR:
3982  oconv_newline(o_fconv);
3983  break;
3984  case TAB:
3985  case SP:
3986  (*o_fconv)(0,SP);
3987  break;
3988  default:
3989  (*o_fconv)(c2,c1);
3990  }
3991 }
3992 
/* One-character look-behind used by z_conv() while x0201_f is set:
 * z_prev2 holds JIS_X_0201_1976_K when a character is pending (0 otherwise)
 * and z_prev1 holds that pending character's c1. */
3993 static nkf_char z_prev2=0,z_prev1=0;
3994 
/*
 * z_conv: width/symbol conversion stage; results go to (*o_zconv).
 *
 * x0201_f set: JIS_X_0201_1976_K characters are replaced through the cv
 *   table.  A character that has a dv/ev (or, with x0213_f, ev_x0213) entry
 *   is held back one call so that a following voiced (0x5E) or semi-voiced
 *   (0x5F) mark can select the combined dv/ev/ev_x0213 entry instead.
 * alpha_f bits, as tested below:
 *   1   JIS X 0208 row 0x23 becomes ASCII (c2 cleared); row 0x21 symbols
 *       are mapped through fv
 *   2   JIS X 0208 0x2121 becomes one ASCII space
 *   4   JIS X 0208 0x2121 becomes two ASCII spaces
 *   8   ASCII > < " & become HTML entities
 *   16  JIS X 0208 katakana and some row 0x21 symbols become
 *       JIS_X_0201_1976_K characters
 * Anything not converted is forwarded unchanged.
 */
3995 static void
3996 z_conv(nkf_char c2, nkf_char c1)
3997 {
3998 
3999  /* if (c2) c1 &= 0x7f; assertion */
4000 
 /* NOTE(review): this early return forwards the character before any
  * character still pending in z_prev1/z_prev2 is flushed -- confirm that
  * the resulting output order is intended. */
4001  if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
4002  (*o_zconv)(c2,c1);
4003  return;
4004  }
4005 
4006  if (x0201_f) {
4007  if (z_prev2 == JIS_X_0201_1976_K) {
4008  if (c2 == JIS_X_0201_1976_K) {
4009  if (c1 == (0xde&0x7f)) { /* dakuten (voiced sound mark) */
4010  z_prev2 = 0;
4011  (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
4012  return;
4013  } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* handakuten (semi-voiced sound mark) */
4014  z_prev2 = 0;
4015  (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
4016  return;
4017  } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) { /* handakuten (semi-voiced sound mark) */
4018  z_prev2 = 0;
4019  (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
4020  return;
4021  }
4022  }
 /* no combining mark followed: flush the pending character as is */
4023  z_prev2 = 0;
4024  (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
4025  }
4026  if (c2 == JIS_X_0201_1976_K) {
4027  if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
4028  /* wait for dakuten or handakuten */
4029  z_prev1 = c1;
4030  z_prev2 = c2;
4031  return;
4032  } else {
4033  (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
4034  return;
4035  }
4036  }
4037  }
4038 
4039  if (c2 == EOF) {
4040  (*o_zconv)(c2, c1);
4041  return;
4042  }
4043 
4044  if (alpha_f&1 && c2 == 0x23) {
4045  /* JISX0208 Alphabet */
4046  c2 = 0;
4047  } else if (c2 == 0x21) {
4048  /* JIS X 0208 symbols (row 0x21) */
4049  if (0x21==c1) {
4050  if (alpha_f&2) {
4051  c2 = 0;
4052  c1 = SP;
4053  } else if (alpha_f&4) {
4054  (*o_zconv)(0, SP);
4055  (*o_zconv)(0, SP);
4056  return;
4057  }
4058  } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4059  c2 = 0;
4060  c1 = fv[c1-0x20];
4061  }
4062  }
4063 
4064  if (alpha_f&8 && c2 == 0) {
4065  /* HTML Entity */
4066  const char *entity = 0;
4067  switch (c1){
4068  case '>': entity = "&gt;"; break;
4069  case '<': entity = "&lt;"; break;
4070  case '\"': entity = "&quot;"; break;
4071  case '&': entity = "&amp;"; break;
4072  }
4073  if (entity){
4074  while (*entity) (*o_zconv)(0, *entity++);
4075  return;
4076  }
4077  }
4078 
4079  if (alpha_f & 16) {
4080  /* JIS X 0208 Katakana to JIS X 0201 Katakana */
4081  if (c2 == 0x21) {
4082  nkf_char c = 0;
4083  switch (c1) {
4084  case 0x23:
4085  /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
4086  c = 0xA1;
4087  break;
4088  case 0x56:
4089  /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
4090  c = 0xA2;
4091  break;
4092  case 0x57:
4093  /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
4094  c = 0xA3;
4095  break;
4096  case 0x22:
4097  /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
4098  c = 0xA4;
4099  break;
4100  case 0x26:
4101  /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
4102  c = 0xA5;
4103  break;
4104  case 0x3C:
4105  /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
4106  c = 0xB0;
4107  break;
4108  case 0x2B:
4109  /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
4110  c = 0xDE;
4111  break;
4112  case 0x2C:
4113  /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
4114  c = 0xDF;
4115  break;
4116  }
4117  if (c) {
4118  (*o_zconv)(JIS_X_0201_1976_K, c);
4119  return;
4120  }
4121  } else if (c2 == 0x25) {
4122  /* JISX0208 Katakana */
 /* Indexed by c1-0x20.  High byte: X0201 character to emit; low byte:
  * optional second character (0x5E voiced / 0x5F semi-voiced mark);
  * 0x0000 means no mapping. */
4123  static const int fullwidth_to_halfwidth[] =
4124  {
4125  0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4126  0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4127  0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4128  0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4129  0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4130  0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4131  0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4132  0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4133  0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4134  0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4135  0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4136  0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4137  };
4138  if (fullwidth_to_halfwidth[c1-0x20]){
4139  c2 = fullwidth_to_halfwidth[c1-0x20];
4140  (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
4141  if (c2 & 0xFF) {
4142  (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
4143  }
4144  return;
4145  }
4146  } else if (c2 == 0 && nkf_char_unicode_p(c1) &&
4147  ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* combining dakuten / handakuten */
4148  (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
4149  return;
4150  }
4151  }
4152  (*o_zconv)(c2,c1);
4153 }
4154 
4155 
/*
 * rot13(c): rotate ASCII letters by 13 places ('A'..'M' <-> 'N'..'Z',
 * 'a'..'m' <-> 'n'..'z'); every other value is returned unchanged.
 * The argument is parenthesized so expression arguments parse correctly.
 * It is still evaluated more than once, so it must be free of side effects.
 */
#define rot13(c) ( \
    ((c) <  'A') ? (c): \
    ((c) <= 'M') ? ((c) + 13): \
    ((c) <= 'Z') ? ((c) - 13): \
    ((c) <  'a') ? (c): \
    ((c) <= 'm') ? ((c) + 13): \
    ((c) <= 'z') ? ((c) - 13): \
    (c) \
    )

/*
 * rot47(c): rotate the 94 values '!'..'~' by 47 places ('!'..'O' <->
 * 'P'..'~'); every other value is returned unchanged.  Same evaluation
 * caveat as rot13().
 */
#define rot47(c) ( \
    ((c) <  '!') ? (c): \
    ((c) <= 'O') ? ((c) + 47) : \
    ((c) <= '~') ? ((c) - 47) : \
    (c) \
    )
4172 
4173 static void
4174 rot_conv(nkf_char c2, nkf_char c1)
4175 {
4176  if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
4177  c1 = rot13(c1);
4178  } else if (c2) {
4179  c1 = rot47(c1);
4180  c2 = rot47(c2);
4181  }
4182  (*o_rot_conv)(c2,c1);
4183 }
4184 
4185 static void
4186 hira_conv(nkf_char c2, nkf_char c1)
4187 {
4188  if (hira_f & 1) {
4189  if (c2 == 0x25) {
4190  if (0x20 < c1 && c1 < 0x74) {
4191  c2 = 0x24;
4192  (*o_hira_conv)(c2,c1);
4193  return;
4194  } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
4195  c2 = 0;
4196  c1 = nkf_char_unicode_new(0x3094);
4197  (*o_hira_conv)(c2,c1);
4198  return;
4199  }
4200  } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4201  c1 += 2;
4202  (*o_hira_conv)(c2,c1);
4203  return;
4204  }
4205  }
4206  if (hira_f & 2) {
4207  if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
4208  c2 = 0x25;
4209  c1 = 0x74;
4210  } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4211  c2 = 0x25;
4212  } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4213  c1 -= 2;
4214  }
4215  }
4216  (*o_hira_conv)(c2,c1);
4217 }
4218 
4219 
4220 static void
4221 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
4222 {
4223 #define RANGE_NUM_MAX 18
4224  static const nkf_char range[RANGE_NUM_MAX][2] = {
4225  {0x222f, 0x2239,},
4226  {0x2242, 0x2249,},
4227  {0x2251, 0x225b,},
4228  {0x226b, 0x2271,},
4229  {0x227a, 0x227d,},
4230  {0x2321, 0x232f,},
4231  {0x233a, 0x2340,},
4232  {0x235b, 0x2360,},
4233  {0x237b, 0x237e,},
4234  {0x2474, 0x247e,},
4235  {0x2577, 0x257e,},
4236  {0x2639, 0x2640,},
4237  {0x2659, 0x267e,},
4238  {0x2742, 0x2750,},
4239  {0x2772, 0x277e,},
4240  {0x2841, 0x287e,},
4241  {0x4f54, 0x4f7e,},
4242  {0x7425, 0x747e},
4243  };
4244  nkf_char i;
4245  nkf_char start, end, c;
4246 
4247  if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4248  c2 = GETA1;
4249  c1 = GETA2;
4250  }
4251  if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4252  c2 = GETA1;
4253  c1 = GETA2;
4254  }
4255 
4256  for (i = 0; i < RANGE_NUM_MAX; i++) {
4257  start = range[i][0];
4258  end = range[i][1];
4259  c = (c2 << 8) + c1;
4260  if (c >= start && c <= end) {
4261  c2 = GETA1;
4262  c1 = GETA2;
4263  }
4264  }
4265  (*o_iso2022jp_check_conv)(c2,c1);
4266 }
4267 
4268 
4269 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4270 
/*
 * Encoded-word preambles recognised by mime_begin_strict(), NULL-terminated.
 * Each entry is "=?<charset>?<B|Q>?"; the leading '=' is written as the
 * octal escape \075.  The index of the matched entry is also used to index
 * mime_priority_func[], so the tables must stay in step.
 * NOTE(review): writing '=' as \075 presumably keeps "=?" out of this source
 * file's own text -- confirm.  The ISO-2022-JP?B? entry appears twice;
 * presumably intentional so that the index-parallel tables can differ.
 */
4271 static const unsigned char *mime_pattern[] = {
4272  (const unsigned char *)"\075?EUC-JP?B?",
4273  (const unsigned char *)"\075?SHIFT_JIS?B?",
4274  (const unsigned char *)"\075?ISO-8859-1?Q?",
4275  (const unsigned char *)"\075?ISO-8859-1?B?",
4276  (const unsigned char *)"\075?ISO-2022-JP?B?",
4277  (const unsigned char *)"\075?ISO-2022-JP?B?",
4278  (const unsigned char *)"\075?ISO-2022-JP?Q?",
4279 #if defined(UTF8_INPUT_ENABLE)
4280  (const unsigned char *)"\075?UTF-8?B?",
4281  (const unsigned char *)"\075?UTF-8?Q?",
4282 #endif
4283  (const unsigned char *)"\075?US-ASCII?Q?",
4284  NULL
4285 };
4286 
4287 
4288 /* Markers used to raise the priority of the matching encoding */
4290  e_iconv, s_iconv, 0, 0, 0, 0, 0,
4291 #if defined(UTF8_INPUT_ENABLE)
4292  w_iconv, w_iconv,
4293 #endif
4294  0,
4295 };
4296 
/*
 * Encoding constant for each entry of the MIME tables, terminated by 0.
 * NOTE(review): listing line 4298 is absent here, so the first row of
 * initializers is not visible; mime_pattern[] and mime_encode_method[] both
 * have seven entries before the UTF8_INPUT_ENABLE section -- restore the
 * missing row from the original file.
 */
4297 static const nkf_char mime_encode[] = {
4299 #if defined(UTF8_INPUT_ENABLE)
4300  UTF_8, UTF_8,
4301 #endif
4302  ASCII,
4303  0
4304 };
4305 
/*
 * Transfer encoding letter ('B' or 'Q') for each MIME table entry, in the
 * same order as the '?B?' / '?Q?' suffixes of mime_pattern[]; terminated
 * by 0.
 */
4306 static const nkf_char mime_encode_method[] = {
4307  'B', 'B','Q', 'B', 'B', 'B', 'Q',
4308 #if defined(UTF8_INPUT_ENABLE)
4309  'B', 'Q',
4310 #endif
4311  'Q',
4312  0
4313 };
4314 
4315 
4316 /* MIME preprocessor fifo */
4317
4318 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
4319 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* wraps an index into the buffer */
/* Access the ring buffer slot for index n; n itself is never reduced, only
 * masked on access, so top/last/input may grow or wrap freely. */
4320 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
4321 static struct {
4322  unsigned char buf[MIME_BUF_SIZE];
4323  unsigned int top; /* next slot mime_getc() returns; unget decrements it */
4324  unsigned int last; /* decoded */
4325  unsigned int input; /* undecoded */
4326 } mime_input_state;
/* Converter that was active before a MIME section replaced it; restored and
 * cleared by unswitch_mime_getc(). */
4327 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
4328
/* Size of the recovery buffer in mime_begin_strict() and the maximum number
 * of preamble characters mime_begin() examines. */
4329 #define MAXRECOVER 20
4330 
4331 static void
4332 mime_input_buf_unshift(nkf_char c)
4333 {
4334  mime_input_buf(--mime_input_state.top) = (unsigned char)c;
4335 }
4336 
4337 static nkf_char
4338 mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
4339 {
4340  mime_input_buf_unshift(c);
4341  return c;
4342 }
4343 
4344 static nkf_char
4345 mime_ungetc_buf(nkf_char c, FILE *f)
4346 {
4347  if (mimebuf_f)
4348  (*i_mungetc_buf)(c,f);
4349  else
4350  mime_input_buf(--mime_input_state.input) = (unsigned char)c;
4351  return c;
4352 }
4353 
4354 static nkf_char
4355 mime_getc_buf(FILE *f)
4356 {
4357  /* we don't keep eof of mime_input_buf, because it contains ?= as
4358  a terminator. It was checked in mime_integrity. */
4359  return ((mimebuf_f)?
4360  (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
4361 }
4362 
4363 static void
4364 switch_mime_getc(void)
4365 {
4366  if (i_getc!=mime_getc) {
4367  i_mgetc = i_getc; i_getc = mime_getc;
4368  i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4369  if(mime_f==STRICT_MIME) {
4370  i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4371  i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4372  }
4373  }
4374 }
4375 
4376 static void
4377 unswitch_mime_getc(void)
4378 {
4379  if(mime_f==STRICT_MIME) {
4380  i_mgetc = i_mgetc_buf;
4381  i_mungetc = i_mungetc_buf;
4382  }
4383  i_getc = i_mgetc;
4384  i_ungetc = i_mungetc;
4385  if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4386  mime_iconv_back = NULL;
4387 }
4388 
/*
 * mime_integrity: pre-read an encoded-word body into the MIME ring buffer.
 *
 * The already-matched preamble p is copied into the buffer, then characters
 * are read with i_getc and appended until one of:
 *   - '=' directly after '?' : terminator found.  The '=' is stored,
 *     .input is rewound to just after the preamble so decoding starts
 *     there, and mime_getc is switched in.
 *   - a character other than '+', '/', '=', '?' or an alphanumeric,
 *     a full buffer, or EOF : not a well-formed word.  The buffered text is
 *     marked as already decoded (.last = .input) and mime_decode_mode is set
 *     to 1 so that it is replayed without decoding.
 * Returns 1 on every path.
 */
4389 static nkf_char
4390 mime_integrity(FILE *f, const unsigned char *p)
4391 {
4392  nkf_char c,d;
4393  unsigned int q;
4394  /* In buffered mode, read until =? or NL or buffer full
4395  */
4396  mime_input_state.input = mime_input_state.top;
4397  mime_input_state.last = mime_input_state.top;
4398 
4399  while(*p) mime_input_buf(mime_input_state.input++) = *p++;
4400  d = 0; /* previously stored character */
4401  q = mime_input_state.input; /* first position after the preamble */
4402  while((c=(*i_getc)(f))!=EOF) {
4403  if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
4404  break; /* buffer full */
4405  }
4406  if (c=='=' && d=='?') {
4407  /* checked. skip header, start decode */
4408  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4409  /* mime_last_input = mime_input_state.input; */
4410  mime_input_state.input = q;
4411  switch_mime_getc();
4412  return 1;
4413  }
4414  if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4415  break;
4416  /* Should we check length mod 4? */
4417  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4418  d=c;
4419  }
4420  /* In case of Incomplete MIME, no MIME decode */
 /* NOTE(review): c is stored here even when the loop ended on EOF or on a
  * full buffer -- confirm that the cast of EOF and the extra slot are
  * handled as intended. */
4421  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4422  mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4423  mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4424  switch_mime_getc(); /* anyway we need buffered getc */
4425  return 1;
4426 }
4427 
/*
 * mime_begin_strict: called after "=?" has been read; matches the following
 * input against mime_pattern[] (input is upper-cased with nkf_toupper
 * before comparison).
 *
 * On a mismatch, later patterns that share the already-matched prefix and
 * accept the current character are tried.  If none fits, the last character
 * is pushed back, the consumed characters in r[] are sent to (*oconv) and
 * the last character read is returned.
 *
 * On a full match, mime_decode_mode is set from the pattern's encoding
 * letter, the current converter is saved in mime_iconv_back and
 * mime_priority_func[j] is installed.  For 'B' without unbuf_f the rest is
 * handled by mime_integrity(); otherwise mime_getc is switched in and
 * mimebuf_f is set to TRUE.
 */
4428 static nkf_char
4429 mime_begin_strict(FILE *f)
4430 {
4431  nkf_char c1 = 0;
4432  int i,j,k;
4433  const unsigned char *p,*q;
4434  nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4435 
4436  mime_decode_mode = FALSE;
4437  /* =? has been checked */
4438  j = 0;
4439  p = mime_pattern[j];
4440  r[0]='='; r[1]='?';
4441 
4442  for(i=2;p[i]>SP;i++) { /* start at =? */
4443  if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4444  /* pattern fails, try next one */
4445  q = p;
4446  while (mime_pattern[++j]) {
4447  p = mime_pattern[j];
4448  for(k=2;k<i;k++) /* assume length(p) > i */
4449  if (p[k]!=q[k]) break;
4450  if (k==i && nkf_toupper(c1)==p[k]) break;
4451  }
4452  p = mime_pattern[j];
4453  if (p) continue; /* found next one, continue */
4454  /* all fails, output from recovery buffer */
4455  (*i_ungetc)(c1,f);
4456  for(j=0;j<i;j++) {
4457  (*oconv)(0,r[j]);
4458  }
4459  return c1;
4460  }
4461  }
4462  mime_decode_mode = p[i-2]; /* the 'B' or 'Q' before the final '?' */
4463 
4464  mime_iconv_back = iconv;
4465  set_iconv(FALSE, mime_priority_func[j]);
4466  clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4467 
4468  if (mime_decode_mode=='B') {
4469  mimebuf_f = unbuf_f;
4470  if (!unbuf_f) {
4471  /* do MIME integrity check */
4472  return mime_integrity(f,mime_pattern[j]);
4473  }
4474  }
4475  switch_mime_getc();
4476  mimebuf_f = TRUE;
4477  return c1;
4478 }
4479 
/*
 * mime_begin: non-strict counterpart of mime_begin_strict(), called after
 * "=?" has been read.
 *
 * Up to MAXRECOVER characters are read and copied into the MIME ring buffer
 * at .last.  LF, SP, CR, '-', '_' and alphanumerics are accepted as charset
 * text; a '?' must then be followed by b/B or q/Q (which sets
 * mime_decode_mode) and another '?'.
 *
 * mime_getc is switched in on every path.  If no decode mode was
 * established, mime_decode_mode is set to 1 so that the buffered text is
 * replayed without decoding; otherwise .last is reset to its value on entry,
 * discarding the preamble.  The last character read is returned.
 */
4480 static nkf_char
4481 mime_begin(FILE *f)
4482 {
4483  nkf_char c1 = 0;
4484  int i,k;
4485 
4486  /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4487  /* re-read and convert again from mime_buffer. */
4488 
4489  /* =? has been checked */
4490  k = mime_input_state.last;
4491  mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
4492  for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4493  /* We accept any character type even if it is broken by new lines */
4494  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4495  if (c1==LF||c1==SP||c1==CR||
4496  c1=='-'||c1=='_'||is_alnum(c1)) continue;
4497  if (c1=='=') {
4498  /* Failed. But this could be another MIME preamble */
4499  (*i_ungetc)(c1,f);
4500  mime_input_state.last--;
4501  break;
4502  }
4503  if (c1!='?') break;
4504  else {
4505  /* c1=='?' */
4506  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4507  if (!(++i<MAXRECOVER) || c1==EOF) break;
4508  if (c1=='b'||c1=='B') {
4509  mime_decode_mode = 'B';
4510  } else if (c1=='q'||c1=='Q') {
4511  mime_decode_mode = 'Q';
4512  } else {
4513  break;
4514  }
4515  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4516  if (!(++i<MAXRECOVER) || c1==EOF) break;
4517  if (c1!='?') {
4518  mime_decode_mode = FALSE;
4519  }
4520  break;
4521  }
4522  }
4523  switch_mime_getc();
4524  if (!mime_decode_mode) {
4525  /* false MIME preamble, restart from mime_buffer */
4526  mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4527  /* Since we are in MIME mode until buffer becomes empty, */
4528  /* we never go into mime_begin again for a while. */
4529  return c1;
4530  }
4531  /* discard mime preamble, and goto MIME mode */
4532  mime_input_state.last = k;
4533  /* do no MIME integrity check */
4534  return c1; /* used only for checking EOF */
4535 }
4536 
4537 #ifdef CHECK_OPTION
4538 static void
4539 no_putc(ARG_UNUSED nkf_char c)
4540 {
4541  ;
4542 }
4543 
4544 static void
4545 debug(const char *str)
4546 {
4547  if (debug_f){
4548  fprintf(stderr, "%s\n", str ? str : "NULL");
4549  }
4550 }
4551 #endif
4552 
4553 static void
4554 set_input_codename(const char *codename)
4555 {
4556  if (!input_codename) {
4557  input_codename = codename;
4558  } else if (strcmp(codename, input_codename) != 0) {
4559  input_codename = "";
4560  }
4561 }
4562 
4563 static const char*
4564 get_guessed_code(void)
4565 {
4566  if (input_codename && !*input_codename) {
4567  input_codename = "BINARY";
4568  } else {
4569  struct input_code *p = find_inputcode_byfunc(iconv);
4570  if (!input_codename) {
4571  input_codename = "ASCII";
4572  } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4573  if (p->score & (SCORE_DEPEND|SCORE_CP932))
4574  input_codename = "CP932";
4575  } else if (strcmp(input_codename, "EUC-JP") == 0) {
4576  if (p->score & SCORE_X0213)
4577  input_codename = "EUC-JIS-2004";
4578  else if (p->score & (SCORE_X0212))
4579  input_codename = "EUCJP-MS";
4580  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4581  input_codename = "CP51932";
4582  } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4583  if (p->score & (SCORE_KANA))
4584  input_codename = "CP50221";
4585  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4586  input_codename = "CP50220";
4587  }
4588  }
4589  return input_codename;
4590 }
4591 
4592 #if !defined(PERL_XS) && !defined(WIN32DLL)
4593 static void
4594 print_guessed_code(char *filename)
4595 {
4596  if (filename != NULL) printf("%s: ", filename);
4597  if (input_codename && !*input_codename) {
4598  printf("BINARY\n");
4599  } else {
4600  input_codename = get_guessed_code();
4601  if (guess_f == 1) {
4602  printf("%s\n", input_codename);
4603  } else {
4604  printf("%s%s%s%s\n",
4605  input_codename,
4606  iconv != w_iconv16 && iconv != w_iconv32 ? "" :
4607  input_endian == ENDIAN_LITTLE ? " LE" :
4608  input_endian == ENDIAN_BIG ? " BE" :
4609  "[BUG]",
4610  input_bom_f ? " (BOM)" : "",
4611  input_eol == CR ? " (CR)" :
4612  input_eol == LF ? " (LF)" :
4613  input_eol == CRLF ? " (CRLF)" :
4614  input_eol == EOF ? " (MIXED NL)" :
4615  "");
4616  }
4617  }
4618 }
4619 #endif /*WIN32DLL*/
4620 
4621 #ifdef INPUT_OPTION
4622 
4623 static nkf_char
4624 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4625 {
4626  nkf_char c1, c2, c3;
4627  c1 = (*g)(f);
4628  if (c1 != ch){
4629  return c1;
4630  }
4631  c2 = (*g)(f);
4632  if (!nkf_isxdigit(c2)){
4633  (*u)(c2, f);
4634  return c1;
4635  }
4636  c3 = (*g)(f);
4637  if (!nkf_isxdigit(c3)){
4638  (*u)(c2, f);
4639  (*u)(c3, f);
4640  return c1;
4641  }
4642  return (hex2bin(c2) << 4) | hex2bin(c3);
4643 }
4644 
4645 static nkf_char
4646 cap_getc(FILE *f)
4647 {
4648  return hex_getc(':', f, i_cgetc, i_cungetc);
4649 }
4650 
4651 static nkf_char
4652 cap_ungetc(nkf_char c, FILE *f)
4653 {
4654  return (*i_cungetc)(c, f);
4655 }
4656 
4657 static nkf_char
4658 url_getc(FILE *f)
4659 {
4660  return hex_getc('%', f, i_ugetc, i_uungetc);
4661 }
4662 
4663 static nkf_char
4664 url_ungetc(nkf_char c, FILE *f)
4665 {
4666  return (*i_uungetc)(c, f);
4667 }
4668 #endif
4669 
4670 #ifdef NUMCHAR_OPTION
/*
 * numchar_getc: getc filter that decodes numeric character references.
 *
 * Reads through i_ngetc.  After "&#", either 'x'/'X' followed by up to seven
 * hexadecimal digits or up to eight decimal digits are accumulated into c.
 * A non-digit other than ';' rejects the reference (c = -1).  If the digit
 * limit is reached without seeing a terminator the loop simply ends and the
 * accumulated value is kept.
 *
 * On success the value is returned wrapped by nkf_char_unicode_new().
 * Otherwise every character after the first is pushed back with i_nungetc,
 * last read first, and the first character read is returned.
 */
4671 static nkf_char
4672 numchar_getc(FILE *f)
4673 {
4674  nkf_char (*g)(FILE *) = i_ngetc;
4675  nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4676  int i = 0, j;
4677  nkf_char buf[12]; /* characters consumed so far, kept for push-back */
4678  nkf_char c = -1; /* decoded value; -1 means "not a reference" */
4679 
4680  buf[i] = (*g)(f);
4681  if (buf[i] == '&'){
4682  buf[++i] = (*g)(f);
4683  if (buf[i] == '#'){
4684  c = 0;
4685  buf[++i] = (*g)(f);
4686  if (buf[i] == 'x' || buf[i] == 'X'){
4687  for (j = 0; j < 7; j++){
4688  buf[++i] = (*g)(f);
4689  if (!nkf_isxdigit(buf[i])){
4690  if (buf[i] != ';'){
4691  c = -1;
4692  }
4693  break;
4694  }
4695  c <<= 4;
4696  c |= hex2bin(buf[i]);
4697  }
4698  }else{
4699  for (j = 0; j < 8; j++){
 /* the first decimal digit was already read into buf[i] above */
4700  if (j){
4701  buf[++i] = (*g)(f);
4702  }
4703  if (!nkf_isdigit(buf[i])){
4704  if (buf[i] != ';'){
4705  c = -1;
4706  }
4707  break;
4708  }
4709  c *= 10;
4710  c += hex2bin(buf[i]);
4711  }
4712  }
4713  }
4714  }
4715  if (c != -1){
4716  return nkf_char_unicode_new(c);
4717  }
 /* not a reference: push back everything but buf[0], last read first */
4718  while (i > 0){
4719  (*u)(buf[i], f);
4720  --i;
4721  }
4722  return buf[0];
4723 }
4724 
4725 static nkf_char
4726 numchar_ungetc(nkf_char c, FILE *f)
4727 {
4728  return (*i_nungetc)(c, f);
4729 }
4730 #endif
4731 
4732 #ifdef UNICODE_NORMALIZATION
4733 
/*
 * nfc_getc: getc filter that replaces byte sequences listed in
 * normalization_table[].nfd with the corresponding .nfc sequence.
 *
 * Reads through i_nfc_getc.  EOF, values above 0xFF and bytes of the form
 * 10xxxxxx are returned immediately.  Otherwise the byte is collected in
 * nkf_state->nfc_buf and the table is binary-searched on the nfd sequence,
 * reading further bytes on demand.  On a match the buffer contents are
 * replaced by the nfc bytes.  Finally all buffered bytes except the first
 * are pushed back with i_nfc_ungetc and the first is returned.
 */
4734 static nkf_char
4735 nfc_getc(FILE *f)
4736 {
4737  nkf_char (*g)(FILE *f) = i_nfc_getc;
4738  nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4739  nkf_buf_t *buf = nkf_state->nfc_buf;
4740  const unsigned char *array;
4741  int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4742  nkf_char c = (*g)(f);
4743 
4744  if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4745 
4746  nkf_buf_push(buf, c);
4747  do {
4748  while (lower <= upper) {
4749  int mid = (lower+upper) / 2;
4750  int len;
4751  array = normalization_table[mid].nfd;
4752  for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4753  if (len >= nkf_buf_length(buf)) {
4754  c = (*g)(f);
4755  if (c == EOF) {
4756  len = 0;
4757  lower = 1, upper = 0; /* empty range: ends both loops */
4758  break;
4759  }
4760  nkf_buf_push(buf, c);
4761  }
4762  if (array[len] != nkf_buf_at(buf, len)) {
4763  if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4764  else upper = mid - 1;
4765  len = 0; /* mark "no match at mid" */
4766  break;
4767  }
4768  }
4769  if (len > 0) {
 /* whole nfd entry matched: substitute its nfc form */
4770  int i;
4771  array = normalization_table[mid].nfc;
4772  nkf_buf_clear(buf);
4773  for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4774  nkf_buf_push(buf, array[i]);
4775  break;
4776  }
4777  }
4778  } while (lower <= upper);
4779 
 /* popping yields the bytes last-first, so pushing them back leaves the
  * first buffered byte as the return value */
4780  while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4781  c = nkf_buf_pop(buf);
4782 
4783  return c;
4784 }
4785 
4786 static nkf_char
4787 nfc_ungetc(nkf_char c, FILE *f)
4788 {
4789  return (*i_nfc_ungetc)(c, f);
4790 }
4791 #endif /* UNICODE_NORMALIZATION */
4792 
4793 
4794 static nkf_char
4795 base64decode(nkf_char c)
4796 {
4797  int i;
4798  if (c > '@') {
4799  if (c < '[') {
4800  i = c - 'A'; /* A..Z 0-25 */
4801  } else if (c == '_') {
4802  i = '?' /* 63 */ ; /* _ 63 */
4803  } else {
4804  i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4805  }
4806  } else if (c > '/') {
4807  i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4808  } else if (c == '+' || c == '-') {
4809  i = '>' /* 62 */ ; /* + and - 62 */
4810  } else {
4811  i = '?' /* 63 */ ; /* / 63 */
4812  }
4813  return (i);
4814 }
4815 
4816 static nkf_char
4817 mime_getc(FILE *f)
4818 {
4819  nkf_char c1, c2, c3, c4, cc;
4820  nkf_char t1, t2, t3, t4, mode, exit_mode;
4821  nkf_char lwsp_count;
4822  char *lwsp_buf;
4823  char *lwsp_buf_new;
4824  nkf_char lwsp_size = 128;
4825 
4826  if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4827  return mime_input_buf(mime_input_state.top++);
4828  }
4829  if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4830  mime_decode_mode=FALSE;
4831  unswitch_mime_getc();
4832  return (*i_getc)(f);
4833  }
4834 
4835  if (mimebuf_f == FIXED_MIME)
4836  exit_mode = mime_decode_mode;
4837  else
4838  exit_mode = FALSE;
4839  if (mime_decode_mode == 'Q') {
4840  if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4841  restart_mime_q:
4842  if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4843  if (c1<=SP || DEL<=c1) {
4844  mime_decode_mode = exit_mode; /* prepare for quit */
4845  return c1;
4846  }
4847  if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4848  return c1;
4849  }
4850 
4851  mime_decode_mode = exit_mode; /* prepare for quit */
4852  if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4853  if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4854  /* end Q encoding */
4855  input_mode = exit_mode;
4856  lwsp_count = 0;
4857  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4858  while ((c1=(*i_getc)(f))!=EOF) {
4859  switch (c1) {
4860  case LF:
4861  case CR:
4862  if (c1==LF) {
4863  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4864  i_ungetc(SP,f);
4865  continue;
4866  } else {
4867  i_ungetc(c1,f);
4868  }
4869  c1 = LF;
4870  } else {
4871  if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4872  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4873  i_ungetc(SP,f);
4874  continue;
4875  } else {
4876  i_ungetc(c1,f);
4877  }
4878  i_ungetc(LF,f);
4879  } else {
4880  i_ungetc(c1,f);
4881  }
4882  c1 = CR;
4883  }
4884  break;
4885  case SP:
4886  case TAB:
4887  lwsp_buf[lwsp_count] = (unsigned char)c1;
4888  if (lwsp_count++>lwsp_size){
4889  lwsp_size <<= 1;
4890  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4891  lwsp_buf = lwsp_buf_new;
4892  }
4893  continue;
4894  }
4895  break;
4896  }
4897  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4898  i_ungetc(c1,f);
4899  for(lwsp_count--;lwsp_count>0;lwsp_count--)
4900  i_ungetc(lwsp_buf[lwsp_count],f);
4901  c1 = lwsp_buf[0];
4902  }
4903  nkf_xfree(lwsp_buf);
4904  return c1;
4905  }
4906  if (c1=='='&&c2<SP) { /* this is soft wrap */
4907  while((c1 = (*i_mgetc)(f)) <=SP) {
4908  if (c1 == EOF) return (EOF);
4909  }
4910  mime_decode_mode = 'Q'; /* still in MIME */
4911  goto restart_mime_q;
4912  }
4913  if (c1=='?') {
4914  mime_decode_mode = 'Q'; /* still in MIME */
4915  (*i_mungetc)(c2,f);
4916  return c1;
4917  }
4918  if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4919  if (c2<=SP) return c2;
4920  mime_decode_mode = 'Q'; /* still in MIME */
4921  return ((hex2bin(c2)<<4) + hex2bin(c3));
4922  }
4923 
4924  if (mime_decode_mode != 'B') {
4925  mime_decode_mode = FALSE;
4926  return (*i_mgetc)(f);
4927  }
4928 
4929 
4930  /* Base64 encoding */
4931  /*
4932  MIME allows line break in the middle of
4933  Base64, but we are very pessimistic in decoding
4934  in unbuf mode because MIME encoded code may broken by
4935  less or editor's control sequence (such as ESC-[-K in unbuffered
4936  mode. ignore incomplete MIME.
4937  */
4938  mode = mime_decode_mode;
4939  mime_decode_mode = exit_mode; /* prepare for quit */
4940 
4941  while ((c1 = (*i_mgetc)(f))<=SP) {
4942  if (c1==EOF)
4943  return (EOF);
4944  }
4945  mime_c2_retry:
4946  if ((c2 = (*i_mgetc)(f))<=SP) {
4947  if (c2==EOF)
4948  return (EOF);
4949  if (mime_f != STRICT_MIME) goto mime_c2_retry;
4950  if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4951  return c2;
4952  }
4953  if ((c1 == '?') && (c2 == '=')) {
4954  input_mode = ASCII;
4955  lwsp_count = 0;
4956  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4957  while ((c1=(*i_getc)(f))!=EOF) {
4958  switch (c1) {
4959  case LF:
4960  case CR:
4961  if (c1==LF) {
4962  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4963  i_ungetc(SP,f);
4964  continue;
4965  } else {
4966  i_ungetc(c1,f);
4967  }
4968  c1 = LF;
4969  } else {
4970  if ((c1=(*i_getc)(f))!=EOF) {
4971  if (c1==SP) {
4972  i_ungetc(SP,f);
4973  continue;
4974  } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4975  i_ungetc(SP,f);
4976  continue;
4977  } else {
4978  i_ungetc(c1,f);
4979  }
4980  i_ungetc(LF,f);
4981  } else {
4982  i_ungetc(c1,f);
4983  }
4984  c1 = CR;
4985  }
4986  break;
4987  case SP:
4988  case TAB:
4989  lwsp_buf[lwsp_count] = (unsigned char)c1;
4990  if (lwsp_count++>lwsp_size){
4991  lwsp_size <<= 1;
4992  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4993  lwsp_buf = lwsp_buf_new;
4994  }
4995  continue;
4996  }
4997  break;
4998  }
4999  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
5000  i_ungetc(c1,f);
5001  for(lwsp_count--;lwsp_count>0;lwsp_count--)
5002  i_ungetc(lwsp_buf[lwsp_count],f);
5003  c1 = lwsp_buf[0];
5004  }
5005  nkf_xfree(lwsp_buf);
5006  return c1;
5007  }
5008  mime_c3_retry:
5009  if ((c3 = (*i_mgetc)(f))<=SP) {
5010  if (c3==EOF)
5011  return (EOF);
5012  if (mime_f != STRICT_MIME) goto mime_c3_retry;
5013  if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
5014  return c3;
5015  }
5016  mime_c4_retry:
5017  if ((c4 = (*i_mgetc)(f))<=SP) {
5018  if (c4==EOF)
5019  return (EOF);
5020  if (mime_f != STRICT_MIME) goto mime_c4_retry;
5021  if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
5022  return c4;
5023  }
5024 
5025  mime_decode_mode = mode; /* still in MIME sigh... */
5026 
5027  /* BASE 64 decoding */
5028 
5029  t1 = 0x3f & base64decode(c1);
5030  t2 = 0x3f & base64decode(c2);
5031  t3 = 0x3f & base64decode(c3);
5032  t4 = 0x3f & base64decode(c4);
5033  cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5034  if (c2 != '=') {
5035  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5036  cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5037  if (c3 != '=') {
5038  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5039  cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
5040  if (c4 != '=')
5041  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
5042  }
5043  } else {
5044  return c1;
5045  }
5046  return mime_input_buf(mime_input_state.top++);
5047 }
5048 
/* Base64 alphabet: basis_64[v] is the output character for the 6-bit
 * value v.  Indexed by eof_mime() and mimeout_addchar(). */
5049 static const char basis_64[] =
5050  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
5051 
/* mime_putc() calls open_mime() or flushes once more than this many
 * characters are pending in mimeout_state.buf. */
5052 #define MIMEOUT_BUF_LENGTH 74
/* Characters held back by mime_putc() before they are written out:
 * buf holds the pending characters, count is how many of them are valid. */
5053 static struct {
5054  unsigned char buf[MIMEOUT_BUF_LENGTH+1];
5055  int count;
5056 } mimeout_state;
5057 
5058 /*nkf_char mime_lastchar2, mime_lastchar1;*/
5059 
5060 static void
5061 open_mime(nkf_char mode)
5062 {
5063  const unsigned char *p;
5064  int i;
5065  int j;
5066  p = mime_pattern[0];
5067  for(i=0;mime_pattern[i];i++) {
5068  if (mode == mime_encode[i]) {
5069  p = mime_pattern[i];
5070  break;
5071  }
5072  }
5073  mimeout_mode = mime_encode_method[i];
5074  i = 0;
5075  if (base64_count>45) {
5076  if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
5077  (*o_mputc)(mimeout_state.buf[i]);
5078  i++;
5079  }
5080  put_newline(o_mputc);
5081  (*o_mputc)(SP);
5082  base64_count = 1;
5083  if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
5084  i++;
5085  }
5086  }
5087  for (;i<mimeout_state.count;i++) {
5088  if (nkf_isspace(mimeout_state.buf[i])) {
5089  (*o_mputc)(mimeout_state.buf[i]);
5090  base64_count ++;
5091  } else {
5092  break;
5093  }
5094  }
5095  while(*p) {
5096  (*o_mputc)(*p++);
5097  base64_count ++;
5098  }
5099  j = mimeout_state.count;
5100  mimeout_state.count = 0;
5101  for (;i<j;i++) {
5102  mime_putc(mimeout_state.buf[i]);
5103  }
5104 }
5105 
5106 static void
5107 mime_prechar(nkf_char c2, nkf_char c1)
5108 {
5109  if (mimeout_mode > 0){
5110  if (c2 == EOF){
5111  if (base64_count + mimeout_state.count/3*4> 73){
5112  (*o_base64conv)(EOF,0);
5113  oconv_newline(o_base64conv);
5114  (*o_base64conv)(0,SP);
5115  base64_count = 1;
5116  }
5117  } else {
5118  if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
5119  (*o_base64conv)(EOF,0);
5120  oconv_newline(o_base64conv);
5121  (*o_base64conv)(0,SP);
5122  base64_count = 1;
5123  mimeout_mode = -1;
5124  }
5125  }
5126  } else if (c2) {
5127  if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
5128  mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
5129  open_mime(output_mode);
5130  (*o_base64conv)(EOF,0);
5131  oconv_newline(o_base64conv);
5132  (*o_base64conv)(0,SP);
5133  base64_count = 1;
5134  mimeout_mode = -1;
5135  }
5136  }
5137 }
5138 
5139 static void
5140 close_mime(void)
5141 {
5142  (*o_mputc)('?');
5143  (*o_mputc)('=');
5144  base64_count += 2;
5145  mimeout_mode = 0;
5146 }
5147 
5148 static void
5149 eof_mime(void)
5150 {
5151  switch(mimeout_mode) {
5152  case 'Q':
5153  case 'B':
5154  break;
5155  case 2:
5156  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
5157  (*o_mputc)('=');
5158  (*o_mputc)('=');
5159  base64_count += 3;
5160  break;
5161  case 1:
5162  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
5163  (*o_mputc)('=');
5164  base64_count += 2;
5165  break;
5166  }
5167  if (mimeout_mode > 0) {
5168  if (mimeout_f!=FIXED_MIME) {
5169  close_mime();
5170  } else if (mimeout_mode != 'Q')
5171  mimeout_mode = 'B';
5172  }
5173 }
5174 
5175 static void
5176 mimeout_addchar(nkf_char c)
5177 {
5178  switch(mimeout_mode) {
5179  case 'Q':
5180  if (c==CR||c==LF) {
5181  (*o_mputc)(c);
5182  base64_count = 0;
5183  } else if(!nkf_isalnum(c)) {
5184  (*o_mputc)('=');
5185  (*o_mputc)(bin2hex(((c>>4)&0xf)));
5186  (*o_mputc)(bin2hex((c&0xf)));
5187  base64_count += 3;
5188  } else {
5189  (*o_mputc)(c);
5190  base64_count++;
5191  }
5192  break;
5193  case 'B':
5194  nkf_state->mimeout_state=c;
5195  (*o_mputc)(basis_64[c>>2]);
5196  mimeout_mode=2;
5197  base64_count ++;
5198  break;
5199  case 2:
5200  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5201  nkf_state->mimeout_state=c;
5202  mimeout_mode=1;
5203  base64_count ++;
5204  break;
5205  case 1:
5206  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5207  (*o_mputc)(basis_64[c & 0x3F]);
5208  mimeout_mode='B';
5209  base64_count += 2;
5210  break;
5211  default:
5212  (*o_mputc)(c);
5213  base64_count++;
5214  break;
5215  }
5216 }
5217 
5218 static void
5219 mime_putc(nkf_char c)
5220 {
5221  int i, j;
5222  nkf_char lastchar;
5223 
5224  if (mimeout_f == FIXED_MIME){
5225  if (mimeout_mode == 'Q'){
5226  if (base64_count > 71){
5227  if (c!=CR && c!=LF) {
5228  (*o_mputc)('=');
5229  put_newline(o_mputc);
5230  }
5231  base64_count = 0;
5232  }
5233  }else{
5234  if (base64_count > 71){
5235  eof_mime();
5236  put_newline(o_mputc);
5237  base64_count = 0;
5238  }
5239  if (c == EOF) { /* c==EOF */
5240  eof_mime();
5241  }
5242  }
5243  if (c != EOF) { /* c==EOF */
5244  mimeout_addchar(c);
5245  }
5246  return;
5247  }
5248 
5249  /* mimeout_f != FIXED_MIME */
5250 
5251  if (c == EOF) { /* c==EOF */
5252  if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
5253  j = mimeout_state.count;
5254  mimeout_state.count = 0;
5255  i = 0;
5256  if (mimeout_mode > 0) {
5257  if (!nkf_isblank(mimeout_state.buf[j-1])) {
5258  for (;i<j;i++) {
5259  if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
5260  break;
5261  }
5262  mimeout_addchar(mimeout_state.buf[i]);
5263  }
5264  eof_mime();
5265  for (;i<j;i++) {
5266  mimeout_addchar(mimeout_state.buf[i]);
5267  }
5268  } else {
5269  for (;i<j;i++) {
5270  mimeout_addchar(mimeout_state.buf[i]);
5271  }
5272  eof_mime();
5273  }
5274  } else {
5275  for (;i<j;i++) {
5276  mimeout_addchar(mimeout_state.buf[i]);
5277  }
5278  }
5279  return;
5280  }
5281 
5282  if (mimeout_state.count > 0){
5283  lastchar = mimeout_state.buf[mimeout_state.count - 1];
5284  }else{
5285  lastchar = -1;
5286  }
5287 
5288  if (mimeout_mode=='Q') {
5289  if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
5290  if (c == CR || c == LF) {
5291  close_mime();
5292  (*o_mputc)(c);
5293  base64_count = 0;
5294  return;
5295  } else if (c <= SP) {
5296  close_mime();
5297  if (base64_count > 70) {
5298  put_newline(o_mputc);
5299  base64_count = 0;
5300  }
5301  if (!nkf_isblank(c)) {
5302  (*o_mputc)(SP);
5303  base64_count++;
5304  }
5305  } else {
5306  if (base64_count > 70) {
5307  close_mime();
5308  put_newline(o_mputc);
5309  (*o_mputc)(SP);
5310  base64_count = 1;
5311  open_mime(output_mode);
5312  }
5313  if (!nkf_noescape_mime(c)) {
5314  mimeout_addchar(c);
5315  return;
5316  }
5317  }
5318  if (c != 0x1B) {
5319  (*o_mputc)(c);
5320  base64_count++;
5321  return;
5322  }
5323  }
5324  }
5325 
5326  if (mimeout_mode <= 0) {
5327  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5328  output_mode == UTF_8)) {
5329  if (nkf_isspace(c)) {
5330  int flag = 0;
5331  if (mimeout_mode == -1) {
5332  flag = 1;
5333  }
5334  if (c==CR || c==LF) {
5335  if (flag) {
5336  open_mime(output_mode);
5337  output_mode = 0;
5338  } else {
5339  base64_count = 0;
5340  }
5341  }
5342  for (i=0;i<mimeout_state.count;i++) {
5343  (*o_mputc)(mimeout_state.buf[i]);
5344  if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
5345  base64_count = 0;
5346  }else{
5347  base64_count++;
5348  }
5349  }
5350  if (flag) {
5351  eof_mime();
5352  base64_count = 0;
5353  mimeout_mode = 0;
5354  }
5355  mimeout_state.buf[0] = (char)c;
5356  mimeout_state.count = 1;
5357  }else{
5358  if (base64_count > 1
5359  && base64_count + mimeout_state.count > 76
5360  && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
5361  static const char *str = "boundary=\"";
5362  static int len = 10;
5363  i = 0;
5364 
5365  for (; i < mimeout_state.count - len; ++i) {
5366  if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
5367  i += len - 2;
5368  break;
5369  }
5370  }
5371 
5372  if (i == 0 || i == mimeout_state.count - len) {
5373  put_newline(o_mputc);
5374  base64_count = 0;
5375  if (!nkf_isspace(mimeout_state.buf[0])){
5376  (*o_mputc)(SP);
5377  base64_count++;
5378  }
5379  }
5380  else {
5381  int j;
5382  for (j = 0; j <= i; ++j) {
5383  (*o_mputc)(mimeout_state.buf[j]);
5384  }
5385  put_newline(o_mputc);
5386  base64_count = 1;
5387  for (; j <= mimeout_state.count; ++j) {
5388  mimeout_state.buf[j - i] = mimeout_state.buf[j];
5389  }
5390  mimeout_state.count -= i;
5391  }
5392  }
5393  mimeout_state.buf[mimeout_state.count++] = (char)c;
5394  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5395  open_mime(output_mode);
5396  }
5397  }
5398  return;
5399  }else{
5400  if (lastchar==CR || lastchar == LF){
5401  for (i=0;i<mimeout_state.count;i++) {
5402  (*o_mputc)(mimeout_state.buf[i]);
5403  }
5404  base64_count = 0;
5405  mimeout_state.count = 0;
5406  }
5407  if (lastchar==SP) {
5408  for (i=0;i<mimeout_state.count-1;i++) {
5409  (*o_mputc)(mimeout_state.buf[i]);
5410  base64_count++;
5411  }
5412  mimeout_state.buf[0] = SP;
5413  mimeout_state.count = 1;
5414  }
5415  open_mime(output_mode);
5416  }
5417  }else{
5418  /* mimeout_mode == 'B', 1, 2 */
5419  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5420  output_mode == UTF_8)) {
5421  if (lastchar == CR || lastchar == LF){
5422  if (nkf_isblank(c)) {
5423  for (i=0;i<mimeout_state.count;i++) {
5424  mimeout_addchar(mimeout_state.buf[i]);
5425  }
5426  mimeout_state.count = 0;
5427  } else {
5428  eof_mime();
5429  for (i=0;i<mimeout_state.count;i++) {
5430  (*o_mputc)(mimeout_state.buf[i]);
5431  }
5432  base64_count = 0;
5433  mimeout_state.count = 0;
5434  }
5435  mimeout_state.buf[mimeout_state.count++] = (char)c;
5436  return;
5437  }
5438  if (nkf_isspace(c)) {
5439  for (i=0;i<mimeout_state.count;i++) {
5440  if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5441  eof_mime();
5442  for (i=0;i<mimeout_state.count;i++) {
5443  (*o_mputc)(mimeout_state.buf[i]);
5444  base64_count++;
5445  }
5446  mimeout_state.count = 0;
5447  }
5448  }
5449  mimeout_state.buf[mimeout_state.count++] = (char)c;
5450  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5451  eof_mime();
5452  for (j=0;j<mimeout_state.count;j++) {
5453  (*o_mputc)(mimeout_state.buf[j]);
5454  base64_count++;
5455  }
5456  mimeout_state.count = 0;
5457  }
5458  return;
5459  }
5460  if (mimeout_state.count>0 && SP<c && c!='=') {
5461  mimeout_state.buf[mimeout_state.count++] = (char)c;
5462  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5463  j = mimeout_state.count;
5464  mimeout_state.count = 0;
5465  for (i=0;i<j;i++) {
5466  mimeout_addchar(mimeout_state.buf[i]);
5467  }
5468  }
5469  return;
5470  }
5471  }
5472  }
5473  if (mimeout_state.count>0) {
5474  j = mimeout_state.count;
5475  mimeout_state.count = 0;
5476  for (i=0;i<j;i++) {
5477  if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5478  break;
5479  mimeout_addchar(mimeout_state.buf[i]);
5480  }
5481  if (i<j) {
5482  eof_mime();
5483  base64_count=0;
5484  for (;i<j;i++) {
5485  (*o_mputc)(mimeout_state.buf[i]);
5486  }
5487  open_mime(output_mode);
5488  }
5489  }
5490  mimeout_addchar(c);
5491 }
5492 
5493 static void
5494 base64_conv(nkf_char c2, nkf_char c1)
5495 {
5496  mime_prechar(c2, c1);
5497  (*o_base64conv)(c2,c1);
5498 }
5499 
5500 #ifdef HAVE_ICONV_H
5501 typedef struct nkf_iconv_t {
5502  iconv_t cd;
5503  char *input_buffer;
5504  size_t input_buffer_size;
5505  char *output_buffer;
5506  size_t output_buffer_size;
5507 };
5508 
5509 static nkf_iconv_t
5510 nkf_iconv_new(char *tocode, char *fromcode)
5511 {
5512  nkf_iconv_t converter;
5513 
5514  converter->input_buffer_size = IOBUF_SIZE;
5515  converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5516  converter->output_buffer_size = IOBUF_SIZE * 2;
5517  converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5518  converter->cd = iconv_open(tocode, fromcode);
5519  if (converter->cd == (iconv_t)-1)
5520  {
5521  switch (errno) {
5522  case EINVAL:
5523  perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5524  return -1;
5525  default:
5526  perror("can't iconv_open");
5527  }
5528  }
5529 }
5530 
5531 static size_t
5532 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5533 {
5534  size_t invalid = (size_t)0;
5535  char *input_buffer = converter->input_buffer;
5536  size_t input_length = (size_t)0;
5537  char *output_buffer = converter->output_buffer;
5538  size_t output_length = converter->output_buffer_size;
5539  int c;
5540 
5541  do {
5542  if (c != EOF) {
5543  while ((c = (*i_getc)(f)) != EOF) {
5544  input_buffer[input_length++] = c;
5545  if (input_length < converter->input_buffer_size) break;
5546  }
5547  }
5548 
5549  size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5550  while (output_length-- > 0) {
5551  (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5552  }
5553  if (ret == (size_t) - 1) {
5554  switch (errno) {
5555  case EINVAL:
5556  if (input_buffer != converter->input_buffer)
5557  memmove(converter->input_buffer, input_buffer, input_length);
5558  break;
5559  case E2BIG:
5560  converter->output_buffer_size *= 2;
5561  output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5562  if (output_buffer == NULL) {
5563  perror("can't realloc");
5564  return -1;
5565  }
5566  converter->output_buffer = output_buffer;
5567  break;
5568  default:
5569  perror("can't iconv");
5570  return -1;
5571  }
5572  } else {
5573  invalid += ret;
5574  }
5575  } while (1);
5576 
5577  return invalid;
5578 }
5579 
5580 
5581 static void
5582 nkf_iconv_close(nkf_iconv_t *convert)
5583 {
5584  nkf_xfree(converter->inbuf);
5585  nkf_xfree(converter->outbuf);
5586  iconv_close(converter->cd);
5587 }
5588 #endif
5589 
5590 
5591 static void
5592 reinit(void)
5593 {
5594  {
5595  struct input_code *p = input_code_list;
5596  while (p->name){
5597  status_reinit(p++);
5598  }
5599  }
5600  unbuf_f = FALSE;
5601  estab_f = FALSE;
5602  nop_f = FALSE;
5603  binmode_f = TRUE;
5604  rot_f = FALSE;
5605  hira_f = FALSE;
5606  alpha_f = FALSE;
5607  mime_f = MIME_DECODE_DEFAULT;
5608  mime_decode_f = FALSE;
5609  mimebuf_f = FALSE;
5610  broken_f = FALSE;
5611  iso8859_f = FALSE;
5612  mimeout_f = FALSE;
5613  x0201_f = NKF_UNSPECIFIED;
5614  iso2022jp_f = FALSE;
5615 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5616  ms_ucs_map_f = UCS_MAP_ASCII;
5617 #endif
5618 #ifdef UTF8_INPUT_ENABLE
5619  no_cp932ext_f = FALSE;
5620  no_best_fit_chars_f = FALSE;
5621  encode_fallback = NULL;
5622  unicode_subchar = '?';
5623  input_endian = ENDIAN_BIG;
5624 #endif
5625 #ifdef UTF8_OUTPUT_ENABLE
5626  output_bom_f = FALSE;
5627  output_endian = ENDIAN_BIG;
5628 #endif
5629 #ifdef UNICODE_NORMALIZATION
5630  nfc_f = FALSE;
5631 #endif
5632 #ifdef INPUT_OPTION
5633  cap_f = FALSE;
5634  url_f = FALSE;
5635  numchar_f = FALSE;
5636 #endif
5637 #ifdef CHECK_OPTION
5638  noout_f = FALSE;
5639  debug_f = FALSE;
5640 #endif
5641  guess_f = 0;
5642 #ifdef EXEC_IO
5643  exec_f = 0;
5644 #endif
5645 #ifdef SHIFTJIS_CP932
5646  cp51932_f = TRUE;
5647  cp932inv_f = TRUE;
5648 #endif
5649 #ifdef X0212_ENABLE
5650  x0212_f = FALSE;
5651  x0213_f = FALSE;
5652 #endif
5653  {
5654  int i;
5655  for (i = 0; i < 256; i++){
5656  prefix_table[i] = 0;
5657  }
5658  }
5659  hold_count = 0;
5660  mimeout_state.count = 0;
5661  mimeout_mode = 0;
5662  base64_count = 0;
5663  f_line = 0;
5664  f_prev = 0;
5665  fold_preserve_f = FALSE;
5666  fold_f = FALSE;
5667  fold_len = 0;
5668  kanji_intro = DEFAULT_J;
5669  ascii_intro = DEFAULT_R;
5670  fold_margin = FOLD_MARGIN;
5671  o_zconv = no_connection;
5672  o_fconv = no_connection;
5673  o_eol_conv = no_connection;
5674  o_rot_conv = no_connection;
5675  o_hira_conv = no_connection;
5676  o_base64conv = no_connection;
5677  o_iso2022jp_check_conv = no_connection;
5678  o_putc = std_putc;
5679  i_getc = std_getc;
5680  i_ungetc = std_ungetc;
5681  i_bgetc = std_getc;
5682  i_bungetc = std_ungetc;
5683  o_mputc = std_putc;
5684  i_mgetc = std_getc;
5685  i_mungetc = std_ungetc;
5686  i_mgetc_buf = std_getc;
5687  i_mungetc_buf = std_ungetc;
5688  output_mode = ASCII;
5689  input_mode = ASCII;
5690  mime_decode_mode = FALSE;
5691  file_out_f = FALSE;
5692  eolmode_f = 0;
5693  input_eol = 0;
5694  prev_cr = 0;
5695  option_mode = 0;
5696  z_prev2=0,z_prev1=0;
5697 #ifdef CHECK_OPTION
5698  iconv_for_check = 0;
5699 #endif
5700  input_codename = NULL;
5701  input_encoding = NULL;
5702  output_encoding = NULL;
5703  nkf_state_init();
5704 #ifdef WIN32DLL
5705  reinitdll();
5706 #endif /*WIN32DLL*/
5707 }
5708 
5709 static int
5710 module_connection(void)
5711 {
5712  if (input_encoding) set_input_encoding(input_encoding);
5713  if (!output_encoding) {
5714  output_encoding = nkf_default_encoding();
5715  }
5716  if (!output_encoding) {
5717  if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5718  else return -1;
5719  }
5720  set_output_encoding(output_encoding);
5721  oconv = nkf_enc_to_oconv(output_encoding);
5722  o_putc = std_putc;
5723  if (nkf_enc_unicode_p(output_encoding))
5724  output_mode = UTF_8;
5725 
5726  if (x0201_f == NKF_UNSPECIFIED) {
5727  x0201_f = X0201_DEFAULT;
5728  }
5729 
5730  /* replace continuation module, from output side */
5731 
5732  /* output redirection */
5733 #ifdef CHECK_OPTION
5734  if (noout_f || guess_f){
5735  o_putc = no_putc;
5736  }
5737 #endif
5738  if (mimeout_f) {
5739  o_mputc = o_putc;
5740  o_putc = mime_putc;
5741  if (mimeout_f == TRUE) {
5742  o_base64conv = oconv; oconv = base64_conv;
5743  }
5744  /* base64_count = 0; */
5745  }
5746 
5747  if (eolmode_f || guess_f) {
5748  o_eol_conv = oconv; oconv = eol_conv;
5749  }
5750  if (rot_f) {
5751  o_rot_conv = oconv; oconv = rot_conv;
5752  }
5753  if (iso2022jp_f) {
5754  o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5755  }
5756  if (hira_f) {
5757  o_hira_conv = oconv; oconv = hira_conv;
5758  }
5759  if (fold_f) {
5760  o_fconv = oconv; oconv = fold_conv;
5761  f_line = 0;
5762  }
5763  if (alpha_f || x0201_f) {
5764  o_zconv = oconv; oconv = z_conv;
5765  }
5766 
5767  i_getc = std_getc;
5768  i_ungetc = std_ungetc;
5769  /* input redirection */
5770 #ifdef INPUT_OPTION
5771  if (cap_f){
5772  i_cgetc = i_getc; i_getc = cap_getc;
5773  i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5774  }
5775  if (url_f){
5776  i_ugetc = i_getc; i_getc = url_getc;
5777  i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5778  }
5779 #endif
5780 #ifdef NUMCHAR_OPTION
5781  if (numchar_f){
5782  i_ngetc = i_getc; i_getc = numchar_getc;
5783  i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5784  }
5785 #endif
5786 #ifdef UNICODE_NORMALIZATION
5787  if (nfc_f){
5788  i_nfc_getc = i_getc; i_getc = nfc_getc;
5789  i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5790  }
5791 #endif
5792  if (mime_f && mimebuf_f==FIXED_MIME) {
5793  i_mgetc = i_getc; i_getc = mime_getc;
5794  i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5795  }
5796  if (broken_f & 1) {
5797  i_bgetc = i_getc; i_getc = broken_getc;
5798  i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5799  }
5800  if (input_encoding) {
5801  set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5802  } else {
5803  set_iconv(FALSE, e_iconv);
5804  }
5805 
5806  {
5807  struct input_code *p = input_code_list;
5808  while (p->name){
5809  status_reinit(p++);
5810  }
5811  }
5812  return 0;
5813 }
5814 
5815 /*
5816  Conversion main loop. Code detection only.
5817  */
5818 
5819 #if !defined(PERL_XS) && !defined(WIN32DLL)
5820 static nkf_char
5821 noconvert(FILE *f)
5822 {
5823  nkf_char c;
5824 
5825  if (nop_f == 2)
5826  module_connection();
5827  while ((c = (*i_getc)(f)) != EOF)
5828  (*o_putc)(c);
5829  (*o_putc)(EOF);
5830  return 1;
5831 }
5832 #endif
5833 
/*
 * Loop-control shorthands for the main read loop of kanji_convert().
 * They expand to bare statements that refer to the caller's c1 and c2 and
 * must be used directly inside that loop.
 *
 * NOTE(review): SKIP and MORE expand to two statements without braces, so
 * in an unbraced `if (x) SKIP;` only the assignment would be conditional.
 * The uses visible here are braced or unconditional; confirm before adding
 * new ones.
 */
5834 #define NEXT continue /* no output, get next */
5835 #define SKIP c2=0;continue /* no output, get next */
5836 #define MORE c2=c1;continue /* need one more byte */
5837 #define SEND (void)0 /* output c1 and c2, get next */
5838 #define LAST break /* end of loop, go closing */
/*
 * Switch input_mode to `mode`, reset shift_mode to 0, and record
 * "ISO-2022-JP" through set_input_codename() and debug().
 * Relies on a shift_mode variable being in scope at the use site.
 */
5839 #define set_input_mode(mode) do { \
5840  input_mode = mode; \
5841  shift_mode = 0; \
5842  set_input_codename("ISO-2022-JP"); \
5843  debug("ISO-2022-JP"); \
5844 } while (0)
5845 
5846 static int
5847 kanji_convert(FILE *f)
5848 {
5849  nkf_char c1=0, c2=0, c3=0, c4=0;
5850  int shift_mode = 0; /* 0, 1, 2, 3 */
5851  int g2 = 0;
5852  int is_8bit = FALSE;
5853 
5854  if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5855  is_8bit = TRUE;
5856  }
5857 
5858  input_mode = ASCII;
5859  output_mode = ASCII;
5860 
5861  if (module_connection() < 0) {
5862 #if !defined(PERL_XS) && !defined(WIN32DLL)
5863  fprintf(stderr, "no output encoding given\n");
5864 #endif
5865  return -1;
5866  }
5867  check_bom(f);
5868 
5869 #ifdef UTF8_INPUT_ENABLE
5870  if(iconv == w_iconv32){
5871  while ((c1 = (*i_getc)(f)) != EOF &&
5872  (c2 = (*i_getc)(f)) != EOF &&
5873  (c3 = (*i_getc)(f)) != EOF &&
5874  (c4 = (*i_getc)(f)) != EOF) {
5875  nkf_char c5, c6, c7, c8;
5876  if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5877  if ((c5 = (*i_getc)(f)) != EOF &&
5878  (c6 = (*i_getc)(f)) != EOF &&
5879  (c7 = (*i_getc)(f)) != EOF &&
5880  (c8 = (*i_getc)(f)) != EOF) {
5881  if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
5882  (*i_ungetc)(c8, f);
5883  (*i_ungetc)(c7, f);
5884  (*i_ungetc)(c6, f);
5885  (*i_ungetc)(c5, f);
5886  nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5887  }
5888  } else {
5889  nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5890  }
5891  }
5892  }
5893  goto finished;
5894  }
5895  else if (iconv == w_iconv16) {
5896  while ((c1 = (*i_getc)(f)) != EOF &&
5897  (c2 = (*i_getc)(f)) != EOF) {
5898  size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
5899  if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
5900  (c3 = (*i_getc)(f)) != EOF &&
5901  (c4 = (*i_getc)(f)) != EOF) {
5902  nkf_iconv_utf_16(c1, c2, c3, c4);
5903  } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
5904  if ((c3 = (*i_getc)(f)) != EOF &&
5905  (c4 = (*i_getc)(f)) != EOF) {
5906  if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
5907  (*i_ungetc)(c4, f);
5908  (*i_ungetc)(c3, f);
5909  nkf_iconv_utf_16_nocombine(c1, c2);
5910  }
5911  } else {
5912  nkf_iconv_utf_16_nocombine(c1, c2);
5913  }
5914  }
5915  }
5916  goto finished;
5917  }
5918 #endif
5919 
5920  while ((c1 = (*i_getc)(f)) != EOF) {
5921 #ifdef INPUT_CODE_FIX
5922  if (!input_encoding)
5923 #endif
5924  code_status(c1);
5925  if (c2) {
5926  /* second byte */
5927  if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
5928  /* in case of 8th bit is on */
5929  if (!estab_f&&!mime_decode_mode) {
5930  /* in case of not established yet */
5931  /* It is still ambiguous */
5932  if (h_conv(f, c2, c1)==EOF) {
5933  LAST;
5934  }
5935  else {
5936  SKIP;
5937  }
5938  }
5939  else {
5940  /* in case of already established */
5941  if (c1 < 0x40) {
5942  /* ignore bogus code */
5943  SKIP;
5944  } else {
5945  SEND;
5946  }
5947  }
5948  }
5949  else {
5950  /* 2nd byte of 7 bit code or SJIS */
5951  SEND;
5952  }
5953  }
5954  else if (nkf_char_unicode_p(c1)) {
5955  (*oconv)(0, c1);
5956  NEXT;
5957  }
5958  else {
5959  /* first byte */
5960  if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5961  /* CP5022x */
5962  MORE;
5963  }else if (input_codename && input_codename[0] == 'I' &&
5964  0xA1 <= c1 && c1 <= 0xDF) {
5965  /* JIS X 0201 Katakana in 8bit JIS */
5966  c2 = JIS_X_0201_1976_K;
5967  c1 &= 0x7f;
5968  SEND;
5969  } else if (c1 > DEL) {
5970  /* 8 bit code */
5971  if (!estab_f && !iso8859_f) {
5972  /* not established yet */
5973  MORE;
5974  } else { /* estab_f==TRUE */
5975  if (iso8859_f) {
5976  c2 = ISO_8859_1;
5977  c1 &= 0x7f;
5978  SEND;
5979  }
5980  else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5981  (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5982  /* JIS X 0201 */
5983  c2 = JIS_X_0201_1976_K;
5984  c1 &= 0x7f;
5985  SEND;
5986  }
5987  else {
5988  /* already established */
5989  MORE;
5990  }
5991  }
5992  } else if (SP < c1 && c1 < DEL) {
5993  /* in case of Roman characters */
5994  if (shift_mode) {
5995  /* output 1 shifted byte */
5996  if (iso8859_f) {
5997  c2 = ISO_8859_1;
5998  SEND;
5999  } else if (nkf_byte_jisx0201_katakana_p(c1)){
6000  /* output 1 shifted byte */
6001  c2 = JIS_X_0201_1976_K;
6002  SEND;
6003  } else {
6004  /* look like bogus code */
6005  SKIP;
6006  }
6007  } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
6008  input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
6009  /* in case of Kanji shifted */
6010  MORE;
6011  } else if (c1 == '=' && mime_f && !mime_decode_mode) {
6012  /* Check MIME code */
6013  if ((c1 = (*i_getc)(f)) == EOF) {
6014  (*oconv)(0, '=');
6015  LAST;
6016  } else if (c1 == '?') {
6017  /* =? is mime conversion start sequence */
6018  if(mime_f == STRICT_MIME) {
6019  /* check in real detail */
6020  if (mime_begin_strict(f) == EOF)
6021  LAST;
6022  SKIP;
6023  } else if (mime_begin(f) == EOF)
6024  LAST;
6025  SKIP;
6026  } else {
6027  (*oconv)(0, '=');
6028  (*i_ungetc)(c1,f);
6029  SKIP;
6030  }
6031  } else {
6032  /* normal ASCII code */
6033  SEND;
6034  }
6035  } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
6036  shift_mode = 0;
6037  SKIP;
6038  } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
6039  shift_mode = 1;
6040  SKIP;
6041  } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
6042  if ((c1 = (*i_getc)(f)) == EOF) {
6043  (*oconv)(0, ESC);
6044  LAST;
6045  }
6046  else if (c1 == '&') {
6047  /* IRR */
6048  if ((c1 = (*i_getc)(f)) == EOF) {
6049  LAST;
6050  } else {
6051  SKIP;
6052  }
6053  }
6054  else if (c1 == '$') {
6055  /* GZDMx */
6056  if ((c1 = (*i_getc)(f)) == EOF) {
6057  /* don't send bogus code
6058  (*oconv)(0, ESC);
6059  (*oconv)(0, '$'); */
6060  LAST;
6061  } else if (c1 == '@' || c1 == 'B') {
6062  /* JIS X 0208 */
6064  SKIP;
6065  } else if (c1 == '(') {
6066  /* GZDM4 */
6067  if ((c1 = (*i_getc)(f)) == EOF) {
6068  /* don't send bogus code
6069  (*oconv)(0, ESC);
6070  (*oconv)(0, '$');
6071  (*oconv)(0, '(');
6072  */
6073  LAST;
6074  } else if (c1 == '@'|| c1 == 'B') {
6075  /* JIS X 0208 */
6077  SKIP;
6078 #ifdef X0212_ENABLE
6079  } else if (c1 == 'D'){
6081  SKIP;
6082 #endif /* X0212_ENABLE */
6083  } else if (c1 == 'O' || c1 == 'Q'){
6085  SKIP;
6086  } else if (c1 == 'P'){
6088  SKIP;
6089  } else {
6090  /* could be some special code */
6091  (*oconv)(0, ESC);
6092  (*oconv)(0, '$');
6093  (*oconv)(0, '(');
6094  (*oconv)(0, c1);
6095  SKIP;
6096  }
6097  } else if (broken_f&0x2) {
6098  /* accept any ESC-(-x as broken code ... */
6099  input_mode = JIS_X_0208;
6100  shift_mode = 0;
6101  SKIP;
6102  } else {
6103  (*oconv)(0, ESC);
6104  (*oconv)(0, '$');
6105  (*oconv)(0, c1);
6106  SKIP;
6107  }
6108  } else if (c1 == '(') {
6109  /* GZD4 */
6110  if ((c1 = (*i_getc)(f)) == EOF) {
6111  /* don't send bogus code
6112  (*oconv)(0, ESC);
6113  (*oconv)(0, '('); */
6114  LAST;
6115  }
6116  else if (c1 == 'I') {
6117  /* JIS X 0201 Katakana */
6119  shift_mode = 1;
6120  SKIP;
6121  }
6122  else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
6123  /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
6125  SKIP;
6126  }
6127  else if (broken_f&0x2) {
6129  SKIP;
6130  }
6131  else {
6132  (*oconv)(0, ESC);
6133  (*oconv)(0, '(');
6134  SEND;
6135  }
6136  }
6137  else if (c1 == '.') {
6138  /* G2D6 */
6139  if ((c1 = (*i_getc)(f)) == EOF) {
6140  LAST;
6141  }
6142  else if (c1 == 'A') {
6143  /* ISO-8859-1 */
6144  g2 = ISO_8859_1;
6145  SKIP;
6146  }
6147  else {
6148  (*oconv)(0, ESC);
6149  (*oconv)(0, '.');
6150  SEND;
6151  }
6152  }
6153  else if (c1 == 'N') {
6154  /* SS2 */
6155  c1 = (*i_getc)(f);
6156  if (g2 == ISO_8859_1) {
6157  c2 = ISO_8859_1;
6158  SEND;
6159  }else{
6160  (*i_ungetc)(c1, f);
6161  /* lonely ESC */
6162  (*oconv)(0, ESC);
6163  SEND;
6164  }
6165  }
6166  else {
6167  i_ungetc(c1,f);
6168  /* lonely ESC */
6169  (*oconv)(0, ESC);
6170  SKIP;
6171  }
6172  } else if (c1 == ESC && iconv == s_iconv) {
6173  /* ESC in Shift_JIS */
6174  if ((c1 = (*i_getc)(f)) == EOF) {
6175  (*oconv)(0, ESC);
6176  LAST;
6177  } else if (c1 == '$') {
6178  /* J-PHONE emoji */
6179  if ((c1 = (*i_getc)(f)) == EOF) {
6180  LAST;
6181  } else if (('E' <= c1 && c1 <= 'G') ||
6182  ('O' <= c1 && c1 <= 'Q')) {
6183  /*
6184  NUM : 0 1 2 3 4 5
6185  BYTE: G E F O P Q
6186  C%7 : 1 6 0 2 3 4
6187  C%7 : 0 1 2 3 4 5 6
6188  NUM : 2 0 3 4 5 X 1
6189  */
6190  static const nkf_char jphone_emoji_first_table[7] =
6191  {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6192  c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
6193  if ((c1 = (*i_getc)(f)) == EOF) LAST;
6194  while (SP <= c1 && c1 <= 'z') {
6195  (*oconv)(0, c1 + c3);
6196  if ((c1 = (*i_getc)(f)) == EOF) LAST;
6197  }
6198  SKIP;
6199  }
6200  else {
6201  (*oconv)(0, ESC);
6202  (*oconv)(0, '$');
6203  SEND;
6204  }
6205  }
6206  else {
6207  i_ungetc(c1,f);
6208  /* lonely ESC */
6209  (*oconv)(0, ESC);
6210  SKIP;
6211  }
6212  } else if (c1 == LF || c1 == CR) {
6213  if (broken_f&4) {
6214  input_mode = ASCII; set_iconv(FALSE, 0);
6215  SEND;
6216  } else if (mime_decode_f && !mime_decode_mode){
6217  if (c1 == LF) {
6218  if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
6219  i_ungetc(SP,f);
6220  continue;
6221  } else {
6222  i_ungetc(c1,f);
6223  }
6224  c1 = LF;
6225  SEND;
6226  } else { /* if (c1 == CR)*/
6227  if ((c1=(*i_getc)(f))!=EOF) {
6228  if (c1==SP) {
6229  i_ungetc(SP,f);
6230  continue;
6231  } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
6232  i_ungetc(SP,f);
6233  continue;
6234  } else {
6235  i_ungetc(c1,f);
6236  }
6237  i_ungetc(LF,f);
6238  } else {
6239  i_ungetc(c1,f);
6240  }
6241  c1 = CR;
6242  SEND;
6243  }
6244  }
6245  } else
6246  SEND;
6247  }
6248  /* send: */
6249  switch(input_mode){
6250  case ASCII:
6251  switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
6252  case -2:
6253  /* 4 bytes UTF-8 */
6254  if ((c3 = (*i_getc)(f)) != EOF) {
6255  code_status(c3);
6256  c3 <<= 8;
6257  if ((c4 = (*i_getc)(f)) != EOF) {
6258  code_status(c4);
6259  (*iconv)(c2, c1, c3|c4);
6260  }
6261  }
6262  break;
6263  case -3:
6264  /* 4 bytes UTF-8 (check combining character) */
6265  if ((c3 = (*i_getc)(f)) != EOF) {
6266  if ((c4 = (*i_getc)(f)) != EOF) {
6267  if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
6268  (*i_ungetc)(c4, f);
6269  (*i_ungetc)(c3, f);
6270  w_iconv_nocombine(c2, c1, 0);
6271  }
6272  } else {
6273  (*i_ungetc)(c3, f);
6274  w_iconv_nocombine(c2, c1, 0);
6275  }
6276  } else {
6277  w_iconv_nocombine(c2, c1, 0);
6278  }
6279  break;
6280  case -1:
6281  /* 3 bytes EUC or UTF-8 */
6282  if ((c3 = (*i_getc)(f)) != EOF) {
6283  code_status(c3);
6284  if ((*iconv)(c2, c1, c3) == -3) {
6285  /* 6 bytes UTF-8 (check combining character) */
6286  nkf_char c5, c6;
6287  if ((c4 = (*i_getc)(f)) != EOF) {
6288  if ((c5 = (*i_getc)(f)) != EOF) {
6289  if ((c6 = (*i_getc)(f)) != EOF) {
6290  if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
6291  (*i_ungetc)(c6, f);
6292  (*i_ungetc)(c5, f);
6293  (*i_ungetc)(c4, f);
6294  w_iconv_nocombine(c2, c1, c3);
6295  }
6296  } else {
6297  (*i_ungetc)(c5, f);
6298  (*i_ungetc)(c4, f);
6299  w_iconv_nocombine(c2, c1, c3);
6300  }
6301  } else {
6302  (*i_ungetc)(c4, f);
6303  w_iconv_nocombine(c2, c1, c3);
6304  }
6305  } else {
6306  w_iconv_nocombine(c2, c1, c3);
6307  }
6308  }
6309  }
6310  break;
6311  }
6312  break;
6313  case JIS_X_0208:
6314  case JIS_X_0213_1:
6315  if (ms_ucs_map_f &&
6316  0x7F <= c2 && c2 <= 0x92 &&
6317  0x21 <= c1 && c1 <= 0x7E) {
6318  /* CP932 UDC */
6319  c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
6320  c2 = 0;
6321  }
6322  (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
6323  break;
6324 #ifdef X0212_ENABLE
6325  case JIS_X_0212:
6326  (*oconv)(PREFIX_EUCG3 | c2, c1);
6327  break;
6328 #endif /* X0212_ENABLE */
6329  case JIS_X_0213_2:
6330  (*oconv)(PREFIX_EUCG3 | c2, c1);
6331  break;
6332  default:
6333  (*oconv)(input_mode, c1); /* other special case */
6334  }
6335 
6336  c2 = 0;
6337  c3 = 0;
6338  continue;
6339  /* goto next_word */
6340  }
6341 
6342 finished:
6343  /* epilogue */
6344  (*iconv)(EOF, 0, 0);
6345  if (!input_codename)
6346  {
6347  if (is_8bit) {
6348  struct input_code *p = input_code_list;
6349  struct input_code *result = p;
6350  while (p->name){
6351  if (p->score < result->score) result = p;
6352  ++p;
6353  }
6354  set_input_codename(result->name);
6355 #ifdef CHECK_OPTION
6356  debug(result->name);
6357 #endif
6358  }
6359  }
6360  return 0;
6361 }
6362 
6363 /*
6364  * int options(unsigned char *cp)
6365  *
6366  * return values:
6367  * 0: success
6368  * -1: ArgumentError
6369  */
6370 static int
6371 options(unsigned char *cp)
6372 {
6373  nkf_char i, j;
6374  unsigned char *p;
6375  unsigned char *cp_back = NULL;
6376  nkf_encoding *enc;
6377 
6378  if (option_mode==1)
6379  return 0;
6380  while(*cp && *cp++!='-');
6381  while (*cp || cp_back) {
6382  if(!*cp){
6383  cp = cp_back;
6384  cp_back = NULL;
6385  continue;
6386  }
6387  p = 0;
6388  switch (*cp++) {
6389  case '-': /* literal options */
6390  if (!*cp || *cp == SP) { /* ignore the rest of arguments */
6391  option_mode = 1;
6392  return 0;
6393  }
6394  for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
6395  p = (unsigned char *)long_option[i].name;
6396  for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
6397  if (*p == cp[j] || cp[j] == SP){
6398  p = &cp[j] + 1;
6399  break;
6400  }
6401  p = 0;
6402  }
6403  if (p == 0) {
6404 #if !defined(PERL_XS) && !defined(WIN32DLL)
6405  fprintf(stderr, "unknown long option: --%s\n", cp);
6406 #endif
6407  return -1;
6408  }
6409  while(*cp && *cp != SP && cp++);
6410  if (long_option[i].alias[0]){
6411  cp_back = cp;
6412  cp = (unsigned char *)long_option[i].alias;
6413  }else{
6414 #ifndef PERL_XS
6415  if (strcmp(long_option[i].name, "help") == 0){
6416  usage();
6417  exit(EXIT_SUCCESS);
6418  }
6419 #endif
6420  if (strcmp(long_option[i].name, "ic=") == 0){
6421  enc = nkf_enc_find((char *)p);
6422  if (!enc) continue;
6423  input_encoding = enc;
6424  continue;
6425  }
6426  if (strcmp(long_option[i].name, "oc=") == 0){
6427  enc = nkf_enc_find((char *)p);
6428  /* if (enc <= 0) continue; */
6429  if (!enc) continue;
6430  output_encoding = enc;
6431  continue;
6432  }
6433  if (strcmp(long_option[i].name, "guess=") == 0){
6434  if (p[0] == '0' || p[0] == '1') {
6435  guess_f = 1;
6436  } else {
6437  guess_f = 2;
6438  }
6439  continue;
6440  }
6441 #ifdef OVERWRITE
6442  if (strcmp(long_option[i].name, "overwrite") == 0){
6443  file_out_f = TRUE;
6444  overwrite_f = TRUE;
6445  preserve_time_f = TRUE;
6446  continue;
6447  }
6448  if (strcmp(long_option[i].name, "overwrite=") == 0){
6449  file_out_f = TRUE;
6450  overwrite_f = TRUE;
6451  preserve_time_f = TRUE;
6452  backup_f = TRUE;
6453  backup_suffix = (char *)p;
6454  continue;
6455  }
6456  if (strcmp(long_option[i].name, "in-place") == 0){
6457  file_out_f = TRUE;
6458  overwrite_f = TRUE;
6459  preserve_time_f = FALSE;
6460  continue;
6461  }
6462  if (strcmp(long_option[i].name, "in-place=") == 0){
6463  file_out_f = TRUE;
6464  overwrite_f = TRUE;
6465  preserve_time_f = FALSE;
6466  backup_f = TRUE;
6467  backup_suffix = (char *)p;
6468  continue;
6469  }
6470 #endif
6471 #ifdef INPUT_OPTION
6472  if (strcmp(long_option[i].name, "cap-input") == 0){
6473  cap_f = TRUE;
6474  continue;
6475  }
6476  if (strcmp(long_option[i].name, "url-input") == 0){
6477  url_f = TRUE;
6478  continue;
6479  }
6480 #endif
6481 #ifdef NUMCHAR_OPTION
6482  if (strcmp(long_option[i].name, "numchar-input") == 0){
6483  numchar_f = TRUE;
6484  continue;
6485  }
6486 #endif
6487 #ifdef CHECK_OPTION
6488  if (strcmp(long_option[i].name, "no-output") == 0){
6489  noout_f = TRUE;
6490  continue;
6491  }
6492  if (strcmp(long_option[i].name, "debug") == 0){
6493  debug_f = TRUE;
6494  continue;
6495  }
6496 #endif
6497  if (strcmp(long_option[i].name, "cp932") == 0){
6498 #ifdef SHIFTJIS_CP932
6499  cp51932_f = TRUE;
6500  cp932inv_f = -TRUE;
6501 #endif
6502 #ifdef UTF8_OUTPUT_ENABLE
6503  ms_ucs_map_f = UCS_MAP_CP932;
6504 #endif
6505  continue;
6506  }
6507  if (strcmp(long_option[i].name, "no-cp932") == 0){
6508 #ifdef SHIFTJIS_CP932
6509  cp51932_f = FALSE;
6510  cp932inv_f = FALSE;
6511 #endif
6512 #ifdef UTF8_OUTPUT_ENABLE
6513  ms_ucs_map_f = UCS_MAP_ASCII;
6514 #endif
6515  continue;
6516  }
6517 #ifdef SHIFTJIS_CP932
6518  if (strcmp(long_option[i].name, "cp932inv") == 0){
6519  cp932inv_f = -TRUE;
6520  continue;
6521  }
6522 #endif
6523 
6524 #ifdef X0212_ENABLE
6525  if (strcmp(long_option[i].name, "x0212") == 0){
6526  x0212_f = TRUE;
6527  continue;
6528  }
6529 #endif
6530 
6531 #ifdef EXEC_IO
6532  if (strcmp(long_option[i].name, "exec-in") == 0){
6533  exec_f = 1;
6534  return 0;
6535  }
6536  if (strcmp(long_option[i].name, "exec-out") == 0){
6537  exec_f = -1;
6538  return 0;
6539  }
6540 #endif
6541 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6542  if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6543  no_cp932ext_f = TRUE;
6544  continue;
6545  }
6546  if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6547  no_best_fit_chars_f = TRUE;
6548  continue;
6549  }
6550  if (strcmp(long_option[i].name, "fb-skip") == 0){
6551  encode_fallback = NULL;
6552  continue;
6553  }
6554  if (strcmp(long_option[i].name, "fb-html") == 0){
6555  encode_fallback = encode_fallback_html;
6556  continue;
6557  }
6558  if (strcmp(long_option[i].name, "fb-xml") == 0){
6559  encode_fallback = encode_fallback_xml;
6560  continue;
6561  }
6562  if (strcmp(long_option[i].name, "fb-java") == 0){
6563  encode_fallback = encode_fallback_java;
6564  continue;
6565  }
6566  if (strcmp(long_option[i].name, "fb-perl") == 0){
6567  encode_fallback = encode_fallback_perl;
6568  continue;
6569  }
6570  if (strcmp(long_option[i].name, "fb-subchar") == 0){
6571  encode_fallback = encode_fallback_subchar;
6572  continue;
6573  }
6574  if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6575  encode_fallback = encode_fallback_subchar;
6576  unicode_subchar = 0;
6577  if (p[0] != '0'){
6578  /* decimal number */
6579  for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6580  unicode_subchar *= 10;
6581  unicode_subchar += hex2bin(p[i]);
6582  }
6583  }else if(p[1] == 'x' || p[1] == 'X'){
6584  /* hexadecimal number */
6585  for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6586  unicode_subchar <<= 4;
6587  unicode_subchar |= hex2bin(p[i]);
6588  }
6589  }else{
6590  /* octal number */
6591  for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6592  unicode_subchar *= 8;
6593  unicode_subchar += hex2bin(p[i]);
6594  }
6595  }
6596  w16e_conv(unicode_subchar, &i, &j);
6597  unicode_subchar = i<<8 | j;
6598  continue;
6599  }
6600 #endif
6601 #ifdef UTF8_OUTPUT_ENABLE
6602  if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6603  ms_ucs_map_f = UCS_MAP_MS;
6604  continue;
6605  }
6606 #endif
6607 #ifdef UNICODE_NORMALIZATION
6608  if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6609  nfc_f = TRUE;
6610  continue;
6611  }
6612 #endif
6613  if (strcmp(long_option[i].name, "prefix=") == 0){
6614  if (nkf_isgraph(p[0])){
6615  for (i = 1; nkf_isgraph(p[i]); i++){
6616  prefix_table[p[i]] = p[0];
6617  }
6618  }
6619  continue;
6620  }
6621 #if !defined(PERL_XS) && !defined(WIN32DLL)
6622  fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6623 #endif
6624  return -1;
6625  }
6626  continue;
6627  case 'b': /* buffered mode */
6628  unbuf_f = FALSE;
6629  continue;
6630  case 'u': /* non bufferd mode */
6631  unbuf_f = TRUE;
6632  continue;
6633  case 't': /* transparent mode */
6634  if (*cp=='1') {
6635  /* alias of -t */
6636  cp++;
6637  nop_f = TRUE;
6638  } else if (*cp=='2') {
6639  /*
6640  * -t with put/get
6641  *
6642  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6643  *
6644  */
6645  cp++;
6646  nop_f = 2;
6647  } else
6648  nop_f = TRUE;
6649  continue;
6650  case 'j': /* JIS output */
6651  case 'n':
6652  output_encoding = nkf_enc_from_index(ISO_2022_JP);
6653  continue;
6654  case 'e': /* AT&T EUC output */
6655  output_encoding = nkf_enc_from_index(EUCJP_NKF);
6656  continue;
6657  case 's': /* SJIS output */
6658  output_encoding = nkf_enc_from_index(SHIFT_JIS);
6659  continue;
6660  case 'l': /* ISO8859 Latin-1 support, no conversion */
6661  iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6662  input_encoding = nkf_enc_from_index(ISO_8859_1);
6663  continue;
6664  case 'i': /* Kanji IN ESC-$-@/B */
6665  if (*cp=='@'||*cp=='B')
6666  kanji_intro = *cp++;
6667  continue;
6668  case 'o': /* ASCII IN ESC-(-J/B/H */
6669  /* ESC ( H was used in initial JUNET messages */
6670  if (*cp=='J'||*cp=='B'||*cp=='H')
6671  ascii_intro = *cp++;
6672  continue;
6673  case 'h':
6674  /*
6675  bit:1 katakana->hiragana
6676  bit:2 hiragana->katakana
6677  */
6678  if ('9'>= *cp && *cp>='0')
6679  hira_f |= (*cp++ -'0');
6680  else
6681  hira_f |= 1;
6682  continue;
6683  case 'r':
6684  rot_f = TRUE;
6685  continue;
6686 #if defined(MSDOS) || defined(__OS2__)
6687  case 'T':
6688  binmode_f = FALSE;
6689  continue;
6690 #endif
6691 #ifndef PERL_XS
6692  case 'V':
6693  show_configuration();
6694  exit(EXIT_SUCCESS);
6695  break;
6696  case 'v':
6697  version();
6698  exit(EXIT_SUCCESS);
6699  break;
6700 #endif
6701 #ifdef UTF8_OUTPUT_ENABLE
6702  case 'w': /* UTF-{8,16,32} output */
6703  if (cp[0] == '8') {
6704  cp++;
6705  if (cp[0] == '0'){
6706  cp++;
6707  output_encoding = nkf_enc_from_index(UTF_8N);
6708  } else {
6709  output_bom_f = TRUE;
6710  output_encoding = nkf_enc_from_index(UTF_8_BOM);
6711  }
6712  } else {
6713  int enc_idx;
6714  if ('1'== cp[0] && '6'==cp[1]) {
6715  cp += 2;
6716  enc_idx = UTF_16;
6717  } else if ('3'== cp[0] && '2'==cp[1]) {
6718  cp += 2;
6719  enc_idx = UTF_32;
6720  } else {
6721  output_encoding = nkf_enc_from_index(UTF_8);
6722  continue;
6723  }
6724  if (cp[0]=='L') {
6725  cp++;
6726  output_endian = ENDIAN_LITTLE;
6727  output_bom_f = TRUE;
6728  } else if (cp[0] == 'B') {
6729  cp++;
6730  output_bom_f = TRUE;
6731  }
6732  if (cp[0] == '0'){
6733  output_bom_f = FALSE;
6734  cp++;
6735  enc_idx = enc_idx == UTF_16
6736  ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6737  : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6738  } else {
6739  enc_idx = enc_idx == UTF_16
6740  ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6741  : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6742  }
6743  output_encoding = nkf_enc_from_index(enc_idx);
6744  }
6745  continue;
6746 #endif
6747 #ifdef UTF8_INPUT_ENABLE
6748  case 'W': /* UTF input */
6749  if (cp[0] == '8') {
6750  cp++;
6751  input_encoding = nkf_enc_from_index(UTF_8);
6752  }else{
6753  int enc_idx;
6754  if ('1'== cp[0] && '6'==cp[1]) {
6755  cp += 2;
6756  input_endian = ENDIAN_BIG;
6757  enc_idx = UTF_16;
6758  } else if ('3'== cp[0] && '2'==cp[1]) {
6759  cp += 2;
6760  input_endian = ENDIAN_BIG;
6761  enc_idx = UTF_32;
6762  } else {
6763  input_encoding = nkf_enc_from_index(UTF_8);
6764  continue;
6765  }
6766  if (cp[0]=='L') {
6767  cp++;
6768  input_endian = ENDIAN_LITTLE;
6769  } else if (cp[0] == 'B') {
6770  cp++;
6771  input_endian = ENDIAN_BIG;
6772  }
6773  enc_idx = (enc_idx == UTF_16
6774  ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6775  : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6776  input_encoding = nkf_enc_from_index(enc_idx);
6777  }
6778  continue;
6779 #endif
6780  /* Input code assumption */
6781  case 'J': /* ISO-2022-JP input */
6782  input_encoding = nkf_enc_from_index(ISO_2022_JP);
6783  continue;
6784  case 'E': /* EUC-JP input */
6785  input_encoding = nkf_enc_from_index(EUCJP_NKF);
6786  continue;
6787  case 'S': /* Shift_JIS input */
6788  input_encoding = nkf_enc_from_index(SHIFT_JIS);
6789  continue;
6790  case 'Z': /* Convert X0208 alphabet to asii */
6791  /* alpha_f
6792  bit:0 Convert JIS X 0208 Alphabet to ASCII
6793  bit:1 Convert Kankaku to one space
6794  bit:2 Convert Kankaku to two spaces
6795  bit:3 Convert HTML Entity
6796  bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6797  */
6798  while ('0'<= *cp && *cp <='4') {
6799  alpha_f |= 1 << (*cp++ - '0');
6800  }
6801  alpha_f |= 1;
6802  continue;
6803  case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6804  x0201_f = FALSE; /* No X0201->X0208 conversion */
6805  /* accept X0201
6806  ESC-(-I in JIS, EUC, MS Kanji
6807  SI/SO in JIS, EUC, MS Kanji
6808  SS2 in EUC, JIS, not in MS Kanji
6809  MS Kanji (0xa0-0xdf)
6810  output X0201
6811  ESC-(-I in JIS (0x20-0x5f)
6812  SS2 in EUC (0xa0-0xdf)
6813  0xa0-0xd in MS Kanji (0xa0-0xdf)
6814  */
6815  continue;
6816  case 'X': /* Convert X0201 kana to X0208 */
6817  x0201_f = TRUE;
6818  continue;
6819  case 'F': /* prserve new lines */
6820  fold_preserve_f = TRUE;
6821  case 'f': /* folding -f60 or -f */
6822  fold_f = TRUE;
6823  fold_len = 0;
6824  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6825  fold_len *= 10;
6826  fold_len += *cp++ - '0';
6827  }
6828  if (!(0<fold_len && fold_len<BUFSIZ))
6829  fold_len = DEFAULT_FOLD;
6830  if (*cp=='-') {
6831  fold_margin = 0;
6832  cp++;
6833  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6834  fold_margin *= 10;
6835  fold_margin += *cp++ - '0';
6836  }
6837  }
6838  continue;
6839  case 'm': /* MIME support */
6840  /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6841  if (*cp=='B'||*cp=='Q') {
6842  mime_decode_mode = *cp++;
6843  mimebuf_f = FIXED_MIME;
6844  } else if (*cp=='N') {
6845  mime_f = TRUE; cp++;
6846  } else if (*cp=='S') {
6847  mime_f = STRICT_MIME; cp++;
6848  } else if (*cp=='0') {
6849  mime_decode_f = FALSE;
6850  mime_f = FALSE; cp++;
6851  } else {
6852  mime_f = STRICT_MIME;
6853  }
6854  continue;
6855  case 'M': /* MIME output */
6856  if (*cp=='B') {
6857  mimeout_mode = 'B';
6858  mimeout_f = FIXED_MIME; cp++;
6859  } else if (*cp=='Q') {
6860  mimeout_mode = 'Q';
6861  mimeout_f = FIXED_MIME; cp++;
6862  } else {
6863  mimeout_f = TRUE;
6864  }
6865  continue;
6866  case 'B': /* Broken JIS support */
6867  /* bit:0 no ESC JIS
6868  bit:1 allow any x on ESC-(-x or ESC-$-x
6869  bit:2 reset to ascii on NL
6870  */
6871  if ('9'>= *cp && *cp>='0')
6872  broken_f |= 1<<(*cp++ -'0');
6873  else
6874  broken_f |= TRUE;
6875  continue;
6876 #ifndef PERL_XS
6877  case 'O':/* for Output file */
6878  file_out_f = TRUE;
6879  continue;
6880 #endif
6881  case 'c':/* add cr code */
6882  eolmode_f = CRLF;
6883  continue;
6884  case 'd':/* delete cr code */
6885  eolmode_f = LF;
6886  continue;
6887  case 'I': /* ISO-2022-JP output */
6888  iso2022jp_f = TRUE;
6889  continue;
6890  case 'L': /* line mode */
6891  if (*cp=='u') { /* unix */
6892  eolmode_f = LF; cp++;
6893  } else if (*cp=='m') { /* mac */
6894  eolmode_f = CR; cp++;
6895  } else if (*cp=='w') { /* windows */
6896  eolmode_f = CRLF; cp++;
6897  } else if (*cp=='0') { /* no conversion */
6898  eolmode_f = 0; cp++;
6899  }
6900  continue;
6901 #ifndef PERL_XS
6902  case 'g':
6903  if ('2' <= *cp && *cp <= '9') {
6904  guess_f = 2;
6905  cp++;
6906  } else if (*cp == '0' || *cp == '1') {
6907  guess_f = 1;
6908  cp++;
6909  } else {
6910  guess_f = 1;
6911  }
6912  continue;
6913 #endif
6914  case SP:
6915  /* module multiple options in a string are allowed for Perl module */
6916  while(*cp && *cp++!='-');
6917  continue;
6918  default:
6919 #if !defined(PERL_XS) && !defined(WIN32DLL)
6920  fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6921 #endif
6922  /* bogus option but ignored */
6923  return -1;
6924  }
6925  }
6926  return 0;
6927 }
6928 
6929 #ifdef WIN32DLL
6930 #include "nkf32dll.c"
6931 #elif defined(PERL_XS)
6932 #else /* WIN32DLL */
6933 int
6934 main(int argc, char **argv)
6935 {
6936  FILE *fin;
6937  unsigned char *cp;
6938 
6939  char *outfname = NULL;
6940  char *origfname;
6941 
6942 #ifdef EASYWIN /*Easy Win */
6943  _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6944 #endif
6945 #ifdef DEFAULT_CODE_LOCALE
6946  setlocale(LC_CTYPE, "");
6947 #endif
6948  nkf_state_init();
6949 
6950  for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6951  cp = (unsigned char *)*argv;
6952  options(cp);
6953 #ifdef EXEC_IO
6954  if (exec_f){
6955  int fds[2], pid;
6956  if (pipe(fds) < 0 || (pid = fork()) < 0){
6957  abort();
6958  }
6959  if (pid == 0){
6960  if (exec_f > 0){
6961  close(fds[0]);
6962  dup2(fds[1], 1);
6963  }else{
6964  close(fds[1]);
6965  dup2(fds[0], 0);
6966  }
6967  execvp(argv[1], &argv[1]);
6968  }
6969  if (exec_f > 0){
6970  close(fds[1]);
6971  dup2(fds[0], 0);
6972  }else{
6973  close(fds[0]);
6974  dup2(fds[1], 1);
6975  }
6976  argc = 0;
6977  break;
6978  }
6979 #endif
6980  }
6981 
6982  if (guess_f) {
6983 #ifdef CHECK_OPTION
6984  int debug_f_back = debug_f;
6985 #endif
6986 #ifdef EXEC_IO
6987  int exec_f_back = exec_f;
6988 #endif
6989 #ifdef X0212_ENABLE
6990  int x0212_f_back = x0212_f;
6991 #endif
6992  int x0213_f_back = x0213_f;
6993  int guess_f_back = guess_f;
6994  reinit();
6995  guess_f = guess_f_back;
6996  mime_f = FALSE;
6997 #ifdef CHECK_OPTION
6998  debug_f = debug_f_back;
6999 #endif
7000 #ifdef EXEC_IO
7001  exec_f = exec_f_back;
7002 #endif
7003  x0212_f = x0212_f_back;
7004  x0213_f = x0213_f_back;
7005  }
7006 
7007  if (binmode_f == TRUE)
7008 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7009  if (freopen("","wb",stdout) == NULL)
7010  return (-1);
7011 #else
7012  setbinmode(stdout);
7013 #endif
7014 
7015  if (unbuf_f)
7016  setbuf(stdout, (char *) NULL);
7017  else
7018  setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
7019 
7020  if (argc == 0) {
7021  if (binmode_f == TRUE)
7022 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7023  if (freopen("","rb",stdin) == NULL) return (-1);
7024 #else
7025  setbinmode(stdin);
7026 #endif
7027  setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
7028  if (nop_f)
7029  noconvert(stdin);
7030  else {
7031  kanji_convert(stdin);
7032  if (guess_f) print_guessed_code(NULL);
7033  }
7034  } else {
7035  int nfiles = argc;
7036  int is_argument_error = FALSE;
7037  while (argc--) {
7038  input_codename = NULL;
7039  input_eol = 0;
7040 #ifdef CHECK_OPTION
7041  iconv_for_check = 0;
7042 #endif
7043  if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
7044  perror(*(argv-1));
7045  is_argument_error = TRUE;
7046  continue;
7047  } else {
7048 #ifdef OVERWRITE
7049  int fd = 0;
7050  int fd_backup = 0;
7051 #endif
7052 
7053  /* reopen file for stdout */
7054  if (file_out_f == TRUE) {
7055 #ifdef OVERWRITE
7056  if (overwrite_f){
7057  outfname = nkf_xmalloc(strlen(origfname)
7058  + strlen(".nkftmpXXXXXX")
7059  + 1);
7060  strcpy(outfname, origfname);
7061 #ifdef MSDOS
7062  {
7063  int i;
7064  for (i = strlen(outfname); i; --i){
7065  if (outfname[i - 1] == '/'
7066  || outfname[i - 1] == '\\'){
7067  break;
7068  }
7069  }
7070  outfname[i] = '\0';
7071  }
7072  strcat(outfname, "ntXXXXXX");
7073  mktemp(outfname);
7074  fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7075  S_IREAD | S_IWRITE);
7076 #else
7077  strcat(outfname, ".nkftmpXXXXXX");
7078  fd = mkstemp(outfname);
7079 #endif
7080  if (fd < 0
7081  || (fd_backup = dup(fileno(stdout))) < 0
7082  || dup2(fd, fileno(stdout)) < 0
7083  ){
7084  perror(origfname);
7085  return -1;
7086  }
7087  }else
7088 #endif
7089  if(argc == 1) {
7090  outfname = *argv++;
7091  argc--;
7092  } else {
7093  outfname = "nkf.out";
7094  }
7095 
7096  if(freopen(outfname, "w", stdout) == NULL) {
7097  perror (outfname);
7098  return (-1);
7099  }
7100  if (binmode_f == TRUE) {
7101 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7102  if (freopen("","wb",stdout) == NULL)
7103  return (-1);
7104 #else
7105  setbinmode(stdout);
7106 #endif
7107  }
7108  }
7109  if (binmode_f == TRUE)
7110 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7111  if (freopen("","rb",fin) == NULL)
7112  return (-1);
7113 #else
7114  setbinmode(fin);
7115 #endif
7116  setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
7117  if (nop_f)
7118  noconvert(fin);
7119  else {
7120  char *filename = NULL;
7121  kanji_convert(fin);
7122  if (nfiles > 1) filename = origfname;
7123  if (guess_f) print_guessed_code(filename);
7124  }
7125  fclose(fin);
7126 #ifdef OVERWRITE
7127  if (overwrite_f) {
7128  struct stat sb;
7129 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7130  time_t tb[2];
7131 #else
7132  struct utimbuf tb;
7133 #endif
7134 
7135  fflush(stdout);
7136  close(fd);
7137  if (dup2(fd_backup, fileno(stdout)) < 0){
7138  perror("dup2");
7139  }
7140  if (stat(origfname, &sb)) {
7141  fprintf(stderr, "Can't stat %s\n", origfname);
7142  }
7143  /* $B%Q!<%_%C%7%g%s$rI|85(B */
7144  if (chmod(outfname, sb.st_mode)) {
7145  fprintf(stderr, "Can't set permission %s\n", outfname);
7146  }
7147 
7148  /* $B%?%$%`%9%?%s%W$rI|85(B */
7149  if(preserve_time_f){
7150 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7151  tb[0] = tb[1] = sb.st_mtime;
7152  if (utime(outfname, tb)) {
7153  fprintf(stderr, "Can't set timestamp %s\n", outfname);
7154  }
7155 #else
7156  tb.actime = sb.st_atime;
7157  tb.modtime = sb.st_mtime;
7158  if (utime(outfname, &tb)) {
7159  fprintf(stderr, "Can't set timestamp %s\n", outfname);
7160  }
7161 #endif
7162  }
7163  if(backup_f){
7164  char *backup_filename = get_backup_filename(backup_suffix, origfname);
7165 #ifdef MSDOS
7166  unlink(backup_filename);
7167 #endif
7168  if (rename(origfname, backup_filename)) {
7169  perror(backup_filename);
7170  fprintf(stderr, "Can't rename %s to %s\n",
7171  origfname, backup_filename);
7172  }
7173  nkf_xfree(backup_filename);
7174  }else{
7175 #ifdef MSDOS
7176  if (unlink(origfname)){
7177  perror(origfname);
7178  }
7179 #endif
7180  }
7181  if (rename(outfname, origfname)) {
7182  perror(origfname);
7183  fprintf(stderr, "Can't rename %s to %s\n",
7184  outfname, origfname);
7185  }
7186  nkf_xfree(outfname);
7187  }
7188 #endif
7189  }
7190  }
7191  if (is_argument_error)
7192  return(-1);
7193  }
7194 #ifdef EASYWIN /*Easy Win */
7195  if (file_out_f == FALSE)
7196  scanf("%d",&end_check);
7197  else
7198  fclose(stdout);
7199 #else /* for Other OS */
7200  if (file_out_f == TRUE)
7201  fclose(stdout);
7202 #endif /*Easy Win */
7203  return (0);
7204 }
7205 #endif /* WIN32DLL */
strcmp
int strcmp(const char *, const char *)
nkf.h
ASCII
@ ASCII
Definition: nkf.c:87
utf8tbl.h
EUC_JP
@ EUC_JP
Definition: nkf.c:99
input_code::iconv_func
nkf_char(* iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:332
nkf_encoding::id
const int id
Definition: nkf.c:162
i
uint32_t i
Definition: rb_mjit_min_header-2.7.1.h:5464
BS
#define BS
Definition: nkf.c:70
nkf_xfree
#define nkf_xfree(ptr)
Definition: nkf.c:714
chmod
int chmod(const char *__path, mode_t __mode)
ISO_2022_JP_3
@ ISO_2022_JP_3
Definition: nkf.c:94
TRUE
#define TRUE
Definition: nkf.h:175
abort
void abort(void) __attribute__((__noreturn__))
stat
Definition: rb_mjit_min_header-2.7.1.h:2384
input_code::index
nkf_char index
Definition: nkf.c:329
OUTPUT_UTF8
#define OUTPUT_UTF8(val)
Definition: nkf.c:2803
nkf_buf_t::ptr
nkf_char * ptr
Definition: nkf.c:837
nkf_buf_t
Definition: nkf.c:834
SCORE_L2
#define SCORE_L2
Definition: nkf.c:2956
ISO_2022_JP
@ ISO_2022_JP
Definition: nkf.c:89
SCORE_INIT
#define SCORE_INIT
Definition: nkf.c:2966
nkf_enc_asciicompat
#define nkf_enc_asciicompat(enc)
Definition: nkf.c:763
stdout
#define stdout
Definition: rb_mjit_min_header-2.7.1.h:1484
SCORE_CP932
#define SCORE_CP932
Definition: nkf.c:2959
nkf_char_unicode_p
#define nkf_char_unicode_p(c)
Definition: nkf.c:430
NkfEncodingEUC_JP
nkf_native_encoding NkfEncodingEUC_JP
Definition: nkf.c:156
BINARY
@ BINARY
Definition: nkf.c:122
assert
#define assert(x)
Definition: dlmalloc.c:1176
UTF_16BE_BOM
@ UTF_16BE_BOM
Definition: nkf.c:114
pipe
int pipe(int __fildes[2])
range
#define range(low, item, hi)
Definition: date_strftime.c:21
OVERWRITE
#define OVERWRITE
Definition: config.h:16
id
const int id
Definition: nkf.c:209
sprintf
int sprintf(char *__restrict, const char *__restrict,...) __attribute__((__format__(__printf__
DEL
#define DEL
Definition: nkf.c:76
nkf_char
int nkf_char
Definition: nkf.h:38
ISO_2022_JP_2004
@ ISO_2022_JP_2004
Definition: nkf.c:95
utf8_to_euc_2bytes_x0213
const unsigned short *const utf8_to_euc_2bytes_x0213[]
Definition: utf8tbl.c:12540
nkf_char_unicode_value_p
#define nkf_char_unicode_value_p(c)
Definition: nkf.c:432
mime_priority_func
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
Definition: nkf.c:4289
fopen
FILE * fopen(const char *__restrict _name, const char *__restrict _type)
setvbuffer
#define setvbuffer(fp, buf, size)
Definition: nkf.h:91
strcat
char * strcat(char *__restrict, const char *__restrict)
STD_GC_BUFSIZE
#define STD_GC_BUFSIZE
Definition: nkf.c:3321
OUTPUT_UTF32
#define OUTPUT_UTF32(c)
Definition: nkf.c:2902
nkf_char_unicode_new
#define nkf_char_unicode_new(c)
Definition: nkf.c:429
NKF_ICONV_WAIT_COMBINING_CHAR
#define NKF_ICONV_WAIT_COMBINING_CHAR
Definition: nkf.c:2333
UTF_32
@ UTF_32
Definition: nkf.c:117
PREFIX_EUCG3
#define PREFIX_EUCG3
Definition: nkf.c:422
DEFAULT_CODE_LOCALE
#define DEFAULT_CODE_LOCALE
Definition: nkf.h:137
EINVAL
#define EINVAL
Definition: rb_mjit_min_header-2.7.1.h:10964
UTF8_OUTPUT_ENABLE
#define UTF8_OUTPUT_ENABLE
Definition: config.h:6
nkf_encoding_table
nkf_encoding nkf_encoding_table[]
Definition: nkf.c:167
output_ascii_escape_sequence
#define output_ascii_escape_sequence(mode)
Definition: nkf.c:2552
nkf_state_t::mimeout_state
nkf_char mimeout_state
Definition: nkf.c:3315
MORE
#define MORE
Definition: nkf.c:5836
NkfEncodingUTF_32
nkf_native_encoding NkfEncodingUTF_32
Definition: nkf.c:159
NKF_ICONV_INVALID_CODE_RANGE
#define NKF_ICONV_INVALID_CODE_RANGE
Definition: nkf.c:2332
UTF16_TO_UTF32
#define UTF16_TO_UTF32(lead, trail)
Definition: nkf.c:434
UTF_16
@ UTF_16
Definition: nkf.c:112
mktemp
char * mktemp(char *) __attribute__((__deprecated__("the use of `mktemp' is dangerous
ARG_UNUSED
#define ARG_UNUSED
Definition: nkf.h:181
HOLD_SIZE
#define HOLD_SIZE
Definition: nkf.c:304
S_IREAD
#define S_IREAD
Definition: rb_mjit_min_header-2.7.1.h:2420
alias
const char * alias
Definition: nkf.c:1159
COPY_RIGHT
#define COPY_RIGHT
Definition: nkf.c:25
SCORE_NO_EXIST
#define SCORE_NO_EXIST
Definition: nkf.c:2962
assert.h
NKF_VERSION
#define NKF_VERSION
Definition: nkf.c:23
NUMCHAR_OPTION
#define NUMCHAR_OPTION
Definition: config.h:22
input_code::status_func
void(* status_func)(struct input_code *, nkf_char)
Definition: nkf.c:331
cp932inv
const unsigned short cp932inv[2][189]
Definition: utf8tbl.c:13634
nkf_enc_to_iconv
#define nkf_enc_to_iconv(enc)
Definition: nkf.c:761
fclose
int fclose(FILE *)
nkf_buf_length
#define nkf_buf_length(buf)
Definition: nkf.c:859
UCS_MAP_CP932
#define UCS_MAP_CP932
Definition: nkf.c:349
EXIT_FAILURE
#define EXIT_FAILURE
Definition: eval_intern.h:32
UTF_8
@ UTF_8
Definition: nkf.c:108
ptr
struct RIMemo * ptr
Definition: debug.c:74
euc_to_utf8_2bytes
const unsigned short *const euc_to_utf8_2bytes[]
Definition: utf8tbl.c:3059
nkf_enc_unicode_p
#define nkf_enc_unicode_p(enc)
Definition: nkf.c:766
TAB
#define TAB
Definition: nkf.c:71
MIME_DECODE_DEFAULT
#define MIME_DECODE_DEFAULT
Definition: nkf.h:13
UTF_32BE
@ UTF_32BE
Definition: nkf.c:118
nkf_isalnum
#define nkf_isalnum(c)
Definition: nkf.c:289
MIME_BUF_MASK
#define MIME_BUF_MASK
Definition: nkf.c:4319
stdin
#define stdin
Definition: rb_mjit_min_header-2.7.1.h:1483
NULL
#define NULL
Definition: _sdbm.c:101
char
#define char
Definition: rb_mjit_min_header-2.7.1.h:2876
nkf_enc_name
#define nkf_enc_name(enc)
Definition: nkf.c:758
NKF_UNSPECIFIED
#define NKF_UNSPECIFIED
Definition: nkf.c:387
CP50221
@ CP50221
Definition: nkf.c:91
byte_order
byte_order
Definition: nkf.c:61
last
unsigned int last
Definition: nkf.c:4324
set_input_mode
#define set_input_mode(mode)
Definition: nkf.c:5839
nkf_state_t::broken_state
nkf_char broken_state
Definition: nkf.c:3313
nkf_noescape_mime
#define nkf_noescape_mime(c)
Definition: nkf.c:297
strlen
size_t strlen(const char *)
euc_to_utf8_1byte
const unsigned short euc_to_utf8_1byte[]
Definition: utf8tbl.c:3045
STRICT_MIME
#define STRICT_MIME
Definition: nkf.c:58
LF
#define LF
Definition: nkf.c:72
GETA2
#define GETA2
Definition: nkf.c:316
MAXRECOVER
#define MAXRECOVER
Definition: nkf.c:4329
CP10001
@ CP10001
Definition: nkf.c:98
freopen
FILE * freopen(const char *__restrict, const char *__restrict, FILE *__restrict)
rot13
#define rot13(c)
Definition: nkf.c:4156
MIME_BUF_SIZE
#define MIME_BUF_SIZE
Definition: nkf.c:4318
UTF_32BE_BOM
@ UTF_32BE_BOM
Definition: nkf.c:119
SHIFT_JIS_2004
@ SHIFT_JIS_2004
Definition: nkf.c:105
debug
#define debug(lvl, x...)
Definition: ffi.c:52
utf8_to_euc_2bytes_932
const unsigned short *const utf8_to_euc_2bytes_932[]
Definition: utf8tbl.c:12480
EUCJP_NKF
@ EUCJP_NKF
Definition: nkf.c:100
UTF_16LE_BOM
@ UTF_16LE_BOM
Definition: nkf.c:116
input_code::_file_stat
int _file_stat
Definition: nkf.c:333
nkf_state_t::nfc_buf
nkf_buf_t * nfc_buf
Definition: nkf.c:3316
nkf_isoctal
#define nkf_isoctal(c)
Definition: nkf.c:283
NKF_INT32_C
#define NKF_INT32_C(n)
Definition: nkf.h:39
DEFAULT_J
#define DEFAULT_J
Definition: nkf.c:311
nkf_buf_empty_p
#define nkf_buf_empty_p(buf)
Definition: nkf.c:860
void
void
Definition: rb_mjit_min_header-2.7.1.h:13278
rot47
#define rot47(c)
Definition: nkf.c:4166
utimbuf::actime
long actime
Definition: file.c:2865
SO
#define SO
Definition: nkf.c:78
ENDIAN_BIG
@ ENDIAN_BIG
Definition: nkf.c:62
input_code
Definition: nkf.c:325
SCORE_ERROR
#define SCORE_ERROR
Definition: nkf.c:2964
ISO_2022_JP_1
@ ISO_2022_JP_1
Definition: nkf.c:93
nkf_char_unicode_bmp_p
#define nkf_char_unicode_bmp_p(c)
Definition: nkf.c:431
UTF_16LE
@ UTF_16LE
Definition: nkf.c:115
SJ6394
#define SJ6394
x0213_1_surrogate_table
const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3]
Definition: utf8tbl.c:3250
is_ibmext_in_sjis
#define is_ibmext_in_sjis(c2)
Definition: nkf.c:301
is_eucg3
#define is_eucg3(c2)
Definition: nkf.c:296
fork
pid_t fork(void)
s2
const char * s2
Definition: rb_mjit_min_header-2.7.1.h:5454
VALUE_MASK
#define VALUE_MASK
Definition: nkf.c:425
utf8_to_euc_3bytes_932
const unsigned short *const *const utf8_to_euc_3bytes_932[]
Definition: utf8tbl.c:12582
input_code::name
const char * name
Definition: nkf.c:326
utf8_to_euc_3bytes_mac
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
Definition: utf8tbl.c:12588
encoding_name_to_id_table
struct @54 encoding_name_to_id_table[]
UTF_8_BOM
@ UTF_8_BOM
Definition: nkf.c:110
NKF_ICONV_NOT_COMBINED
#define NKF_ICONV_NOT_COMBINED
Definition: nkf.c:2334
bin2hex
#define bin2hex(c)
Definition: nkf.c:295
nkf_isprint
#define nkf_isprint(c)
Definition: nkf.c:290
input_code_list
struct input_code input_code_list[]
Definition: nkf.c:475
stat
int stat(const char *__restrict __path, struct stat *__restrict __sbuf)
UTF_32LE_BOM
@ UTF_32LE_BOM
Definition: nkf.c:121
NKF_ICONV_NEED_TWO_MORE_BYTES
#define NKF_ICONV_NEED_TWO_MORE_BYTES
Definition: nkf.c:2405
SCORE_KANA
#define SCORE_KANA
Definition: nkf.c:2957
nkf_isxdigit
#define nkf_isxdigit(c)
Definition: nkf.c:285
SHIFT_JISX0213
@ SHIFT_JISX0213
Definition: nkf.c:104
NkfEncodingISO_2022_JP
nkf_native_encoding NkfEncodingISO_2022_JP
Definition: nkf.c:154
UTF_8N
@ UTF_8N
Definition: nkf.c:109
euc_to_utf8_2bytes_x0213
const unsigned short *const euc_to_utf8_2bytes_x0213[]
Definition: utf8tbl.c:3139
CP50220
@ CP50220
Definition: nkf.c:90
x0212_to_utf8_2bytes
const unsigned short *const x0212_to_utf8_2bytes[]
Definition: utf8tbl.c:3167
nkf_buf_t::len
long len
Definition: nkf.c:836
x0213_2_surrogate_table
const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3]
Definition: utf8tbl.c:3278
dup
int dup(int __fildes)
malloc
void * malloc(size_t) __attribute__((__malloc__)) __attribute__((__warn_unused_result__)) __attribute__((__alloc_size__(1)))
nkf_byte_jisx0201_katakana_p
#define nkf_byte_jisx0201_katakana_p(c)
Definition: nkf.c:302
unlink
int unlink(const char *__path)
SI
#define SI
Definition: nkf.c:77
EXIT_SUCCESS
#define EXIT_SUCCESS
Definition: error.c:39
input
unsigned int input
Definition: nkf.c:4325
size_t
long unsigned int size_t
Definition: rb_mjit_min_header-2.7.1.h:666
SCORE_DEPEND
#define SCORE_DEPEND
Definition: nkf.c:2958
HELP_OUTPUT
#define HELP_OUTPUT
Definition: nkf.h:27
size
int size
Definition: encoding.c:58
FALSE
#define FALSE
Definition: nkf.h:174
euc_to_utf8_2bytes_ms
const unsigned short *const euc_to_utf8_2bytes_ms[]
Definition: utf8tbl.c:3086
CRLF
#define CRLF
Definition: nkf.c:81
execvp
int execvp(const char *__file, char *const __argv[])
mime_input_buf
#define mime_input_buf(n)
Definition: nkf.c:4320
hex2bin
#define hex2bin(c)
Definition: nkf.c:292
UCS_MAP_MS
#define UCS_MAP_MS
Definition: nkf.c:348
time_t
long time_t
Definition: rb_mjit_min_header-2.7.1.h:1236
E2BIG
#define E2BIG
Definition: rb_mjit_min_header-2.7.1.h:10949
ENDIAN_3412
@ ENDIAN_3412
Definition: nkf.c:65
SEND
#define SEND
Definition: nkf.c:5837
stat::st_mode
mode_t st_mode
Definition: rb_mjit_min_header-2.7.1.h:2388
SCORE_X0213
#define SCORE_X0213
Definition: nkf.c:2961
JIS_X_0201_1976_K
@ JIS_X_0201_1976_K
Definition: nkf.c:124
fileno
int fileno(FILE *)
scanf
int int int int scanf(const char *__restrict,...) __attribute__((__format__(__scanf__
CR
#define CR
Definition: nkf.c:73
ENDIAN_LITTLE
@ ENDIAN_LITTLE
Definition: nkf.c:63
SJ0162
#define SJ0162
EUC_JISX0213
@ EUC_JISX0213
Definition: nkf.c:106
index
int index
Definition: rb_mjit_min_header-2.7.1.h:11251
JIS_X_0208
@ JIS_X_0208
Definition: nkf.c:128
WINDOWS_31J
@ WINDOWS_31J
Definition: nkf.c:97
UTF_16BE
@ UTF_16BE
Definition: nkf.c:113
perror
void perror(const char *)
EOF
#define EOF
Definition: vsnprintf.c:203
BUFSIZ
#define BUFSIZ
Definition: rb_mjit_min_header-2.7.1.h:1474
nkf_state_t::broken_buf
nkf_buf_t * broken_buf
Definition: nkf.c:3314
dup2
RUBY_EXTERN int dup2(int, int)
Definition: dup2.c:27
putchar
int putchar(int)
FOLD_MARGIN
#define FOLD_MARGIN
Definition: nkf.c:504
nkf_enc_cp5022x_p
#define nkf_enc_cp5022x_p(enc)
Definition: nkf.c:770
JIS_X_0212
@ JIS_X_0212
Definition: nkf.c:129
utimbuf::modtime
long modtime
Definition: file.c:2866
buf
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
nkf_encoding::base_encoding
const nkf_native_encoding * base_encoding
Definition: nkf.c:164
nkf_native_encoding::name
const char * name
Definition: nkf.c:148
nkf_isblank
#define nkf_isblank(c)
Definition: nkf.c:286
argv
char ** argv
Definition: ruby.c:223
NkfEncodingUTF_16
nkf_native_encoding NkfEncodingUTF_16
Definition: nkf.c:158
f
#define f
RANGE_NUM_MAX
#define RANGE_NUM_MAX
ESC
#define ESC
Definition: nkf.c:74
x0212_to_utf8_2bytes_x0213
const unsigned short *const x0212_to_utf8_2bytes_x0213[]
Definition: utf8tbl.c:3193
main
int main(int argc, char **argv)
Definition: nkf.c:6934
getc
int getc(FILE *)
nkf_encoding
Definition: nkf.c:161
nkf_native_encoding
Definition: nkf.c:147
EUCJP_ASCII
@ EUCJP_ASCII
Definition: nkf.c:103
str
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
is_alnum
#define is_alnum(c)
Definition: nkf.c:278
cc
const struct rb_call_cache * cc
Definition: rb_mjit_min_header-2.7.1.h:13233
x0213_combining_table
const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3]
Definition: utf8tbl.c:3223
LAST
#define LAST
Definition: nkf.c:5838
close
int close(int __fildes)
MIMEOUT_BUF_LENGTH
#define MIMEOUT_BUF_LENGTH
Definition: nkf.c:5052
setbuf
void setbuf(FILE *__restrict, char *__restrict)
nkf_toupper
#define nkf_toupper(c)
Definition: nkf.c:282
CP50222
@ CP50222
Definition: nkf.c:92
nkf_state_t
Definition: nkf.c:3311
CP51932
@ CP51932
Definition: nkf.c:101
NKF_ENCODING_TABLE_SIZE
@ NKF_ENCODING_TABLE_SIZE
Definition: nkf.c:123
utf8_to_euc_3bytes
const unsigned short *const *const utf8_to_euc_3bytes[]
Definition: utf8tbl.c:12570
UCS_MAP_CP10001
#define UCS_MAP_CP10001
Definition: nkf.c:350
int
__inline__ int
Definition: rb_mjit_min_header-2.7.1.h:2839
ISO_8859_1
@ ISO_8859_1
Definition: nkf.c:88
shiftjis_cp932
const unsigned short shiftjis_cp932[3][189]
Definition: utf8tbl.c:13554
SS2
#define SS2
Definition: nkf.c:79
INPUT_OPTION
#define INPUT_OPTION
Definition: config.h:19
nkf_enc_to_index
#define nkf_enc_to_index(enc)
Definition: nkf.c:759
INPUT_CODE_FIX
#define INPUT_CODE_FIX
Definition: config.h:12
NEXT
#define NEXT
Definition: nkf.c:5834
JIS_X_0213_1
@ JIS_X_0213_1
Definition: nkf.c:132
X0213_SURROGATE_FIND
#define X0213_SURROGATE_FIND(tbl, size, euc)
Definition: nkf.c:1976
DEFAULT_R
#define DEFAULT_R
Definition: nkf.c:312
argc
int argc
Definition: ruby.c:222
X0201_DEFAULT
#define X0201_DEFAULT
Definition: nkf.h:16
SCORE_X0212
#define SCORE_X0212
Definition: nkf.c:2960
SHIFT_JIS
@ SHIFT_JIS
Definition: nkf.c:96
memmove
#define memmove(dst, src, len)
Definition: rb_mjit_min_header-2.7.1.h:2848
nkf_encodings
nkf_encodings
Definition: nkf.c:86
GETA1
#define GETA1
Definition: nkf.c:315
UCS_MAP_ASCII
#define UCS_MAP_ASCII
Definition: nkf.c:347
normalization_table
const struct normalization_pair normalization_table[]
Definition: utf8tbl.c:12606
nkf_state_t::std_gc_buf
nkf_buf_t * std_gc_buf
Definition: nkf.c:3312
FIXED_MIME
#define FIXED_MIME
Definition: nkf.c:57
config.h
mkstemp
int mkstemp(char *)
ENDIAN_2143
@ ENDIAN_2143
Definition: nkf.c:64
errno
int errno
input_code::buf
nkf_char buf[3]
Definition: nkf.c:330
EUCJP_MS
@ EUCJP_MS
Definition: nkf.c:102
count
int count
Definition: nkf.c:5055
exit
void exit(int __status) __attribute__((__noreturn__))
len
uint8_t len
Definition: escape.c:17
printf
int int int printf(const char *__restrict,...) __attribute__((__format__(__printf__
input_code::score
nkf_char score
Definition: nkf.c:328
utf8_to_euc_2bytes_ms
const unsigned short *const utf8_to_euc_2bytes_ms[]
Definition: utf8tbl.c:12450
NkfEncodingUTF_8
nkf_native_encoding NkfEncodingUTF_8
Definition: nkf.c:157
nkf_isdigit
#define nkf_isdigit(c)
Definition: nkf.c:284
utf8_to_euc_3bytes_ms
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
Definition: utf8tbl.c:12576
OUTPUT_UTF16
#define OUTPUT_UTF16(val)
Definition: nkf.c:2858
OUTPUT_UTF16_BYTES
#define OUTPUT_UTF16_BYTES(c1, c2)
Definition: nkf.c:2848
utf8_to_euc_2bytes_mac
const unsigned short *const utf8_to_euc_2bytes_mac[]
Definition: utf8tbl.c:12510
euc_to_utf8_2bytes_mac
const unsigned short *const euc_to_utf8_2bytes_mac[]
Definition: utf8tbl.c:3113
top
unsigned int top
Definition: nkf.c:4323
SCORE_iMIME
#define SCORE_iMIME
Definition: nkf.c:2963
nkf_isgraph
#define nkf_isgraph(c)
Definition: nkf.c:291
strncmp
int strncmp(const char *, const char *, size_t)
UTF8_MAC
@ UTF8_MAC
Definition: nkf.c:111
stderr
#define stderr
Definition: rb_mjit_min_header-2.7.1.h:1485
NkfEncodingShift_JIS
nkf_native_encoding NkfEncodingShift_JIS
Definition: nkf.c:155
shiftjis_x0212
const unsigned short shiftjis_x0212[3][189]
Definition: utf8tbl.c:13691
nkf_isspace
#define nkf_isspace(c)
Definition: nkf.c:287
strcpy
char * strcpy(char *__restrict, const char *__restrict)
S_IWRITE
#define S_IWRITE
Definition: rb_mjit_min_header-2.7.1.h:2421
utf8_to_euc_3bytes_x0213
const unsigned short *const *const utf8_to_euc_3bytes_x0213[]
Definition: utf8tbl.c:12594
utimbuf
Definition: file.c:2864
utf8_to_euc_2bytes
const unsigned short *const utf8_to_euc_2bytes[]
Definition: utf8tbl.c:12420
NKF_RELEASE_DATE
#define NKF_RELEASE_DATE
Definition: nkf.c:24
DEFAULT_FOLD
#define DEFAULT_FOLD
Definition: nkf.c:505
SKIP
#define SKIP
Definition: nkf.c:5835
nkf_enc_to_oconv
#define nkf_enc_to_oconv(enc)
Definition: nkf.c:762
IOBUF_SIZE
#define IOBUF_SIZE
Definition: nkf.c:308
UTF_32LE
@ UTF_32LE
Definition: nkf.c:120
UTF8_INPUT_ENABLE
#define UTF8_INPUT_ENABLE
Definition: config.h:5
input_code::stat
nkf_char stat
Definition: nkf.c:327
rename
int rename(const char *, const char *)
JIS_X_0213_2
@ JIS_X_0213_2
Definition: nkf.c:131
realloc
void * realloc(void *, size_t) __attribute__((__warn_unused_result__)) __attribute__((__alloc_size__(2)))
SP
#define SP
Definition: nkf.c:75
setbinmode
#define setbinmode(fp)
Definition: nkf.h:85
fprintf
int fprintf(FILE *__restrict, const char *__restrict,...) __attribute__((__format__(__printf__
x0212_shiftjis
const unsigned short *const x0212_shiftjis[]
Definition: utf8tbl.c:14612
DEFAULT_NEWLINE
#define DEFAULT_NEWLINE
Definition: nkf.h:22
__sFILE
Definition: vsnprintf.c:169
fflush
int fflush(FILE *)
char_size
#define char_size(c2, c1)
Definition: nkf.c:3824
nkf_encoding::name
const char * name
Definition: nkf.c:163
x0213_combining_chars
const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars]
Definition: utf8tbl.c:3220
EUC_JIS_2004
@ EUC_JIS_2004
Definition: nkf.c:107
strncat
char * strncat(char *__restrict, const char *__restrict, size_t)
src
__inline__ const void *__restrict src
Definition: rb_mjit_min_header-2.7.1.h:2836
nkf_buf_t::capa
long capa
Definition: nkf.c:835
name
const char * name
Definition: nkf.c:208
NkfEncodingASCII
nkf_native_encoding NkfEncodingASCII
Definition: nkf.c:153