Ruby 2.1.10p492 (2016-04-01 revision 54464)
regenc.h
Go to the documentation of this file.
1 #ifndef ONIGURUMA_REGENC_H
2 #define ONIGURUMA_REGENC_H
3 /**********************************************************************
4  regenc.h - Onigmo (Oniguruma-mod) (regular expression library)
5 **********************************************************************/
6 /*-
7  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8  * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 #ifndef REGINT_H
33 #ifndef RUBY_EXTERN
34 #include "ruby/config.h"
35 #include "ruby/defines.h"
36 #endif
37 #ifdef ONIG_ESCAPE_UCHAR_COLLISION
38 #undef ONIG_ESCAPE_UCHAR_COLLISION
39 #endif
40 #endif
41 #include "ruby/oniguruma.h"
42 
44 
45 typedef struct {
49 
50 
/* Fallback constants: NULL, TRUE and FALSE are each defined here only when
 * nothing included earlier has already defined a macro of that name. */
51 #ifndef NULL
52 #define NULL ((void* )0)
53 #endif
54 
55 #ifndef TRUE
56 #define TRUE 1
57 #endif
58 
59 #ifndef FALSE
60 #define FALSE 0
61 #endif
62 
/* ARG_UNUSED: marker placed after a parameter name. It expands to
 * __attribute__ ((unused)) when __GNUC__ is defined and to nothing
 * otherwise. The whole block is skipped if ARG_UNUSED already exists. */
63 #ifndef ARG_UNUSED
64 #if defined(__GNUC__)
65 # define ARG_UNUSED __attribute__ ((unused))
66 #else
67 # define ARG_UNUSED
68 #endif
69 #endif
70 
/* Null tests: the argument is cast to void* and compared with (void*)0. */
71 #define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
72 #define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
/* Early-exit helpers: when p is null, return NULL (first form) or val
 * (second form) from the function in which the macro is expanded.
 * NOTE(review): both expand to an unbraced `if` with no terminating
 * semicolon, so an `else` written right after the macro would attach to
 * this hidden `if` -- confirm no call site relies on that. */
73 #define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
74 #define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
75 
/* enclen(enc,p,e): yields enc->min_enc_len directly when the encoding's
 * max_enc_len and min_enc_len are equal; otherwise it defers to
 * ONIGENC_MBC_ENC_LEN(enc,p,e), which is defined outside this listing.
 * NOTE(review): `enc` appears unparenthesized and more than once in the
 * expansion, so it should be a plain pointer expression without side
 * effects (an argument such as `&x` would not expand as intended). */
76 #define enclen(enc,p,e) ((enc->max_enc_len == enc->min_enc_len) ? enc->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
77 
78 /* character types bit flag */
/* Each BIT_CTYPE_* is 1 shifted left by the matching ONIGENC_CTYPE_* value;
 * those ONIGENC_CTYPE_* values are defined outside this listing. */
79 #define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
80 #define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
81 #define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK)
82 #define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL)
83 #define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT)
84 #define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH)
85 #define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER)
86 #define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT)
87 #define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT)
88 #define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE)
89 #define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER)
90 #define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT)
91 #define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD)
92 #define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM)
93 #define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII)
94 
/* CTYPE_TO_BIT: the same shift as above, for a ctype supplied as an argument.
 * CTYPE_IS_WORD_GRAPH_PRINT: non-zero when ctype equals ONIGENC_CTYPE_WORD,
 * ONIGENC_CTYPE_GRAPH or ONIGENC_CTYPE_PRINT; the argument is expanded up
 * to three times. */
95 #define CTYPE_TO_BIT(ctype) (1<<(ctype))
96 #define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
97  ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
98  (ctype) == ONIGENC_CTYPE_PRINT)
99 
100 
101 typedef struct {
102  const UChar *name;
103  int ctype;
104  short int len;
106 
/* PosixBracketEntryInit(name, ctype): expands to a brace initializer with
 * three values -- name cast to const UChar*, ctype, and sizeof(name) - 1
 * cast to short int.
 * NOTE(review): sizeof(name) - 1 is the text length only when `name` is a
 * string literal or char array; presumably it is always invoked with a
 * literal -- confirm against the call sites. */
107 #define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)}
108 
/* Compile-time feature switches. The first two are enabled; the two that
 * follow are left commented out, i.e. disabled in this build. */
109 #define USE_CRNL_AS_LINE_TERMINATOR
110 #define USE_UNICODE_PROPERTIES
111 /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
112 /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
113 
114 
/* ONIG_ENCODING_INIT_DEFAULT is an alias for ONIG_ENCODING_ASCII. */
115 #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
116 
117 /* for encoding system implementation (internal) */
/* Prototypes are written through the P_(( ... )) wrapper macro, which is
 * defined outside this listing. The listing's own line numbers jump here
 * (117 -> 120, 121 -> 123), so some declarations of the original header
 * are not shown. */
120 ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
121 ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
123 ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc));
124 
125 
126 /* methods for single byte encoding */
/* In the two declarations shown here the OnigEncoding parameter comes last.
 * The listing's line numbers jump from 127 to 135, so further single-byte
 * declarations are not shown. */
127 ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc));
135 ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
136 
137 /* methods for multi byte encoding */
/* In the declarations shown here the OnigEncoding parameter comes first,
 * unlike the single-byte group above. Some line numbers are skipped in this
 * group as well. */
139 ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
143 ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
144 ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
147 ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
148 
149 
150 /* in enc/unicode.c */
/* NOTE(review): the position of the OnigEncoding parameter is not uniform
 * in this group -- it is last in onigenc_unicode_is_code_ctype,
 * onigenc_utf16_32_get_ctype_code_range and
 * onigenc_unicode_apply_all_case_fold, first in
 * onigenc_unicode_get_case_fold_codes_by_str and
 * onigenc_unicode_mbc_case_fold, and absent from
 * onigenc_unicode_ctype_code_range. Check each call against the exact
 * prototype. */
151 ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
152 ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
153 ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
154 ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
155 ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
156 ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
157 
158 
/* Surrogate tests: c is masked with 0xfc (its two lowest bits are dropped)
 * and compared with 0xd8 (FIRST) or 0xdc (SECOND).
 * NOTE(review): presumably c is the high-order byte of a UTF-16 code unit
 * -- confirm against the UTF-16 encoding sources. */
159 #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
160 #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
161 
/* ISO-8859-1 case conversion by direct table lookup with c as the index.
 * The macros perform no range check on c. The declarations of the two
 * tables are not present in this listing. */
162 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
163  OnigEncISO_8859_1_ToLowerCaseTable[c]
164 #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
165  OnigEncISO_8859_1_ToUpperCaseTable[c]
166 
169 
/* Comparison helpers: the sascii parameter is annotated as ASCII text by
 * the inline comment in each prototype. */
170 ONIG_EXTERN int
171 onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
172 ONIG_EXTERN int
173 onigenc_with_ascii_strnicmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
/* NOTE(review): the line carrying the linkage and return type of
 * onigenc_step (listing line 174) is missing from this page; the
 * cross-reference index further down shows the function returning UChar *. */
175 onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
176 
177 /* defined in regcomp.c (per this page's cross-reference index), but used in enc/xxx.c */
178 extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
179 
/* ASCII ctype table: one unsigned short of ctype bits per code, as used by
 * ONIGENC_IS_ASCII_CODE_CTYPE below. The array size is not given here. */
183 ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
184 
/* ONIGENC_IS_ASCII_CODE: true when code is below 0x80.
 * The TO_LOWER/TO_UPPER macros are direct table lookups indexed by c. */
185 #define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
186 #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
187 #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
/* Tests the CTYPE_TO_BIT(ctype) bit of the table entry for code.
 * NOTE(review): the macro itself does not range-check code; presumably
 * callers test ONIGENC_IS_ASCII_CODE first -- confirm. */
188 #define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
189  ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
/* True when the code has the UPPER or the LOWER ctype bit set. */
190 #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
191  (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
192  ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
193 
194 /* Check if the code is in the range. (from <= code && code <= to) */
/* Implemented as a single comparison: (code - from) and (to - from) are each
 * cast to OnigCodePoint and compared with <=. `from` is expanded twice. */
195 #define ONIGENC_IS_IN_RANGE(code, from, to) \
196  ((OnigCodePoint )((code) - (from)) <= (OnigCodePoint )((to) - (from)))
197 
198 
/* Encoding definition helpers, in two variants.
 *
 * When ONIG_ENC_REGISTER is defined:
 *   - OnigEncodingName(n) pastes to encoding_<n>.
 *   - OnigEncodingDeclare(n) declares that object as a static
 *     OnigEncodingType.
 *   - OnigEncodingDefine(f,n) emits the declaration, then a function
 *     Init_<f>(void) that calls ONIG_ENC_REGISTER with the object's .name
 *     member and its address, then a second declaration with no trailing
 *     semicolon. NOTE(review): presumably the invoker appends
 *     `= { ... };` to supply the initializer -- confirm in the enc sources.
 *
 * Otherwise:
 *   - OnigEncodingName(n) pastes to OnigEncoding<n>, the declaration is
 *     not static, and OnigEncodingDefine(f,n) is only the declaration
 *     (f is unused). */
199 #ifdef ONIG_ENC_REGISTER
200 extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*);
201 #define OnigEncodingName(n) encoding_##n
202 #define OnigEncodingDeclare(n) static OnigEncodingType OnigEncodingName(n)
203 #define OnigEncodingDefine(f,n) \
204  OnigEncodingDeclare(n); \
205  void Init_##f(void) { \
206  ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
207  &OnigEncodingName(n)); \
208  } \
209  OnigEncodingDeclare(n)
210 #else
211 #define OnigEncodingName(n) OnigEncoding##n
212 #define OnigEncodingDeclare(n) OnigEncodingType OnigEncodingName(n)
213 #define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
214 #endif
215 
216 /* macros for define replica encoding and encoding alias */
/* All three macros expand to nothing in this header, so an invocation
 * generates no code by itself. */
217 #define ENC_REPLICATE(name, orig)
218 #define ENC_ALIAS(name, orig)
219 #define ENC_DUMMY(name)
220 
222 
223 #endif /* ONIGURUMA_REGENC_H */
unsigned int OnigCodePoint
Definition: oniguruma.h:112
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding
Definition: regenc.h:180
int onigenc_always_true_is_allowed_reverse_match(const UChar *s ARG_UNUSED, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:650
OnigCodePoint from
Definition: regenc.h:46
OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar *p, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:620
unsigned int OnigCaseFoldType
Definition: oniguruma.h:119
int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:664
int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, const UChar **pp, const UChar *end ARG_UNUSED, UChar *lower)
Definition: regenc.c:692
int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:734
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6028
int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
Definition: regenc.c:751
int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f, void *arg, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:410
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]
Definition: regenc.h:182
unsigned char OnigUChar
Definition: oniguruma.h:111
int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype)
Definition: regenc.c:824
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]
Definition: regenc.h:183
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]
Definition: regenc.h:167
int onigenc_always_false_is_allowed_reverse_match(const UChar *s ARG_UNUSED, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:657
int onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:873
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void *arg, OnigEncoding enc))
int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void *arg)
Definition: regenc.c:463
int onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:627
const UChar * name
Definition: regenc.h:102
UChar * onigenc_single_byte_left_adjust_char_head(const UChar *start ARG_UNUSED, const UChar *s, const UChar *end, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:642
int onigenc_is_mbc_newline_0x0a(const UChar *p, const UChar *end, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:581
#define ONIG_EXTERN
Definition: oniguruma.h:98
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]
Definition: regenc.h:168
int(* OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint *to, int to_len, void *arg)
Definition: oniguruma.h:156
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]
Definition: regenc.h:181
#define RUBY_SYMBOL_EXPORT_END
Definition: missing.h:39
int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype)
Definition: regenc.c:839
int ctype
Definition: regenc.h:103
unsigned int OnigCtype
Definition: oniguruma.h:113
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
int onigenc_single_byte_mbc_enc_len(const UChar *p ARG_UNUSED, const UChar *e ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:613
#define RUBY_SYMBOL_EXPORT_BEGIN
Definition: missing.h:38
int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint *ranges[], OnigEncoding enc)
Definition: regenc.c:573
int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, const OnigUChar *p, const OnigUChar *end ARG_UNUSED, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:433
#define UChar
Definition: oniguruma.h:108
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
Definition: regenc.c:113
int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
Definition: regenc.c:768
#define f
int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:742
int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:633
Definition: regenc.h:101
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:854
OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:674
short int len
Definition: regenc.h:104
OnigCodePoint to
Definition: regenc.h:47
int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar *p, UChar *end)
Definition: regenc.c:791
int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar **p, const UChar *end, UChar *lower, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:591
int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, const OnigUChar *p, const OnigUChar *end, OnigCaseFoldCodeItem items[])
Definition: regenc.c:491