00001 #ifndef ONIGURUMA_REGPARSE_H
00002 #define ONIGURUMA_REGPARSE_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "regint.h"
00033
00034
00035 #define NT_STR 0
00036 #define NT_CCLASS 1
00037 #define NT_CTYPE 2
00038 #define NT_CANY 3
00039 #define NT_BREF 4
00040 #define NT_QTFR 5
00041 #define NT_ENCLOSE 6
00042 #define NT_ANCHOR 7
00043 #define NT_LIST 8
00044 #define NT_ALT 9
00045 #define NT_CALL 10
00046
00047
00048 #define NTYPE2BIT(type) (1<<(type))
00049
00050 #define BIT_NT_STR NTYPE2BIT(NT_STR)
00051 #define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
00052 #define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
00053 #define BIT_NT_CANY NTYPE2BIT(NT_CANY)
00054 #define BIT_NT_BREF NTYPE2BIT(NT_BREF)
00055 #define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
00056 #define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
00057 #define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
00058 #define BIT_NT_LIST NTYPE2BIT(NT_LIST)
00059 #define BIT_NT_ALT NTYPE2BIT(NT_ALT)
00060 #define BIT_NT_CALL NTYPE2BIT(NT_CALL)
00061
00062 #define IS_NODE_TYPE_SIMPLE(type) \
00063 ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
00064 BIT_NT_CANY | BIT_NT_BREF)) != 0)
00065
00066 #define NTYPE(node) ((node)->u.base.type)
00067 #define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
00068
00069 #define NSTR(node) (&((node)->u.str))
00070 #define NCCLASS(node) (&((node)->u.cclass))
00071 #define NCTYPE(node) (&((node)->u.ctype))
00072 #define NBREF(node) (&((node)->u.bref))
00073 #define NQTFR(node) (&((node)->u.qtfr))
00074 #define NENCLOSE(node) (&((node)->u.enclose))
00075 #define NANCHOR(node) (&((node)->u.anchor))
00076 #define NCONS(node) (&((node)->u.cons))
00077 #define NCALL(node) (&((node)->u.call))
00078
00079 #define NCAR(node) (NCONS(node)->car)
00080 #define NCDR(node) (NCONS(node)->cdr)
00081
00082
00083
00084 #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
00085 #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
00086
00087 #define ENCLOSE_MEMORY (1<<0)
00088 #define ENCLOSE_OPTION (1<<1)
00089 #define ENCLOSE_STOP_BACKTRACK (1<<2)
00090
00091 #define NODE_STR_MARGIN 16
00092 #define NODE_STR_BUF_SIZE 24
00093 #define NODE_BACKREFS_SIZE 6
00094
00095 #define NSTR_RAW (1<<0)
00096 #define NSTR_AMBIG (1<<1)
00097 #define NSTR_DONT_GET_OPT_INFO (1<<2)
00098
00099 #define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
00100 #define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
00101 #define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
00102 #define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
00103 #define NSTRING_SET_DONT_GET_OPT_INFO(node) \
00104 (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
00105 #define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
00106 #define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
00107 #define NSTRING_IS_DONT_GET_OPT_INFO(node) \
00108 (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
00109
00110 #define BACKREFS_P(br) \
00111 (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
00112
00113 #define NQ_TARGET_ISNOT_EMPTY 0
00114 #define NQ_TARGET_IS_EMPTY 1
00115 #define NQ_TARGET_IS_EMPTY_MEM 2
00116 #define NQ_TARGET_IS_EMPTY_REC 3
00117
00118
00119 #define NST_MIN_FIXED (1<<0)
00120 #define NST_MAX_FIXED (1<<1)
00121 #define NST_CLEN_FIXED (1<<2)
00122 #define NST_MARK1 (1<<3)
00123 #define NST_MARK2 (1<<4)
00124 #define NST_MEM_BACKREFED (1<<5)
00125 #define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
00126 #define NST_RECURSION (1<<7)
00127 #define NST_CALLED (1<<8)
00128 #define NST_ADDR_FIXED (1<<9)
00129 #define NST_NAMED_GROUP (1<<10)
00130 #define NST_NAME_REF (1<<11)
00131 #define NST_IN_REPEAT (1<<12)
00132 #define NST_NEST_LEVEL (1<<13)
00133 #define NST_BY_NUMBER (1<<14)
00134
00135 #define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
00136 #define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
00137
00138 #define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
00139 #define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
00140 #define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
00141 #define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
00142 #define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
00143 #define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
00144 #define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
00145 #define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
00146 #define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
00147 (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
00148 #define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
00149
00150 #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
00151 #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
00152 #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
00153 #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
00154 #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
00155 #define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
00156 #define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
00157
00158 #define CALLNODE_REFNUM_UNDEF -1
00159
00160 typedef struct {
00161 NodeBase base;
00162 UChar* s;
00163 UChar* end;
00164 unsigned int flag;
00165 int capa;
00166 UChar buf[NODE_STR_BUF_SIZE];
00167 } StrNode;
00168
00169 typedef struct {
00170 NodeBase base;
00171 int state;
00172 struct _Node* target;
00173 int lower;
00174 int upper;
00175 int greedy;
00176 int target_empty_info;
00177 struct _Node* head_exact;
00178 struct _Node* next_head_exact;
00179 int is_refered;
00180 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00181 int comb_exp_check_num;
00182 #endif
00183 } QtfrNode;
00184
00185 typedef struct {
00186 NodeBase base;
00187 int state;
00188 int type;
00189 int regnum;
00190 OnigOptionType option;
00191 struct _Node* target;
00192 AbsAddrType call_addr;
00193
00194 OnigDistance min_len;
00195 OnigDistance max_len;
00196 int char_len;
00197 int opt_count;
00198 } EncloseNode;
00199
00200 #ifdef USE_SUBEXP_CALL
00201
00202 typedef struct {
00203 int offset;
00204 struct _Node* target;
00205 } UnsetAddr;
00206
00207 typedef struct {
00208 int num;
00209 int alloc;
00210 UnsetAddr* us;
00211 } UnsetAddrList;
00212
00213 typedef struct {
00214 NodeBase base;
00215 int state;
00216 int group_num;
00217 UChar* name;
00218 UChar* name_end;
00219 struct _Node* target;
00220 UnsetAddrList* unset_addr_list;
00221 } CallNode;
00222
00223 #endif
00224
00225 typedef struct {
00226 NodeBase base;
00227 int state;
00228 int back_num;
00229 int back_static[NODE_BACKREFS_SIZE];
00230 int* back_dynamic;
00231 int nest_level;
00232 } BRefNode;
00233
00234 typedef struct {
00235 NodeBase base;
00236 int type;
00237 struct _Node* target;
00238 int char_len;
00239 } AnchorNode;
00240
00241 typedef struct {
00242 NodeBase base;
00243 struct _Node* car;
00244 struct _Node* cdr;
00245 } ConsAltNode;
00246
00247 typedef struct {
00248 NodeBase base;
00249 int ctype;
00250 int not;
00251 } CtypeNode;
00252
00253 typedef struct _Node {
00254 union {
00255 NodeBase base;
00256 StrNode str;
00257 CClassNode cclass;
00258 QtfrNode qtfr;
00259 EncloseNode enclose;
00260 BRefNode bref;
00261 AnchorNode anchor;
00262 ConsAltNode cons;
00263 CtypeNode ctype;
00264 #ifdef USE_SUBEXP_CALL
00265 CallNode call;
00266 #endif
00267 } u;
00268 } Node;
00269
00270
00271 #define NULL_NODE ((Node* )0)
00272
00273 #define SCANENV_MEMNODES_SIZE 8
00274 #define SCANENV_MEM_NODES(senv) \
00275 (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
00276 (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
00277
00278 typedef struct {
00279 OnigOptionType option;
00280 OnigCaseFoldType case_fold_flag;
00281 OnigEncoding enc;
00282 const OnigSyntaxType* syntax;
00283 BitStatusType capture_history;
00284 BitStatusType bt_mem_start;
00285 BitStatusType bt_mem_end;
00286 BitStatusType backrefed_mem;
00287 UChar* pattern;
00288 UChar* pattern_end;
00289 UChar* error;
00290 UChar* error_end;
00291 regex_t* reg;
00292 int num_call;
00293 #ifdef USE_SUBEXP_CALL
00294 UnsetAddrList* unset_addr_list;
00295 #endif
00296 int num_mem;
00297 #ifdef USE_NAMED_GROUP
00298 int num_named;
00299 #endif
00300 int mem_alloc;
00301 Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
00302 Node** mem_nodes_dynamic;
00303 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00304 int num_comb_exp_check;
00305 int comb_exp_max_regnum;
00306 int curr_max_regnum;
00307 int has_recursion;
00308 #endif
00309 int warnings_flag;
00310 const char* sourcefile;
00311 int sourceline;
00312 } ScanEnv;
00313
00314
00315 #define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
00316 #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
00317 #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
00318
00319 #ifdef USE_NAMED_GROUP
00320 typedef struct {
00321 int new_val;
00322 } GroupNumRemap;
00323
00324 extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
00325 #endif
00326
00327 extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
00328 extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
00329 extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
00330 extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
00331 extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
00332 extern void onig_node_conv_to_str_node P_((Node* node, int raw));
00333 extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
00334 extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
00335 extern void onig_node_free P_((Node* node));
00336 extern Node* onig_node_new_enclose P_((int type));
00337 extern Node* onig_node_new_anchor P_((int type));
00338 extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
00339 extern Node* onig_node_new_list P_((Node* left, Node* right));
00340 extern Node* onig_node_list_add P_((Node* list, Node* x));
00341 extern Node* onig_node_new_alt P_((Node* left, Node* right));
00342 extern void onig_node_str_clear P_((Node* node));
00343 extern int onig_free_node_list P_((void));
00344 extern int onig_names_free P_((regex_t* reg));
00345 extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
00346 extern int onig_free_shared_cclass_table P_((void));
00347
00348 #ifdef ONIG_DEBUG
00349 #ifdef USE_NAMED_GROUP
00350 extern int onig_print_names(FILE*, regex_t*);
00351 #endif
00352 #endif
00353
00354 #endif
00355