aboutsummaryrefslogblamecommitdiffstats
path: root/doc/api-documentation/html/regex_8h-source.html
blob: 18a7b738dad5da3c10a515afce57f7fc567780e6 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559














































































































































































































































































































































































































































































































































































                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
<title>regex.h Source File</title>
<link href="doxygen.css" rel="stylesheet" type="text/css">
</head><body>
<!-- Generated by Doxygen 1.2.15 -->
<center>
<a class="qindex" href="index.html">Main Page</a> &nbsp; <a class="qindex" href="namespaces.html">Namespace List</a> &nbsp; <a class="qindex" href="hierarchy.html">Class Hierarchy</a> &nbsp; <a class="qindex" href="classes.html">Alphabetical List</a> &nbsp; <a class="qindex" href="annotated.html">Compound List</a> &nbsp; <a class="qindex" href="files.html">File List</a> &nbsp; <a class="qindex" href="functions.html">Compound Members</a> &nbsp; </center>
<hr><h1>regex.h</h1><div class="fragment"><pre>00001 <font class="comment">/* Definitions for data structures and routines for the regular</font>
00002 <font class="comment">   expression library, version 0.12.</font>
00003 <font class="comment">   Copyright (C) 1985,89,90,91,92,93,95,96,97 Free Software Foundation, Inc.</font>
00004 <font class="comment"></font>
00005 <font class="comment">   This file is part of the GNU C Library.  Its master source is NOT part of the C library, however.  The master source lives in /gd/gnu/lib.</font>
00006 <font class="comment"></font>
00007 <font class="comment">NOTE: The canonical source of this file is maintained with the </font>
00008 <font class="comment">GNU C Library.  Bugs can be reported to bug-glibc@prep.ai.mit.edu.</font>
00009 <font class="comment"></font>
00010 <font class="comment">This program is free software; you can redistribute it and/or modify it</font>
00011 <font class="comment">under the terms of the GNU General Public License as published by the</font>
00012 <font class="comment">Free Software Foundation; either version 2, or (at your option) any</font>
00013 <font class="comment">later version.</font>
00014 <font class="comment"></font>
00015 <font class="comment">This program is distributed in the hope that it will be useful,</font>
00016 <font class="comment">but WITHOUT ANY WARRANTY; without even the implied warranty of</font>
00017 <font class="comment">MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the</font>
00018 <font class="comment">GNU General Public License for more details.</font>
00019 <font class="comment"></font>
00020 <font class="comment">You should have received a copy of the GNU General Public License</font>
00021 <font class="comment">along with this program; if not, write to the Free Software Foundation, </font>
00022 <font class="comment">Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */</font>
00023 
00024 <font class="preprocessor">#ifndef _REGEX_H</font>
00025 <font class="preprocessor"></font><font class="preprocessor">#define _REGEX_H 1</font>
00026 <font class="preprocessor"></font>
00027 <font class="comment">/* Allow the use in C++ code.  */</font>
00028 <font class="preprocessor">#ifdef __cplusplus</font>
00029 <font class="preprocessor"></font><font class="keyword">extern</font> <font class="stringliteral">"C"</font>
00030 {
00031 <font class="preprocessor">#endif</font>
00032 <font class="preprocessor"></font>
00033 <font class="comment">/* POSIX says that &lt;sys/types.h&gt; must be included (by the caller) before</font>
00034 <font class="comment">   &lt;regex.h&gt;.  */</font>
00035 
00036 <font class="preprocessor">#if !defined (_POSIX_C_SOURCE) &amp;&amp; !defined (_POSIX_SOURCE) &amp;&amp; defined (VMS)</font>
00037 <font class="preprocessor"></font><font class="comment">/* VMS doesn't have `size_t' in &lt;sys/types.h&gt;, even though POSIX says it</font>
00038 <font class="comment">   should be there.  */</font>
00039 <font class="preprocessor">#include &lt;stddef.h&gt;</font>
00040 <font class="preprocessor">#endif</font>
00041 <font class="preprocessor"></font>
00042 <font class="comment">/* The following two types have to be signed and unsigned integer type</font>
00043 <font class="comment">   wide enough to hold a value of a pointer.  For most ANSI compilers</font>
00044 <font class="comment">   ptrdiff_t and size_t should likely be OK.  Still, the size of these</font>
00045 <font class="comment">   two types is 2 for Microsoft C.  Ugh... */</font>
00046   <font class="keyword">typedef</font> <font class="keywordtype">long</font> <font class="keywordtype">int</font> s_reg_t;
00047   <font class="keyword">typedef</font> <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> <font class="keywordtype">int</font> active_reg_t;
00048 
00049 <font class="comment">/* The following bits are used to determine the regexp syntax we</font>
00050 <font class="comment">   recognize.  The set/not-set meanings are chosen so that Emacs syntax</font>
00051 <font class="comment">   remains the value 0.  The bits are given in alphabetical order, and</font>
00052 <font class="comment">   the definitions shifted by one from the previous bit; thus, when we</font>
00053 <font class="comment">   add or remove a bit, only one other definition need change.  */</font>
00054   <font class="keyword">typedef</font> <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> <font class="keywordtype">int</font> reg_syntax_t;
00055 
00056 <font class="comment">/* If this bit is not set, then \ inside a bracket expression is literal.</font>
00057 <font class="comment">   If set, then such a \ quotes the following character.  */</font>
00058 <font class="preprocessor">#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)</font>
00059 <font class="preprocessor"></font>
00060 <font class="comment">/* If this bit is not set, then + and ? are operators, and \+ and \? are</font>
00061 <font class="comment">     literals.</font>
00062 <font class="comment">   If set, then \+ and \? are operators and + and ? are literals.  */</font>
00063 <font class="preprocessor">#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS &lt;&lt; 1)</font>
00064 <font class="preprocessor"></font>
00065 <font class="comment">/* If this bit is set, then character classes are supported.  They are:</font>
00066 <font class="comment">     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],</font>
00067 <font class="comment">     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].</font>
00068 <font class="comment">   If not set, then character classes are not supported.  */</font>
00069 <font class="preprocessor">#define RE_CHAR_CLASSES (RE_BK_PLUS_QM &lt;&lt; 1)</font>
00070 <font class="preprocessor"></font>
00071 <font class="comment">/* If this bit is set, then ^ and $ are always anchors (outside bracket</font>
00072 <font class="comment">     expressions, of course).</font>
00073 <font class="comment">   If this bit is not set, then it depends:</font>
00074 <font class="comment">        ^  is an anchor if it is at the beginning of a regular</font>
00075 <font class="comment">           expression or after an open-group or an alternation operator;</font>
00076 <font class="comment">        $  is an anchor if it is at the end of a regular expression, or</font>
00077 <font class="comment">           before a close-group or an alternation operator.</font>
00078 <font class="comment"></font>
00079 <font class="comment">   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because</font>
00080 <font class="comment">   POSIX draft 11.2 says that * etc. in leading positions is undefined.</font>
00081 <font class="comment">   We already implemented a previous draft which made those constructs</font>
00082 <font class="comment">   invalid, though, so we haven't changed the code back.  */</font>
00083 <font class="preprocessor">#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES &lt;&lt; 1)</font>
00084 <font class="preprocessor"></font>
00085 <font class="comment">/* If this bit is set, then special characters are always special</font>
00086 <font class="comment">     regardless of where they are in the pattern.</font>
00087 <font class="comment">   If this bit is not set, then special characters are special only in</font>
00088 <font class="comment">     some contexts; otherwise they are ordinary.  Specifically,</font>
00089 <font class="comment">     * + ? and intervals are only special when not after the beginning,</font>
00090 <font class="comment">     open-group, or alternation operator.  */</font>
00091 <font class="preprocessor">#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS &lt;&lt; 1)</font>
00092 <font class="preprocessor"></font>
00093 <font class="comment">/* If this bit is set, then *, +, ?, and { cannot be first in an re or</font>
00094 <font class="comment">     immediately after an alternation or begin-group operator.  */</font>
00095 <font class="preprocessor">#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS &lt;&lt; 1)</font>
00096 <font class="preprocessor"></font>
00097 <font class="comment">/* If this bit is set, then . matches newline.</font>
00098 <font class="comment">   If not set, then it doesn't.  */</font>
00099 <font class="preprocessor">#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS &lt;&lt; 1)</font>
00100 <font class="preprocessor"></font>
00101 <font class="comment">/* If this bit is set, then . doesn't match NUL.</font>
00102 <font class="comment">   If not set, then it does.  */</font>
00103 <font class="preprocessor">#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE &lt;&lt; 1)</font>
00104 <font class="preprocessor"></font>
00105 <font class="comment">/* If this bit is set, nonmatching lists [^...] do not match newline.</font>
00106 <font class="comment">   If not set, they do.  */</font>
00107 <font class="preprocessor">#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL &lt;&lt; 1)</font>
00108 <font class="preprocessor"></font>
00109 <font class="comment">/* If this bit is set, either \{...\} or {...} defines an</font>
00110 <font class="comment">     interval, depending on RE_NO_BK_BRACES.</font>
00111 <font class="comment">   If not set, \{, \}, {, and } are literals.  */</font>
00112 <font class="preprocessor">#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE &lt;&lt; 1)</font>
00113 <font class="preprocessor"></font>
00114 <font class="comment">/* If this bit is set, +, ? and | aren't recognized as operators.</font>
00115 <font class="comment">   If not set, they are.  */</font>
00116 <font class="preprocessor">#define RE_LIMITED_OPS (RE_INTERVALS &lt;&lt; 1)</font>
00117 <font class="preprocessor"></font>
00118 <font class="comment">/* If this bit is set, newline is an alternation operator.</font>
00119 <font class="comment">   If not set, newline is literal.  */</font>
00120 <font class="preprocessor">#define RE_NEWLINE_ALT (RE_LIMITED_OPS &lt;&lt; 1)</font>
00121 <font class="preprocessor"></font>
00122 <font class="comment">/* If this bit is set, then `{...}' defines an interval, and \{ and \}</font>
00123 <font class="comment">     are literals.</font>
00124 <font class="comment">  If not set, then `\{...\}' defines an interval.  */</font>
00125 <font class="preprocessor">#define RE_NO_BK_BRACES (RE_NEWLINE_ALT &lt;&lt; 1)</font>
00126 <font class="preprocessor"></font>
00127 <font class="comment">/* If this bit is set, (...) defines a group, and \( and \) are literals.</font>
00128 <font class="comment">   If not set, \(...\) defines a group, and ( and ) are literals.  */</font>
00129 <font class="preprocessor">#define RE_NO_BK_PARENS (RE_NO_BK_BRACES &lt;&lt; 1)</font>
00130 <font class="preprocessor"></font>
00131 <font class="comment">/* If this bit is set, then \&lt;digit&gt; matches &lt;digit&gt;.</font>
00132 <font class="comment">   If not set, then \&lt;digit&gt; is a back-reference.  */</font>
00133 <font class="preprocessor">#define RE_NO_BK_REFS (RE_NO_BK_PARENS &lt;&lt; 1)</font>
00134 <font class="preprocessor"></font>
00135 <font class="comment">/* If this bit is set, then | is an alternation operator, and \| is literal.</font>
00136 <font class="comment">   If not set, then \| is an alternation operator, and | is literal.  */</font>
00137 <font class="preprocessor">#define RE_NO_BK_VBAR (RE_NO_BK_REFS &lt;&lt; 1)</font>
00138 <font class="preprocessor"></font>
00139 <font class="comment">/* If this bit is set, then a starting range point collating higher</font>
00140 <font class="comment">     than the ending range point, as in [z-a], is invalid.</font>
00141 <font class="comment">   If not set, then when the starting range point collates higher than</font>
00142 <font class="comment">     the ending range point, the range is ignored.  */</font>
00143 <font class="preprocessor">#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR &lt;&lt; 1)</font>
00144 <font class="preprocessor"></font>
00145 <font class="comment">/* If this bit is set, then an unmatched ) is ordinary.</font>
00146 <font class="comment">   If not set, then an unmatched ) is invalid.  */</font>
00147 <font class="preprocessor">#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES &lt;&lt; 1)</font>
00148 <font class="preprocessor"></font>
00149 <font class="comment">/* If this bit is set, succeed as soon as we match the whole pattern,</font>
00150 <font class="comment">   without further backtracking.  */</font>
00151 <font class="preprocessor">#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD &lt;&lt; 1)</font>
00152 <font class="preprocessor"></font>
00153 <font class="comment">/* If this bit is set, do not process the GNU regex operators.</font>
00154 <font class="comment">   If not set, then the GNU regex operators are recognized. */</font>
00155 <font class="preprocessor">#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING &lt;&lt; 1)</font>
00156 <font class="preprocessor"></font>
00157 <font class="comment">/* If this bit is set, turn on internal regex debugging.</font>
00158 <font class="comment">   If not set, and debugging was on, turn it off.</font>
00159 <font class="comment">   This only works if regex.c is compiled -DDEBUG.</font>
00160 <font class="comment">   We define this bit always, so that all that's needed to turn on</font>
00161 <font class="comment">   debugging is to recompile regex.c; the calling code can always have</font>
00162 <font class="comment">   this bit set, and it won't affect anything in the normal case. */</font>
00163 <font class="preprocessor">#define RE_DEBUG (RE_NO_GNU_OPS &lt;&lt; 1)</font>
00164 <font class="preprocessor"></font>
00165 <font class="comment">/* This global variable defines the particular regexp syntax to use (for</font>
00166 <font class="comment">   some interfaces).  When a regexp is compiled, the syntax used is</font>
00167 <font class="comment">   stored in the pattern buffer, so changing this does not affect</font>
00168 <font class="comment">   already-compiled regexps.  */</font>
00169   <font class="keyword">extern</font> reg_syntax_t re_syntax_options;
00170 
00171 <font class="comment">/* Define combinations of the above bits for the standard possibilities.</font>
00172 <font class="comment">   (The [[[ comments delimit what gets put into the Texinfo file, so</font>
00173 <font class="comment">   don't delete them!)  */</font>
00174 <font class="comment">/* [[[begin syntaxes]]] */</font>
00175 <font class="preprocessor">#define RE_SYNTAX_EMACS 0</font>
00176 <font class="preprocessor"></font>
00177 <font class="preprocessor">#define RE_SYNTAX_AWK                                                   \</font>
00178 <font class="preprocessor">  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL                     \</font>
00179 <font class="preprocessor">   | RE_NO_BK_PARENS              | RE_NO_BK_REFS                       \</font>
00180 <font class="preprocessor">   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES                  \</font>
00181 <font class="preprocessor">   | RE_DOT_NEWLINE               | RE_CONTEXT_INDEP_ANCHORS            \</font>
00182 <font class="preprocessor">   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)</font>
00183 <font class="preprocessor"></font>
00184 <font class="preprocessor">#define RE_SYNTAX_GNU_AWK                                               \</font>
00185 <font class="preprocessor">  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \</font>
00186 <font class="preprocessor">   &amp; ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))</font>
00187 <font class="preprocessor"></font>
00188 <font class="preprocessor">#define RE_SYNTAX_POSIX_AWK                                             \</font>
00189 <font class="preprocessor">  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS              \</font>
00190 <font class="preprocessor">   | RE_INTERVALS           | RE_NO_GNU_OPS)</font>
00191 <font class="preprocessor"></font>
00192 <font class="preprocessor">#define RE_SYNTAX_GREP                                                  \</font>
00193 <font class="preprocessor">  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES                         \</font>
00194 <font class="preprocessor">   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS                            \</font>
00195 <font class="preprocessor">   | RE_NEWLINE_ALT)</font>
00196 <font class="preprocessor"></font>
00197 <font class="preprocessor">#define RE_SYNTAX_EGREP                                                 \</font>
00198 <font class="preprocessor">  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS                    \</font>
00199 <font class="preprocessor">   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE                    \</font>
00200 <font class="preprocessor">   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS                             \</font>
00201 <font class="preprocessor">   | RE_NO_BK_VBAR)</font>
00202 <font class="preprocessor"></font>
00203 <font class="preprocessor">#define RE_SYNTAX_POSIX_EGREP                                           \</font>
00204 <font class="preprocessor">  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)</font>
00205 <font class="preprocessor"></font>
00206 <font class="comment">/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */</font>
00207 <font class="preprocessor">#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC</font>
00208 <font class="preprocessor"></font>
00209 <font class="preprocessor">#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC</font>
00210 <font class="preprocessor"></font>
00211 <font class="comment">/* Syntax bits common to both basic and extended POSIX regex syntax.  */</font>
00212 <font class="preprocessor">#define _RE_SYNTAX_POSIX_COMMON                                         \</font>
00213 <font class="preprocessor">  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL              \</font>
00214 <font class="preprocessor">   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)</font>
00215 <font class="preprocessor"></font>
00216 <font class="preprocessor">#define RE_SYNTAX_POSIX_BASIC                                           \</font>
00217 <font class="preprocessor">  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)</font>
00218 <font class="preprocessor"></font>
00219 <font class="comment">/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes</font>
00220 <font class="comment">   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this</font>
00221 <font class="comment">   isn't minimal, since other operators, such as \`, aren't disabled.  */</font>
00222 <font class="preprocessor">#define RE_SYNTAX_POSIX_MINIMAL_BASIC                                   \</font>
00223 <font class="preprocessor">  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)</font>
00224 <font class="preprocessor"></font>
00225 <font class="preprocessor">#define RE_SYNTAX_POSIX_EXTENDED                                        \</font>
00226 <font class="preprocessor">  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS                   \</font>
00227 <font class="preprocessor">   | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES                            \</font>
00228 <font class="preprocessor">   | RE_NO_BK_PARENS       | RE_NO_BK_VBAR                              \</font>
00229 <font class="preprocessor">   | RE_UNMATCHED_RIGHT_PAREN_ORD)</font>
00230 <font class="preprocessor"></font>
00231 <font class="comment">/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS</font>
00232 <font class="comment">   replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */</font>
00233 <font class="preprocessor">#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED                                \</font>
00234 <font class="preprocessor">  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                  \</font>
00235 <font class="preprocessor">   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES                           \</font>
00236 <font class="preprocessor">   | RE_NO_BK_PARENS        | RE_NO_BK_REFS                             \</font>
00237 <font class="preprocessor">   | RE_NO_BK_VBAR          | RE_UNMATCHED_RIGHT_PAREN_ORD)</font>
00238 <font class="preprocessor"></font><font class="comment">/* [[[end syntaxes]]] */</font>
00239 
00240 <font class="comment">/* Maximum number of duplicates an interval can allow.  Some systems</font>
00241 <font class="comment">   (erroneously) define this in other header files, but we want our</font>
00242 <font class="comment">   value, so remove any previous define.  */</font>
00243 <font class="preprocessor">#ifdef RE_DUP_MAX</font>
00244 <font class="preprocessor"></font><font class="preprocessor">#undef RE_DUP_MAX</font>
00245 <font class="preprocessor"></font><font class="preprocessor">#endif</font>
00246 <font class="preprocessor"></font><font class="comment">/* If sizeof(int) == 2, then ((1 &lt;&lt; 15) - 1) overflows.  */</font>
00247 <font class="preprocessor">#define RE_DUP_MAX (0x7fff)</font>
00248 <font class="preprocessor"></font>
00249 
00250 <font class="comment">/* POSIX `cflags' bits (i.e., information for `regcomp').  */</font>
00251 
00252 <font class="comment">/* If this bit is set, then use extended regular expression syntax.</font>
00253 <font class="comment">   If not set, then use basic regular expression syntax.  */</font>
00254 <font class="preprocessor">#define REG_EXTENDED 1</font>
00255 <font class="preprocessor"></font>
00256 <font class="comment">/* If this bit is set, then ignore case when matching.</font>
00257 <font class="comment">   If not set, then case is significant.  */</font>
00258 <font class="preprocessor">#define REG_ICASE (REG_EXTENDED &lt;&lt; 1)</font>
00259 <font class="preprocessor"></font>
00260 <font class="comment">/* If this bit is set, then anchors do match at newline</font>
00261 <font class="comment">     characters in the string.</font>
00262 <font class="comment">   If not set, then anchors do not match at newlines.  */</font>
00263 <font class="preprocessor">#define REG_NEWLINE (REG_ICASE &lt;&lt; 1)</font>
00264 <font class="preprocessor"></font>
00265 <font class="comment">/* If this bit is set, then report only success or fail in regexec.</font>
00266 <font class="comment">   If not set, then returns differ between not matching and errors.  */</font>
00267 <font class="preprocessor">#define REG_NOSUB (REG_NEWLINE &lt;&lt; 1)</font>
00268 <font class="preprocessor"></font>
00269 
00270 <font class="comment">/* POSIX `eflags' bits (i.e., information for regexec).  */</font>
00271 
00272 <font class="comment">/* If this bit is set, then the beginning-of-line operator doesn't match</font>
00273 <font class="comment">     the beginning of the string (presumably because it's not the</font>
00274 <font class="comment">     beginning of a line).</font>
00275 <font class="comment">   If not set, then the beginning-of-line operator does match the</font>
00276 <font class="comment">     beginning of the string.  */</font>
00277 <font class="preprocessor">#define REG_NOTBOL 1</font>
00278 <font class="preprocessor"></font>
00279 <font class="comment">/* Like REG_NOTBOL, except for the end-of-line.  */</font>
00280 <font class="preprocessor">#define REG_NOTEOL (1 &lt;&lt; 1)</font>
00281 <font class="preprocessor"></font>
00282 
00283 <font class="comment">/* If any error codes are removed, changed, or added, update the</font>
00284 <font class="comment">   `re_error_msg' table in regex.c.  */</font>
00285   <font class="keyword">typedef</font> <font class="keyword">enum</font>
00286   {
00287     REG_NOERROR = 0,            <font class="comment">/* Success.  */</font>
00288     REG_NOMATCH,                <font class="comment">/* Didn't find a match (for regexec).  */</font>
00289 
00290     <font class="comment">/* POSIX regcomp return error codes.  (In the order listed in the</font>
00291 <font class="comment">       standard.)  */</font>
00292     REG_BADPAT,                 <font class="comment">/* Invalid pattern.  */</font>
00293     REG_ECOLLATE,               <font class="comment">/* Not implemented.  */</font>
00294     REG_ECTYPE,                 <font class="comment">/* Invalid character class name.  */</font>
00295     REG_EESCAPE,                <font class="comment">/* Trailing backslash.  */</font>
00296     REG_ESUBREG,                <font class="comment">/* Invalid back reference.  */</font>
00297     REG_EBRACK,                 <font class="comment">/* Unmatched left bracket.  */</font>
00298     REG_EPAREN,                 <font class="comment">/* Parenthesis imbalance.  */</font>
00299     REG_EBRACE,                 <font class="comment">/* Unmatched \{.  */</font>
00300     REG_BADBR,                  <font class="comment">/* Invalid contents of \{\}.  */</font>
00301     REG_ERANGE,                 <font class="comment">/* Invalid range end.  */</font>
00302     REG_ESPACE,                 <font class="comment">/* Ran out of memory.  */</font>
00303     REG_BADRPT,                 <font class="comment">/* No preceding re for repetition op.  */</font>
00304 
00305     <font class="comment">/* Error codes we've added.  */</font>
00306     REG_EEND,                   <font class="comment">/* Premature end.  */</font>
00307     REG_ESIZE,                  <font class="comment">/* Compiled pattern bigger than 2^16 bytes.  */</font>
00308     REG_ERPAREN                 <font class="comment">/* Unmatched ) or \); not returned from regcomp.  */</font>
00309   }
00310   reg_errcode_t;
00311 
00312 <font class="comment">/* This data structure represents a compiled pattern.  Before calling</font>
00313 <font class="comment">   the pattern compiler, the fields `buffer', `allocated', `fastmap',</font>
00314 <font class="comment">   `translate', and `no_sub' can be set.  After the pattern has been</font>
00315 <font class="comment">   compiled, the `re_nsub' field is available.  All other fields are</font>
00316 <font class="comment">   private to the regex routines.  */</font>
00317 
00318 <font class="preprocessor">#ifndef RE_TRANSLATE_TYPE</font>
00319 <font class="preprocessor"></font><font class="preprocessor">#define RE_TRANSLATE_TYPE char *</font>
00320 <font class="preprocessor"></font><font class="preprocessor">#endif</font>
00321 <font class="preprocessor"></font>
00322   <font class="keyword">struct </font>re_pattern_buffer
00323   {
00324 <font class="comment">/* [[[begin pattern_buffer]]] */</font>
00325     <font class="comment">/* Space that holds the compiled pattern.  It is declared as</font>
00326 <font class="comment">       `unsigned char *' because its elements are</font>
00327 <font class="comment">       sometimes used as array indexes.  */</font>
00328     <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> *buffer;
00329 
00330     <font class="comment">/* Number of bytes to which `buffer' points.  */</font>
00331     <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> <font class="keywordtype">int</font> allocated;
00332 
00333     <font class="comment">/* Number of bytes actually used in `buffer'.  */</font>
00334     <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> <font class="keywordtype">int</font> used;
00335 
00336     <font class="comment">/* Syntax setting with which the pattern was compiled.  */</font>
00337     reg_syntax_t syntax;
00338 
00339     <font class="comment">/* Pointer to a fastmap, if any, otherwise zero.  re_search uses</font>
00340 <font class="comment">       the fastmap, if there is one, to skip over impossible</font>
00341 <font class="comment">       starting points for matches.  */</font>
00342     <font class="keywordtype">char</font> *fastmap;
00343 
00344     <font class="comment">/* Either a translate table to apply to all characters before</font>
00345 <font class="comment">       comparing them, or zero for no translation.  The translation</font>
00346 <font class="comment">       is applied to a pattern when it is compiled and to a string</font>
00347 <font class="comment">       when it is matched.  */</font>
00348     RE_TRANSLATE_TYPE translate;
00349 
00350     <font class="comment">/* Number of subexpressions found by the compiler.  */</font>
00351     size_t re_nsub;
00352 
00353     <font class="comment">/* Zero if this pattern cannot match the empty string, one else.</font>
00354 <font class="comment">       Well, in truth it's used only in `re_search_2', to see</font>
00355 <font class="comment">       whether or not we should use the fastmap, so we don't set</font>
00356 <font class="comment">       this absolutely perfectly; see `re_compile_fastmap' (the</font>
00357 <font class="comment">       `duplicate' case).  */</font>
00358     <font class="keywordtype">unsigned</font> can_be_null:1;
00359 
00360     <font class="comment">/* If REGS_UNALLOCATED, allocate space in the `regs' structure</font>
00361 <font class="comment">       for `max (RE_NREGS, re_nsub + 1)' groups.</font>
00362 <font class="comment">       If REGS_REALLOCATE, reallocate space if necessary.</font>
00363 <font class="comment">       If REGS_FIXED, use what's there.  */</font>
00364 <font class="preprocessor">#define REGS_UNALLOCATED 0</font>
00365 <font class="preprocessor"></font><font class="preprocessor">#define REGS_REALLOCATE 1</font>
00366 <font class="preprocessor"></font><font class="preprocessor">#define REGS_FIXED 2</font>
00367 <font class="preprocessor"></font>    <font class="keywordtype">unsigned</font> regs_allocated:2;
00368 
00369     <font class="comment">/* Set to zero when `regex_compile' compiles a pattern; set to one</font>
00370 <font class="comment">       by `re_compile_fastmap' if it updates the fastmap.  */</font>
00371     <font class="keywordtype">unsigned</font> fastmap_accurate:1;
00372 
00373     <font class="comment">/* If set, `re_match_2' does not return information about</font>
00374 <font class="comment">       subexpressions.  */</font>
00375     <font class="keywordtype">unsigned</font> no_sub:1;
00376 
00377     <font class="comment">/* If set, a beginning-of-line anchor doesn't match at the</font>
00378 <font class="comment">       beginning of the string.  */</font>
00379     <font class="keywordtype">unsigned</font> not_bol:1;
00380 
00381     <font class="comment">/* Similarly for an end-of-line anchor.  */</font>
00382     <font class="keywordtype">unsigned</font> not_eol:1;
00383 
00384     <font class="comment">/* If true, an anchor at a newline matches.  */</font>
00385     <font class="keywordtype">unsigned</font> newline_anchor:1;
00386 
00387 <font class="comment">/* [[[end pattern_buffer]]] */</font>
00388   };
00389 
00390   <font class="keyword">typedef</font> <font class="keyword">struct </font>re_pattern_buffer regex_t;
00391 
00392 <font class="comment">/* Type for byte offsets within the string.  POSIX mandates this.  */</font>
00393   <font class="keyword">typedef</font> <font class="keywordtype">int</font> regoff_t;
00394 
00395 
00396 <font class="comment">/* This is the structure we store register match data in.  See</font>
00397 <font class="comment">   regex.texinfo for a full description of what registers match.  */</font>
00398   <font class="keyword">struct </font>re_registers
00399   {
00400     <font class="keywordtype">unsigned</font> num_regs;
00401     regoff_t *start;
00402     regoff_t *end;
00403   };
00404 
00405 
00406 <font class="comment">/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,</font>
00407 <font class="comment">   `re_match_2' returns information about at least this many registers</font>
00408 <font class="comment">   the first time a `regs' structure is passed.  */</font>
00409 <font class="preprocessor">#ifndef RE_NREGS</font>
00410 <font class="preprocessor"></font><font class="preprocessor">#define RE_NREGS 30</font>
00411 <font class="preprocessor"></font><font class="preprocessor">#endif</font>
00412 <font class="preprocessor"></font>
00413 
00414 <font class="comment">/* POSIX specification for registers.  Aside from using different names than</font>
00415 <font class="comment">   `re_registers', POSIX uses an array of structures, instead of a</font>
00416 <font class="comment">   structure of arrays.  */</font>
00417   <font class="keyword">typedef</font> <font class="keyword">struct</font>
00418 <font class="keyword">  </font>{
00419     regoff_t rm_so;             <font class="comment">/* Byte offset from string's start to substring's start.  */</font>
00420     regoff_t rm_eo;             <font class="comment">/* Byte offset from string's start to substring's end.  */</font>
00421   }
00422   regmatch_t;
00423 
00424 <font class="comment">/* Declarations for routines.  */</font>
00425 
00426 <font class="comment">/* To avoid duplicating every routine declaration -- once with a</font>
00427 <font class="comment">   prototype (if we are ANSI), and once without (if we aren't) -- we</font>
00428 <font class="comment">   use the following macro to declare argument types.  This</font>
00429 <font class="comment">   unfortunately clutters up the declarations a bit, but I think it's</font>
00430 <font class="comment">   worth it.  */</font>
00431 <font class="preprocessor">#ifndef __STDC__</font>
00432 <font class="preprocessor"></font><font class="preprocessor">#define __STDC__ 1</font>
00433 <font class="preprocessor"></font><font class="preprocessor">#endif</font>
00434 <font class="preprocessor"></font><font class="preprocessor">#if __STDC__</font>
00435 <font class="preprocessor"></font>
00436 <font class="preprocessor">#define _RE_ARGS(args) args</font>
00437 <font class="preprocessor"></font>
00438 <font class="preprocessor">#else                           </font><font class="comment">/* not __STDC__ */</font>
00439 
00440 <font class="preprocessor">#define _RE_ARGS(args) ()</font>
00441 <font class="preprocessor"></font>
00442 <font class="preprocessor">#endif                          </font><font class="comment">/* not __STDC__ */</font>
00443 
00444 <font class="comment">/* Set the current default syntax to SYNTAX, and return the old syntax.</font>
00445 <font class="comment">   You can also simply assign to the `re_syntax_options' variable.  */</font>
00446   <font class="keyword">extern</font> reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
00447 
00448 <font class="comment">/* Compile the regular expression PATTERN, with length LENGTH</font>
00449 <font class="comment">   and syntax given by the global `re_syntax_options', into the buffer</font>
00450 <font class="comment">   BUFFER.  Return NULL if successful, and an error string if not.  */</font>
00451   <font class="keyword">extern</font> <font class="keyword">const</font> <font class="keywordtype">char</font> *re_compile_pattern
00452     _RE_ARGS ((<font class="keyword">const</font> <font class="keywordtype">char</font> *pattern, size_t length,
00453                <font class="keyword">struct</font> re_pattern_buffer * buffer));
00454 
00455 
00456 <font class="comment">/* Compile a fastmap for the compiled pattern in BUFFER; used to</font>
00457 <font class="comment">   accelerate searches.  Return 0 if successful and -2 if there was an</font>
00458 <font class="comment">   internal error.  */</font>
00459   <font class="keyword">extern</font> <font class="keywordtype">int</font> re_compile_fastmap
00460     _RE_ARGS ((<font class="keyword">struct</font> re_pattern_buffer * buffer));
00461 
00462 
00463 <font class="comment">/* Search in the string STRING (with length LENGTH) for the pattern</font>
00464 <font class="comment">   compiled into BUFFER.  Start searching at position START, for RANGE</font>
00465 <font class="comment">   characters.  Return the starting position of the match, -1 for no</font>
00466 <font class="comment">   match, or -2 for an internal error.  Also return register</font>
00467 <font class="comment">   information in REGS (if REGS is nonzero and BUFFER-&gt;no_sub is zero).  */</font>
00468   <font class="keyword">extern</font> <font class="keywordtype">int</font> re_search
00469     _RE_ARGS ((<font class="keyword">struct</font> re_pattern_buffer * buffer, <font class="keyword">const</font> <font class="keywordtype">char</font> *string,
00470                <font class="keywordtype">int</font> length, <font class="keywordtype">int</font> start, <font class="keywordtype">int</font> range, <font class="keyword">struct</font> re_registers * regs));
00471 
00472 
00473 <font class="comment">/* Like `re_search', but search in the concatenation of STRING1 and</font>
00474 <font class="comment">   STRING2.  Also, stop searching at index START + STOP.  */</font>
00475   <font class="keyword">extern</font> <font class="keywordtype">int</font> re_search_2
00476     _RE_ARGS ((<font class="keyword">struct</font> re_pattern_buffer * buffer, <font class="keyword">const</font> <font class="keywordtype">char</font> *string1,
00477                <font class="keywordtype">int</font> length1, <font class="keyword">const</font> <font class="keywordtype">char</font> *string2, <font class="keywordtype">int</font> length2,
00478                <font class="keywordtype">int</font> start, <font class="keywordtype">int</font> range, <font class="keyword">struct</font> re_registers * regs, <font class="keywordtype">int</font> stop));
00479 
00480 
00481 <font class="comment">/* Like `re_search', but return how many characters in STRING the regexp</font>
00482 <font class="comment">   in BUFFER matched, starting at position START.  */</font>
00483   <font class="keyword">extern</font> <font class="keywordtype">int</font> re_match
00484     _RE_ARGS ((<font class="keyword">struct</font> re_pattern_buffer * buffer, <font class="keyword">const</font> <font class="keywordtype">char</font> *string,
00485                <font class="keywordtype">int</font> length, <font class="keywordtype">int</font> start, <font class="keyword">struct</font> re_registers * regs));
00486 
00487 
00488 <font class="comment">/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */</font>
00489   <font class="keyword">extern</font> <font class="keywordtype">int</font> re_match_2
00490     _RE_ARGS ((<font class="keyword">struct</font> re_pattern_buffer * buffer, <font class="keyword">const</font> <font class="keywordtype">char</font> *string1,
00491                <font class="keywordtype">int</font> length1, <font class="keyword">const</font> <font class="keywordtype">char</font> *string2, <font class="keywordtype">int</font> length2,
00492                <font class="keywordtype">int</font> start, <font class="keyword">struct</font> re_registers * regs, <font class="keywordtype">int</font> stop));
00493 
00494 
00495 <font class="comment">/* Set REGS to hold NUM_REGS registers, storing them in STARTS and</font>
00496 <font class="comment">   ENDS.  Subsequent matches using BUFFER and REGS will use this memory</font>
00497 <font class="comment">   for recording register information.  STARTS and ENDS must be</font>
00498 <font class="comment">   allocated with malloc, and must each be at least `NUM_REGS * sizeof</font>
00499 <font class="comment">   (regoff_t)' bytes long.</font>
00500 <font class="comment"></font>
00501 <font class="comment">   If NUM_REGS == 0, then subsequent matches should allocate their own</font>
00502 <font class="comment">   register data.</font>
00503 <font class="comment"></font>
00504 <font class="comment">   Unless this function is called, the first search or match using</font>
00505 <font class="comment">   BUFFER will allocate its own register data, without</font>
00506 <font class="comment">   freeing the old data.  */</font>
00507   <font class="keyword">extern</font> <font class="keywordtype">void</font> re_set_registers
00508     _RE_ARGS ((<font class="keyword">struct</font> re_pattern_buffer * buffer, <font class="keyword">struct</font> re_registers * regs,
00509                <font class="keywordtype">unsigned</font> num_regs, regoff_t * starts, regoff_t * ends));
00510 
00511 <font class="preprocessor">#ifdef _REGEX_RE_COMP</font>
00512 <font class="preprocessor"></font><font class="preprocessor">#ifndef _CRAY</font>
00513 <font class="preprocessor"></font><font class="comment">/* 4.2 bsd compatibility.  */</font>
00514   <font class="keyword">extern</font> <font class="keywordtype">char</font> *re_comp _RE_ARGS ((<font class="keyword">const</font> <font class="keywordtype">char</font> *));
00515   <font class="keyword">extern</font> <font class="keywordtype">int</font> re_exec _RE_ARGS ((<font class="keyword">const</font> <font class="keywordtype">char</font> *));
00516 <font class="preprocessor">#endif</font>
00517 <font class="preprocessor"></font><font class="preprocessor">#endif</font>
00518 <font class="preprocessor"></font>
00519 <font class="comment">/* POSIX compatibility.  */</font>
00520   <font class="keyword">extern</font> <font class="keywordtype">int</font> regcomp
00521     _RE_ARGS ((regex_t * preg, <font class="keyword">const</font> <font class="keywordtype">char</font> *pattern, <font class="keywordtype">int</font> cflags));
00522   <font class="keyword">extern</font> <font class="keywordtype">int</font> regexec
00523     _RE_ARGS (
00524               (<font class="keyword">const</font> regex_t * preg, <font class="keyword">const</font> <font class="keywordtype">char</font> *string, size_t nmatch,
00525                regmatch_t pmatch[], <font class="keywordtype">int</font> eflags));
00526   <font class="keyword">extern</font> size_t regerror
00527     _RE_ARGS (
00528               (<font class="keywordtype">int</font> errcode, <font class="keyword">const</font> regex_t * preg, <font class="keywordtype">char</font> *errbuf,
00529                size_t errbuf_size));
00530   <font class="keyword">extern</font> <font class="keywordtype">void</font> regfree _RE_ARGS ((regex_t * preg));
00531 
00532 
00533 <font class="preprocessor">#ifdef __cplusplus</font>
00534 <font class="preprocessor"></font>}
00535 <font class="preprocessor">#endif                          </font><font class="comment">/* C++ */</font>
00536 
00537 <font class="preprocessor">#endif                          </font><font class="comment">/* regex.h */</font>
00538 
00539 <font class="comment">/*</font>
00540 <font class="comment">Local variables:</font>
00541 <font class="comment">make-backup-files: t</font>
00542 <font class="comment">version-control: t</font>
00543 <font class="comment">trim-versions-without-asking: nil</font>
00544 <font class="comment">End:</font>
00545 <font class="comment">*/</font>
</pre></div><hr><address align="right"><small>Generated on Thu Jun 20 22:13:00 2002 for The Sword Project by
<a href="http://www.doxygen.org/index.html">
<img src="doxygen.png" alt="doxygen" align="middle" border=0 
width=110 height=53></a>1.2.15 </small></address>
</body>
</html>