Apache HTTPD
util_pcre.c
Go to the documentation of this file.
1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This code is based on pcreposix.c from the PCRE Library distribution,
18 * as originally written by Philip Hazel <[email protected]>, and forked by
19 * the Apache HTTP Server project to provide POSIX-style regex function
20 * wrappers around underlying PCRE library functions for httpd.
21 *
22 * The original source file pcreposix.c is copyright and licensed as follows;
23
24 Copyright (c) 1997-2004 University of Cambridge
25
26-----------------------------------------------------------------------------
27Redistribution and use in source and binary forms, with or without
28modification, are permitted provided that the following conditions are met:
29
30 * Redistributions of source code must retain the above copyright notice,
31 this list of conditions and the following disclaimer.
32
33 * Redistributions in binary form must reproduce the above copyright
34 notice, this list of conditions and the following disclaimer in the
35 documentation and/or other materials provided with the distribution.
36
37 * Neither the name of the University of Cambridge nor the names of its
38 contributors may be used to endorse or promote products derived from
39 this software without specific prior written permission.
40
41THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
42AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
45LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
46CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
47SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
48INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
49CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
51POSSIBILITY OF SUCH DAMAGE.
52-----------------------------------------------------------------------------
53*/
54
55#include "httpd.h"
56#include "apr_strings.h"
57#include "apr_tables.h"
58#include "apr_thread_proc.h"
59
60#ifdef HAVE_PCRE2
61#define PCRE2_CODE_UNIT_WIDTH 8
62#include "pcre2.h"
63#define PCREn(x) PCRE2_ ## x
64#else
65#include "pcre.h"
66#define PCREn(x) PCRE_ ## x
67#endif
68
69/* PCRE_DUPNAMES is only present since version 6.7 of PCRE */
70#if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2)
71#error PCRE Version 6.7 or later required!
72#else
73
74#define APR_WANT_STRFUNC
75#include "apr_want.h"
76
77#ifndef POSIX_MALLOC_THRESHOLD
78#define POSIX_MALLOC_THRESHOLD (10)
79#endif
80
81/* Table of error strings corresponding to POSIX error codes; must be
82 * kept in synch with include/ap_regex.h's AP_REG_E* definitions.
83 */
84
/* Looked up as pstring[errcode], so an entry's position is its error code:
 * slot 0 is a placeholder and the remaining slots follow the order given
 * in the per-entry comments.
 */
85static const char *const pstring[] = {
86 "", /* Dummy for value 0 */
87 "internal error", /* AP_REG_ASSERT */
88 "failed to get memory", /* AP_REG_ESPACE */
89 "bad argument", /* AP_REG_INVARG */
90 "match failed" /* AP_REG_NOMATCH */
91};
92
94{
95#ifdef HAVE_PCRE2
    /* Function-static storage: the PCRE2 branch below returns a pointer to
     * this buffer, so the result remains valid after the call returns and
     * is shared by every caller.
     */
96 static char buf[80];
97#endif
98 switch (which) {
102#ifdef HAVE_PCRE2
104 return buf;
105#else
    /* PCRE1 hands back its own version string. */
106 return pcre_version();
107#endif
108 default:
    /* Any selector value not handled by a case above. */
109 return "Unknown";
110 }
111}
112
115{
116 const char *message, *addmessage;
118

    /* Pick the text for errcode from pstring[], or a generic message when
     * the code is past the end of the table.
     * NOTE(review): only the upper bound is checked here; confirm that a
     * negative errcode can never reach the pstring[errcode] lookup.
     */
119 message = (errcode >= (int)(sizeof(pstring) / sizeof(char *))) ?
120 "unknown error code" : pstring[errcode];
    /* +1 accounts for the terminating NUL. */
121 length = strlen(message) + 1;
122
    /* The offset suffix is only counted when a regex was supplied and its
     * re_erroffset is not -1; 6 characters are reserved for the number.
     */
123 addmessage = " at offset ";
124 addlength = (preg != NULL && (int)preg->re_erroffset != -1) ?
125 strlen(addmessage) + 6 : 0;
126
    /* Nothing is written when the caller passed a zero-sized buffer; the
     * suffixed form is chosen only if the buffer can hold message + suffix.
     */
127 if (errbuf_size > 0) {
128 if (addlength > 0 && errbuf_size >= length + addlength)
130 (int)preg->re_erroffset);
131 else
133 }
134
    /* Size needed for the full message (including the reserved suffix
     * space), independent of how much was actually written.
     */
135 return length + addlength;
136}
137
138
139
140
141/*************************************************
142 * Free store held by a regex *
143 *************************************************/
144
146{
147#ifdef HAVE_PCRE2
149#else
151#endif
152}
153
154
155
156
157/*************************************************
158 * Compile a regular expression *
159 *************************************************/
160
/* File-scope default compile flags, initialised to AP_REG_DEFAULT. */
161static int default_cflags = AP_REG_DEFAULT;
162
164{
    /* Report the current value of the file-scope default. */
165 return default_cflags;
166}
167
169{
171}
172
174{
    /* Stays 0 when name matches none of the keywords tested below. */
175 int cflag = 0;
176
    /* Keywords are compared with ap_cstr_casecmp(); the recognised names
     * are ICASE, DOTALL, DOLLAR_ENDONLY and EXTENDED.
     */
177 if (ap_cstr_casecmp(name, "ICASE") == 0) {
179 }
180 else if (ap_cstr_casecmp(name, "DOTALL") == 0) {
182 }
183 else if (ap_cstr_casecmp(name, "DOLLAR_ENDONLY") == 0) {
185 }
186 else if (ap_cstr_casecmp(name, "EXTENDED") == 0) {
188 }
189
190 return cflag;
191}
192
193/*
194 * Arguments:
195 * preg points to a structure for recording the compiled expression
196 * pattern the pattern to compile
197 * cflags compilation flags
198 *
199 * Returns: 0 on success
200 * various non-zero codes on failure
 *
 * Failure codes visible in the body: AP_REG_ESPACE when the compiler
 * reports error code 21, AP_REG_INVARG for every other compile failure.
 * The compiled pattern (or NULL on failure) is stored in preg->re_pcre.
201*/
202AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
203{
204#ifdef HAVE_PCRE2
206 size_t erroffset;
207#else
208 const char *errorptr;
209 int erroffset;
210#endif
211 int errcode = 0;
    /* Duplicate group names are always allowed, whatever cflags says. */
212 int options = PCREn(DUPNAMES);
213
    /* Taken only when the caller did not pass AP_REG_NO_DEFAULT. */
214 if ((cflags & AP_REG_NO_DEFAULT) == 0)
216
    /* Translate the AP_REG_* compile flags into the PCRE option bits:
     * ICASE -> CASELESS, NEWLINE -> MULTILINE, DOTALL -> DOTALL,
     * DOLLAR_ENDONLY -> DOLLAR_ENDONLY.
     */
217 if ((cflags & AP_REG_ICASE) != 0)
218 options |= PCREn(CASELESS);
219 if ((cflags & AP_REG_NEWLINE) != 0)
220 options |= PCREn(MULTILINE);
221 if ((cflags & AP_REG_DOTALL) != 0)
222 options |= PCREn(DOTALL);
223 if ((cflags & AP_REG_DOLLAR_ENDONLY) != 0)
224 options |= PCREn(DOLLAR_ENDONLY);
225
226#ifdef HAVE_PCRE2
227 preg->re_pcre = pcre2_compile((const unsigned char *)pattern,
229 &erroffset, NULL);
230#else
233#endif
234
    /* A NULL result means compilation failed; map it to an AP_REG_* code.
     * NOTE(review): the value 21 is taken from PCRE1's pcreapi(3); confirm
     * whether pcre2_compile() reports an allocation failure under the same
     * number, otherwise the PCRE2 build always falls through to INVARG.
     */
236 if (preg->re_pcre == NULL) {
237 /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */
238 if (errcode == 21)
239 return AP_REG_ESPACE;
240 return AP_REG_INVARG;
241 }
242
243#ifdef HAVE_PCRE2
247#else
250#endif
251 return 0;
252}
253
254
255
256
257/*************************************************
258 * Match a regular expression *
259 *************************************************/
260
261/* Unfortunately, PCRE requires 3 ints of working space for each captured
262 * substring, so we have to get and release working store instead of just using
263 * the POSIX structures as was done in earlier releases when PCRE needed only 2
264 * ints. However, if the number of possible capturing brackets is small, use a
265 * block of store on the stack, to reduce the use of malloc/free. The threshold
266 * is in a macro that can be changed at configure time.
267 * Yet more unfortunately, PCRE2 wants an opaque context by providing the API
268 * to allocate and free it, so to minimize these calls we maintain one opaque
269 * context per thread (in Thread Local Storage, TLS) grown as needed, and while
270 * at it we do the same for PCRE1 ints vectors. Note that this requires a fast
271 * TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
272 * from/to ap_thread_current() when AP_HAS_THREAD_LOCAL; otherwise we'll do
273 * the allocation and freeing for each ap_regexec().
274 */
275
276#ifdef HAVE_PCRE2
278typedef size_t* match_vector_pt;
279#else
280typedef int* match_data_pt;
281typedef int* match_vector_pt;
282#endif
283
284static APR_INLINE
287{
289
290#ifdef HAVE_PCRE2
292#else
294 data = malloc(size * sizeof(int) * 3);
295 }
296 else {
298 }
299#endif
300
301 return data;
302}
303
304static APR_INLINE
306{
307#ifdef HAVE_PCRE2
309#else
311 free(data);
312 }
313#endif
314}
315
316#if AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL)
317
318struct apreg_tls {
321};
322
323#ifdef HAVE_PCRE2
325{
    /* arg is the struct apreg_tls that was handed to apr_thread_data_set()
     * together with this callback.
     */
326 struct apreg_tls *tls = arg;
    /* Release the PCRE2 match data held in the per-thread record. */
327 pcre2_match_data_free(tls->data); /* NULL safe */
328 return APR_SUCCESS;
329}
330#endif
331
334 int *to_free)
335{
336 apr_thread_t *current;
337 struct apreg_tls *tls = NULL;
338
339 /* Even though AP_HAS_THREAD_LOCAL, we may still be called by a
340 * native/non-apr thread, let's fall back to alloc/free in this case.
341 */
342 current = ap_thread_current();
343 if (!current) {
    /* Tell the caller it owns (and must release) what it gets back. */
344 *to_free = 1;
346 }
347
    /* Fetch this thread's cached record, stored under the key "apreg". */
348 apr_thread_data_get((void **)&tls, "apreg", current);
    /* (Re)allocate when there is no record yet or it is too small. */
349 if (!tls || tls->size < size) {
350 apr_pool_t *tp = apr_thread_pool_get(current);
351 if (!tls) {
    /* First use on this thread: create a zeroed record in the thread's
     * pool and register it. Only the PCRE2 build attaches a cleanup
     * callback (apreg_tls_cleanup); the PCRE1 build registers none.
     */
352 tls = apr_pcalloc(tp, sizeof(*tls));
353#ifdef HAVE_PCRE2
354 apr_thread_data_set(tls, "apreg", apreg_tls_cleanup, current);
355#else
356 apr_thread_data_set(tls, "apreg", NULL, current);
357#endif
358 }
359
    /* Growth policy: double the previous capacity, and if that is still
     * short of the request use the requested size instead. Sizes below
     * POSIX_MALLOC_THRESHOLD get additional handling in the inner branch.
     */
360 tls->size *= 2;
361 if (tls->size < size) {
362 tls->size = size;
363 if (tls->size < POSIX_MALLOC_THRESHOLD) {
365 }
366 }
367
368#ifdef HAVE_PCRE2
    /* Replace the old match data with one of the new capacity. On failure
     * the record's size is reset to 0 and NULL is returned.
     */
369 pcre2_match_data_free(tls->data); /* NULL safe */
370 tls->data = pcre2_match_data_create(tls->size, NULL);
371 if (!tls->data) {
372 tls->size = 0;
373 return NULL;
374 }
375#else
    /* PCRE1: a fresh vector of 3 ints per slot from the thread's pool; the
     * previous vector is not released here.
     */
376 tls->data = apr_palloc(tp, tls->size * sizeof(int) * 3);
377#endif
378 }
379
380 return tls->data;
381}
382
383#else /* AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL) */
384
387 int *to_free)
388{
389 *to_free = 1;
391}
392
393#endif /* AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL) */
394
/* Match a NUL-terminated subject string.
 *
 * The subject's length is measured with strlen(), so string must be a valid
 * NUL-terminated string; everything else (nmatch, pmatch, eflags) is passed
 * through unchanged to ap_regexec_len(), whose result is returned as is.
 */
395AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
397 int eflags)
398{
399 return ap_regexec_len(preg, string, strlen(string), nmatch, pmatch,
400 eflags);
401}
402
/* Match preg against the first len bytes of buff and report capture
 * offsets through pmatch.
 *
 * Returns 0 when the underlying match call reports a non-negative result,
 * AP_REG_ESPACE when no match workspace is available, and otherwise an
 * AP_REG_* code derived from the PCRE error (AP_REG_ASSERT for any error
 * without a dedicated case).
 */
403AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
406{
407 int rc;
408 int options = 0, to_free = 0;
411#ifdef HAVE_PCRE2
413#else
416#endif
417
    /* No workspace could be obtained for this match. */
418 if (!data) {
419 return AP_REG_ESPACE;
420 }
421
    /* Translate the AP_REG_* execution flags into PCRE option bits. */
422 if ((eflags & AP_REG_NOTBOL) != 0)
423 options |= PCREn(NOTBOL);
424 if ((eflags & AP_REG_NOTEOL) != 0)
425 options |= PCREn(NOTEOL);
426
427#ifdef HAVE_PCRE2
429 (const unsigned char *)buff, len,
430 0, options, data, NULL);
432#else
    /* PCRE1: the workspace itself is the offset vector, sized at three
     * ints per capture slot.
     */
433 ovector = data;
434 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
435 0, options, ovector, ncaps * 3);
436#endif
437
438 if (rc >= 0) {
439 apr_size_t n = rc, i;
    /* A result of 0, or more pairs than the caller has room for, is
     * clamped to nmatch.
     */
440 if (n == 0 || n > nmatch)
441 rc = n = nmatch; /* All capture slots were filled in */
    /* ovector holds (start, end) pairs, two entries per capture. */
442 for (i = 0; i < n; i++) {
443 pmatch[i].rm_so = ovector[i * 2];
444 pmatch[i].rm_eo = ovector[i * 2 + 1];
445 }
    /* Mark the remaining caller slots as unset. */
446 for (; i < nmatch; i++)
447 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
448 if (to_free) {
450 }
451 return 0;
452 }
453 else {
454 if (to_free) {
456 }
457#ifdef HAVE_PCRE2
459 return AP_REG_INVARG;
460#endif
    /* Map the PCRE error onto an AP_REG_* code; anything without a
     * dedicated case ends up as AP_REG_ASSERT.
     */
461 switch (rc) {
462 case PCREn(ERROR_NOMATCH):
464 case PCREn(ERROR_NULL):
468 case PCREn(ERROR_BADMAGIC):
470 case PCREn(ERROR_NOMEMORY):
472#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT)
475#endif
476#if defined(PCRE_ERROR_UNKNOWN_NODE)
478 return AP_REG_ASSERT;
479#endif
480#if defined(PCRE_ERROR_BADUTF8)
481 case PCREn(ERROR_BADUTF8):
483#endif
484#if defined(PCRE_ERROR_BADUTF8_OFFSET)
487#endif
488 default:
489 return AP_REG_ASSERT;
490 }
491 }
492}
493
495 apr_array_header_t *names, const char *prefix,
496 int upper)
497{
498 char *nametable;
499
500#ifdef HAVE_PCRE2
503 uint32_t i;
510#else
511 int namecount;
512 int nameentrysize;
513 int i;
520#endif
521
    /* Walk the name table: namecount fixed-size entries of nameentrysize
     * bytes each.
     */
522 for (i = 0; i < namecount; i++) {
523 const char *offset = nametable + i * nameentrysize;
    /* The first two bytes of an entry are the capture index, high byte
     * first; the name itself starts at offset + 2.
     * NOTE(review): offset is a plain char pointer, so these bytes are
     * combined with the platform's char signedness; confirm that index
     * bytes >= 0x80 cannot occur, or read them as unsigned char.
     */
524 int capture = ((offset[0] << 8) + offset[1]);
    /* Loops until the array has a slot at index capture. */
525 while (names->nelts <= capture) {
527 }
    /* A prefixed and/or upper-cased name needs a private, writable copy;
     * otherwise the array slot points straight into the name table.
     */
528 if (upper || prefix) {
529 char *name = ((char **) names->elts)[capture] =
530 prefix ? apr_pstrcat(names->pool, prefix, offset + 2,
531 NULL) :
533 if (upper) {
535 }
536 }
537 else {
538 ((const char **)names->elts)[capture] = offset + 2;
539 }
540 }
541
    /* Number of name-table entries processed. */
542 return namecount;
543}
544
545#endif /* PCRE_DUPNAMES defined */
546
547/* End of util_pcre.c */
#define AP_DECLARE(type)
Definition ap_config.h:67
@ AP_REG_PCRE_LOADED
Definition ap_regex.h:96
@ AP_REG_PCRE_COMPILED
Definition ap_regex.h:95
int n
Definition ap_regex.h:278
const char * pattern
Definition ap_regex.h:243
#define AP_REG_ICASE
Definition ap_regex.h:73
#define AP_REG_DOLLAR_ENDONLY
Definition ap_regex.h:85
const char apr_size_t ap_regmatch_t * pmatch
Definition ap_regex.h:172
#define AP_REG_DOTALL
Definition ap_regex.h:83
const char * buff
Definition ap_regex.h:186
const char int cflags
Definition ap_regex.h:159
const ap_regex_t * preg
Definition ap_regex.h:197
apr_array_header_t const char int upper
Definition ap_regex.h:210
const ap_regex_t char apr_size_t errbuf_size
Definition ap_regex.h:198
#define AP_REG_DEFAULT
Definition ap_regex.h:91
#define AP_REG_NO_DEFAULT
Definition ap_regex.h:87
#define AP_REG_NOTEOL
Definition ap_regex.h:76
const char apr_size_t ap_regmatch_t int eflags
Definition ap_regex.h:172
const char apr_size_t len
Definition ap_regex.h:187
#define AP_REG_NEWLINE
Definition ap_regex.h:74
#define AP_REG_NOTBOL
Definition ap_regex.h:75
const char apr_size_t nmatch
Definition ap_regex.h:172
@ AP_REG_ESPACE
Definition ap_regex.h:102
@ AP_REG_NOMATCH
Definition ap_regex.h:104
@ AP_REG_INVARG
Definition ap_regex.h:103
@ AP_REG_ASSERT
Definition ap_regex.h:101
#define AP_REG_EXTENDED
Definition ap_regex.h:78
const ap_regex_t char * errbuf
Definition ap_regex.h:198
#define APR_STRINGIFY(n)
Definition ap_release.h:62
APR Strings library.
APR Table library.
APR Thread and Process Library.
APR Standard Headers Support.
const unsigned char * buf
Definition util_md5.h:50
void const char * arg
Definition http_vhost.h:63
int apr_off_t * length
int * errcode
Definition apr_dbm.h:183
apr_redis_t * rc
Definition apr_redis.h:173
int ap_cstr_casecmp(const char *s1, const char *s2)
Definition util.c:3542
void ap_str_toupper(char *s)
Definition util.c:2418
apr_size_t size
const char int apr_pool_t * pool
Definition apr_cstr.h:84
#define APR_SUCCESS
Definition apr_errno.h:225
int apr_status_t
Definition apr_errno.h:44
apr_seek_where_t apr_off_t * offset
void * data
apr_interface_e which
#define apr_pcalloc(p, size)
Definition apr_pools.h:465
HTTP Daemon routines.
#define DATE
return NULL
Definition mod_so.c:359
int i
Definition mod_so.c:347
apr_status_t apr_thread_data_set(void *data, const char *key, apr_status_t(*cleanup)(void *), apr_thread_t *thread)
Definition thread.c:198
apr_status_t apr_thread_data_get(void **data, const char *key, apr_thread_t *thread)
Definition thread.c:186
char * name
apr_size_t re_erroffset
Definition ap_regex.h:111
int re_nsub
Definition ap_regex.h:110
void * re_pcre
Definition ap_regex.h:109
#define PCREn(x)
Definition util_pcre.c:66
typedef int(WSAAPI *apr_winapi_fpt_WSAPoll)(IN OUT LPWSAPOLLFD fdArray