Apache HTTPD
util_regex.c
Go to the documentation of this file.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "apr.h"
18#include "apr_lib.h"
19#include "apr_pools.h"
20#include "apr_strings.h"
21#include "ap_config.h"
22#include "ap_regex.h"
23#include "httpd.h"
24
26{
28 return APR_SUCCESS;
29}
30
32 const char *pattern)
33{
34 /* perl style patterns
35 * add support for more as and when wanted
36 * substitute: s/rx/subs/
37 * match: m/rx/ or just /rx/
38 */
39
40 /* allow any nonalnum delimiter as first or second char.
41 * If we ever use this with non-string pattern we'll need an extra check
42 */
43 const char *endp = 0;
44 const char *str = pattern;
45 const char *rxstr;
47 char delim = 0;
48 enum { SUBSTITUTE = 's', MATCH = 'm'} action = MATCH;
49
50 if (!apr_isalnum(pattern[0])) {
51 delim = *str++;
52 }
53 else if (pattern[0] == 's' && !apr_isalnum(pattern[1])) {
54 action = SUBSTITUTE;
55 delim = pattern[1];
56 str += 2;
57 }
58 else if (pattern[0] == 'm' && !apr_isalnum(pattern[1])) {
59 delim = pattern[1];
60 str += 2;
61 }
62 /* TODO: support perl's after/before */
63 /* FIXME: fix these simplminded delims */
64
65 /* we think there's a delimiter. Allow for it not to be if unmatched */
66 if (delim) {
67 endp = ap_strchr_c(str, delim);
68 }
69 if (!endp) { /* there's no delim or flags */
70 if (ap_regcomp(&ret->rx, pattern, 0) == 0) {
73 return ret;
74 }
75 else {
76 return NULL;
77 }
78 }
79
80 /* We have a delimiter. Use it to extract the regexp */
82
83 /* If it's a substitution, we need the replacement string
84 * TODO: possible future enhancement - support other parsing
85 * in the replacement string.
86 */
87 if (action == SUBSTITUTE) {
88 str = endp+1;
89 if (!*str || (endp = ap_strchr_c(str, delim), !endp)) {
90 /* missing replacement string is an error */
91 return NULL;
92 }
93 ret->subs = apr_pstrmemdup(pool, str, endp-str);
94 }
95
96 /* anything after the current delimiter is flags */
98 while (*++endp) {
99 switch (*endp) {
100 case 'i': ret->flags |= AP_REG_ICASE; break;
101 case 'm': ret->flags |= AP_REG_NEWLINE; break;
102 case 'n': ret->flags |= AP_REG_NOMEM; break;
103 case 'g': ret->flags |= AP_REG_MULTI; break;
104 case 's': ret->flags |= AP_REG_DOTALL; break;
105 case '^': ret->flags |= AP_REG_NOTBOL; break;
106 case '$': ret->flags |= AP_REG_NOTEOL; break;
107 default: break; /* we should probably be stricter here */
108 }
109 }
110 if (ap_regcomp(&ret->rx, rxstr, AP_REG_NO_DEFAULT | ret->flags) == 0) {
113 }
114 else {
115 return NULL;
116 }
117 if (!(ret->flags & AP_REG_NOMEM)) {
118 /* count size of memory required, starting at 1 for the whole-match
119 * Simpleminded should be fine 'cos regcomp already checked syntax
120 */
121 ret->nmatch = 1;
122 while (*rxstr) {
123 switch (*rxstr++) {
124 case '\\': /* next char is escaped - skip it */
125 if (*rxstr != 0) {
126 ++rxstr;
127 }
128 break;
129 case '(': /* unescaped bracket implies memory */
130 ++ret->nmatch;
131 break;
132 default:
133 break;
134 }
135 }
136 ret->pmatch = apr_palloc(pool, ret->nmatch*sizeof(ap_regmatch_t));
137 }
138 return ret;
139}
140
142 const char *pattern, char **newpattern)
143{
144 int ret = 1;
145 int startl, oldl, newl, diffsz;
146 const char *remainder;
147 char *subs;
148/* snrf process_regexp from mod_headers */
149 if (ap_regexec(&rx->rx, pattern, rx->nmatch, rx->pmatch, rx->flags) != 0) {
150 rx->match = NULL;
151 return 0; /* no match, nothing to do */
152 }
153 rx->match = pattern;
154 if (rx->subs) {
156 rx->nmatch, rx->pmatch);
157 if (!*newpattern) {
158 return 0; /* FIXME - should we do more to handle error? */
159 }
160 startl = rx->pmatch[0].rm_so;
161 oldl = rx->pmatch[0].rm_eo - startl;
162 newl = strlen(*newpattern);
163 diffsz = newl - oldl;
165 if (rx->flags & AP_REG_MULTI) {
166 /* recurse to do any further matches */
168 if (ret > 1) {
169 /* a further substitution happened */
170 diffsz += strlen(subs) - strlen(remainder);
171 remainder = subs;
172 }
173 }
174 subs = apr_palloc(pool, strlen(pattern) + 1 + diffsz);
178 *newpattern = subs;
179 }
180 return ret;
181}
#ifdef DOXYGEN
/* Function form of ap_rxplus_nmatch, built only when DOXYGEN is defined
 * (the cross-reference for this file lists ap_rxplus_nmatch as a macro in
 * ap_regex.h).
 *
 * Returns rx->nmatch if the last exec left a match recorded in rx->match,
 * otherwise 0.
 */
AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx)
{
    return (rx->match != NULL) ? rx->nmatch : 0;
}
#endif
188
189/* If this blows up on you, see the notes in the header/apidoc
190 * rx->match is a pointer and it's your responsibility to ensure
191 * it hasn't gone out-of-scope since the last ap_rxplus_exec
192 */
194 const char **match)
195{
196 if (n >= 0 && n < ap_rxplus_nmatch(rx)) {
197 *match = rx->match + rx->pmatch[n].rm_so;
198 *len = rx->pmatch[n].rm_eo - rx->pmatch[n].rm_so;
199 }
200 else {
201 *len = -1;
202 *match = NULL;
203 }
204}
206{
207 int len;
208 const char *match;
210 return apr_pstrndup(pool, match, len);
211}
Symbol export macros and hook functions.
#define AP_DECLARE(type)
Definition ap_config.h:67
Apache Regex defines.
int n
Definition ap_regex.h:278
int int const char ** match
Definition ap_regex.h:279
const char * pattern
Definition ap_regex.h:243
#define AP_REG_ICASE
Definition ap_regex.h:73
#define AP_REG_DOLLAR_ENDONLY
Definition ap_regex.h:85
#define AP_REG_DOTALL
Definition ap_regex.h:83
const ap_regex_t * preg
Definition ap_regex.h:197
ap_rxplus_t * rx
Definition ap_regex.h:256
#define AP_REG_NO_DEFAULT
Definition ap_regex.h:87
#define AP_REG_MULTI
Definition ap_regex.h:81
#define AP_REG_NOTEOL
Definition ap_regex.h:76
#define ap_rxplus_nmatch(rx)
Definition ap_regex.h:267
#define AP_REG_NOMEM
Definition ap_regex.h:82
const char apr_size_t len
Definition ap_regex.h:187
#define AP_REG_NEWLINE
Definition ap_regex.h:74
#define AP_REG_NOTBOL
Definition ap_regex.h:75
ap_rxplus_t const char char ** newpattern
Definition ap_regex.h:257
APR general purpose library routines.
APR memory allocation.
APR Strings library.
#define ap_strchr_c(s, c)
Definition httpd.h:2353
char * ap_pregsub(apr_pool_t *p, const char *input, const char *source, apr_size_t nmatch, ap_regmatch_t pmatch[])
Definition util.c:457
apr_size_t size
const char int apr_pool_t * pool
Definition apr_cstr.h:84
#define apr_isalnum(c)
Definition apr_lib.h:203
#define APR_SUCCESS
Definition apr_errno.h:225
int apr_status_t
Definition apr_errno.h:44
#define apr_pcalloc(p, size)
Definition apr_pools.h:465
HTTP Daemon routines.
md_subscription * subs
Definition md_event.c:33
return NULL
Definition mod_so.c:359
ap_regex_t rx
Definition ap_regex.h:220
const char * subs
Definition ap_regex.h:222
const char * match
Definition ap_regex.h:223
apr_uint32_t flags
Definition ap_regex.h:221
ap_regmatch_t * pmatch
Definition ap_regex.h:225
apr_size_t nmatch
Definition ap_regex.h:224
#define str
int ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx, const char *pattern, char **newpattern)
Definition util_regex.c:141
char * ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n)
Definition util_regex.c:205
ap_rxplus_t * ap_rxplus_compile(apr_pool_t *pool, const char *pattern)
Definition util_regex.c:31
void ap_rxplus_match(ap_rxplus_t *rx, int n, int *len, const char **match)
Definition util_regex.c:193
static apr_status_t rxplus_cleanup(void *preg)
Definition util_regex.c:25