Apache HTTPD
apr_uri.c
Go to the documentation of this file.
1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * apr_uri.c: URI related utility things
19 *
20 */
21
22#include <stdlib.h>
23
24#include "apu.h"
25#include "apr.h"
26#include "apr_general.h"
27#include "apr_strings.h"
28
29#define APR_WANT_STRFUNC
30#include "apr_want.h"
31
32#include "apr_uri.h"
33
34typedef struct schemes_t schemes_t;
35
/* One entry of the scheme lookup table.
 * NOTE(review): the lookup code reads scheme->default_port, but that
 * member's declaration is not visible in this listing -- confirm against
 * the original source. */
37struct schemes_t {
39 const char *name; /* scheme name; the lookup compares it with strcasecmp() */
42};
43
44/* Some WWW schemes and their default ports; this is basically /etc/services */
45/* This will become global when the protocol abstraction comes */
46/* As the schemes are searched by a linear search, */
47/* they are sorted by their expected frequency */
49{
62 {"z39.50r", APR_URI_WAIS_DEFAULT_PORT},
63 {"z39.50s", APR_URI_WAIS_DEFAULT_PORT},
 /* Sentinel entry: a NULL name is what stops the lookup loop.
  * NOTE(review): the lookup returns 0 (not this 0xFFFF) when no scheme
  * matches, so this port value appears to be unused by it -- confirm. */
70 { NULL, 0xFFFF } /* unknown port */
71};
72
74{
 /* Linear, case-insensitive search of schemes[] for scheme_str.
  * Returns the matching entry's default_port, or 0 when scheme_str is
  * NULL or no entry matches. */
75 schemes_t *scheme;
76
77 if (scheme_str) {
 /* the table ends at the entry whose name is NULL */
78 for (scheme = schemes; scheme->name != NULL; ++scheme) {
79 if (strcasecmp(scheme_str, scheme->name) == 0) {
80 return scheme->default_port;
81 }
82 }
83 }
 /* NULL input or unknown scheme */
84 return 0;
85}
86
87/* Unparse an apr_uri_t structure into a URI string.
88 * Optionally suppress the password for security reasons.
89 */
91 const apr_uri_t *uptr,
92 unsigned flags)
93{
 /* Builds the result incrementally in `ret` with apr_pstrcat(p, ...):
  * first the optional "user:password@" part, then "//" + host (+ port)
  * is put in front of it, then "scheme:" is prepended, and finally
  * path, query and fragment are appended.  Returns the accumulated
  * string, which is "" if nothing was added.
  * NOTE(review): several condition/argument lines of this function are
  * not visible in this listing; the comments describe only what is. */
94 char *ret = "";
95
96 /* If suppressing the site part, omit both user name & scheme://hostname */
98
99 /* Construct a "user:password@" string, honoring the passed
100 * APR_URI_UNP_ flags: */
101 if (uptr->user || uptr->password) {
102 ret = apr_pstrcat(p,
104 ? uptr->user : "",
106 ? ":" : "",
 /* when the password is not shown, the fixed mask "XXXXXXXX" is
  * emitted in its place */
109 ? uptr->password : "XXXXXXXX")
110 : "",
111 ((uptr->user && !(flags & APR_URI_UNP_OMITUSER)) ||
113 ? "@" : "",
114 NULL);
115 }
116
117 /* Construct scheme://site string */
118 if (uptr->hostname) {
119 int is_default_port;
120 const char *lbrk = "", *rbrk = "";
121
 /* a ':' inside the hostname is taken to mean an IPv6 literal, which
  * is wrapped in square brackets in the output */
122 if (strchr(uptr->hostname, ':')) { /* v6 literal */
123 lbrk = "[";
124 rbrk = "]";
125 }
126
128 (uptr->port_str == NULL ||
129 uptr->port == 0 ||
131
 /* `ret` (the user/password part built above) goes between "//" and
  * the hostname */
132 ret = apr_pstrcat(p, "//", ret, lbrk, uptr->hostname, rbrk,
133 is_default_port ? "" : ":",
135 NULL);
136 }
137 if (uptr->scheme) {
138 ret = apr_pstrcat(p, uptr->scheme, ":", ret, NULL);
139 }
140 }
141
142 /* Should we suppress all path info? */
144 /* Append path, query and fragment strings: */
145 ret = apr_pstrcat(p,
146 ret,
147 (uptr->path)
148 ? uptr->path : "",
150 ? "?" : "",
152 ? uptr->query : "",
 /* NULL rather than "" below: every apr_pstrcat() call here ends its
  * argument list with NULL, so a NULL in these trailing positions
  * simply ends the concatenation early */
154 ? "#" : NULL,
156 ? uptr->fragment : NULL,
157 NULL);
158 }
159 return ret;
160}
161
162/* Here is the hand-optimized parse_uri_components(). There are some wild
163 * tricks we could pull in assembly language that we don't pull here... like we
164 * can do word-at-time scans for delimiter characters using the same technique
165 * that fast memchr()s use. But that would be way non-portable. -djg
166 */
167
168/* We have a lookup table that we can index by character and it tells us if
169 * the character is one of the interesting delimiters. Note that we even get
170 * compares for NUL for free -- it's just another delimiter.
171 */
172
/* Character-class bits stored in uri_delims[].  Each class has its own bit,
 * so several classes can be tested at once by OR-ing them into one mask. */
173#define T_SLASH 0x01 /* '/' */
174#define T_QUESTION 0x02 /* '?' */
175#define T_HASH 0x04 /* '#' */
176#define T_ALPHA 0x08 /* 'A' ... 'Z', 'a' ... 'z' */
177#define T_SCHEME 0x10 /* '0' ... '9', '-', '+', '.'
178 * (allowed in scheme except first char)
179 */
180#define T_NUL 0x80 /* '\0' */
181
182#if APR_CHARSET_EBCDIC
183/* Delimiter table for the EBCDIC character set.
 * Indexed by a character's value as an unsigned char; each entry holds the
 * T_* class bits for that character, or 0 if it belongs to none of them.
 */
184static const unsigned char uri_delims[256] = {
185 T_NUL, /* 0x00 */
186 0, /* 0x01 */
187 0, /* 0x02 */
188 0, /* 0x03 */
189 0, /* 0x04 */
190 0, /* 0x05 */
191 0, /* 0x06 */
192 0, /* 0x07 */
193 0, /* 0x08 */
194 0, /* 0x09 */
195 0, /* 0x0a */
196 0, /* 0x0b */
197 0, /* 0x0c */
198 0, /* 0x0d */
199 0, /* 0x0e */
200 0, /* 0x0f */
201 0, /* 0x10 */
202 0, /* 0x11 */
203 0, /* 0x12 */
204 0, /* 0x13 */
205 0, /* 0x14 */
206 0, /* 0x15 */
207 0, /* 0x16 */
208 0, /* 0x17 */
209 0, /* 0x18 */
210 0, /* 0x19 */
211 0, /* 0x1a */
212 0, /* 0x1b */
213 0, /* 0x1c */
214 0, /* 0x1d */
215 0, /* 0x1e */
216 0, /* 0x1f */
217 0, /* 0x20 */
218 0, /* 0x21 */
219 0, /* 0x22 */
220 0, /* 0x23 */
221 0, /* 0x24 */
222 0, /* 0x25 */
223 0, /* 0x26 */
224 0, /* 0x27 */
225 0, /* 0x28 */
226 0, /* 0x29 */
227 0, /* 0x2a */
228 0, /* 0x2b */
229 0, /* 0x2c */
230 0, /* 0x2d */
231 0, /* 0x2e */
232 0, /* 0x2f */
233 0, /* 0x30 */
234 0, /* 0x31 */
235 0, /* 0x32 */
236 0, /* 0x33 */
237 0, /* 0x34 */
238 0, /* 0x35 */
239 0, /* 0x36 */
240 0, /* 0x37 */
241 0, /* 0x38 */
242 0, /* 0x39 */
243 0, /* 0x3a */
244 0, /* 0x3b */
245 0, /* 0x3c */
246 0, /* 0x3d */
247 0, /* 0x3e */
248 0, /* 0x3f */
249 0, /* 0x40 ' ' */
250 0, /* 0x41 */
251 0, /* 0x42 */
252 0, /* 0x43 */
253 0, /* 0x44 */
254 0, /* 0x45 */
255 0, /* 0x46 */
256 0, /* 0x47 */
257 0, /* 0x48 */
258 0, /* 0x49 */
259 0, /* 0x4a '[' */
260 T_SCHEME, /* 0x4b '.' */
261 0, /* 0x4c '<' */
262 0, /* 0x4d '(' */
263 T_SCHEME, /* 0x4e '+' */
264 0, /* 0x4f '!' */
265 0, /* 0x50 '&' */
266 0, /* 0x51 */
267 0, /* 0x52 */
268 0, /* 0x53 */
269 0, /* 0x54 */
270 0, /* 0x55 */
271 0, /* 0x56 */
272 0, /* 0x57 */
273 0, /* 0x58 */
274 0, /* 0x59 */
275 0, /* 0x5a ']' */
276 0, /* 0x5b '$' */
277 0, /* 0x5c '*' */
278 0, /* 0x5d ')' */
279 0, /* 0x5e ';' */
280 0, /* 0x5f '^' */
281 T_SCHEME, /* 0x60 '-' */
282 T_SLASH, /* 0x61 '/' */
283 0, /* 0x62 */
284 0, /* 0x63 */
285 0, /* 0x64 */
286 0, /* 0x65 */
287 0, /* 0x66 */
288 0, /* 0x67 */
289 0, /* 0x68 */
290 0, /* 0x69 */
291 0, /* 0x6a '|' */
292 0, /* 0x6b ',' */
293 0, /* 0x6c '%' */
294 0, /* 0x6d '_' */
295 0, /* 0x6e '>' */
296 T_QUESTION, /* 0x6f '?' */
297 0, /* 0x70 */
298 0, /* 0x71 */
299 0, /* 0x72 */
300 0, /* 0x73 */
301 0, /* 0x74 */
302 0, /* 0x75 */
303 0, /* 0x76 */
304 0, /* 0x77 */
305 0, /* 0x78 */
306 0, /* 0x79 '`' */
307 0, /* 0x7a ':' */
308 T_HASH, /* 0x7b '#' */
309 0, /* 0x7c '@' */
310 0, /* 0x7d ''' */
311 0, /* 0x7e '=' */
312 0, /* 0x7f '"' */
313 0, /* 0x80 */
314 T_ALPHA, /* 0x81 'a' */
315 T_ALPHA, /* 0x82 'b' */
316 T_ALPHA, /* 0x83 'c' */
317 T_ALPHA, /* 0x84 'd' */
318 T_ALPHA, /* 0x85 'e' */
319 T_ALPHA, /* 0x86 'f' */
320 T_ALPHA, /* 0x87 'g' */
321 T_ALPHA, /* 0x88 'h' */
322 T_ALPHA, /* 0x89 'i' */
323 0, /* 0x8a */
324 0, /* 0x8b */
325 0, /* 0x8c */
326 0, /* 0x8d */
327 0, /* 0x8e */
328 0, /* 0x8f */
329 0, /* 0x90 */
330 T_ALPHA, /* 0x91 'j' */
331 T_ALPHA, /* 0x92 'k' */
332 T_ALPHA, /* 0x93 'l' */
333 T_ALPHA, /* 0x94 'm' */
334 T_ALPHA, /* 0x95 'n' */
335 T_ALPHA, /* 0x96 'o' */
336 T_ALPHA, /* 0x97 'p' */
337 T_ALPHA, /* 0x98 'q' */
338 T_ALPHA, /* 0x99 'r' */
339 0, /* 0x9a */
340 0, /* 0x9b */
341 0, /* 0x9c */
342 0, /* 0x9d */
343 0, /* 0x9e */
344 0, /* 0x9f */
345 0, /* 0xa0 */
346 0, /* 0xa1 '~' */
347 T_ALPHA, /* 0xa2 's' */
348 T_ALPHA, /* 0xa3 't' */
349 T_ALPHA, /* 0xa4 'u' */
350 T_ALPHA, /* 0xa5 'v' */
351 T_ALPHA, /* 0xa6 'w' */
352 T_ALPHA, /* 0xa7 'x' */
353 T_ALPHA, /* 0xa8 'y' */
354 T_ALPHA, /* 0xa9 'z' */
355 0, /* 0xaa */
356 0, /* 0xab */
357 0, /* 0xac */
358 0, /* 0xad */
359 0, /* 0xae */
360 0, /* 0xaf */
361 0, /* 0xb0 */
362 0, /* 0xb1 */
363 0, /* 0xb2 */
364 0, /* 0xb3 */
365 0, /* 0xb4 */
366 0, /* 0xb5 */
367 0, /* 0xb6 */
368 0, /* 0xb7 */
369 0, /* 0xb8 */
370 0, /* 0xb9 */
371 0, /* 0xba */
372 0, /* 0xbb */
373 0, /* 0xbc */
374 0, /* 0xbd */
375 0, /* 0xbe */
376 0, /* 0xbf */
377 0, /* 0xc0 '{' */
378 T_ALPHA, /* 0xc1 'A' */
379 T_ALPHA, /* 0xc2 'B' */
380 T_ALPHA, /* 0xc3 'C' */
381 T_ALPHA, /* 0xc4 'D' */
382 T_ALPHA, /* 0xc5 'E' */
383 T_ALPHA, /* 0xc6 'F' */
384 T_ALPHA, /* 0xc7 'G' */
385 T_ALPHA, /* 0xc8 'H' */
386 T_ALPHA, /* 0xc9 'I' */
387 0, /* 0xca */
388 0, /* 0xcb */
389 0, /* 0xcc */
390 0, /* 0xcd */
391 0, /* 0xce */
392 0, /* 0xcf */
393 0, /* 0xd0 '}' */
394 T_ALPHA, /* 0xd1 'J' */
395 T_ALPHA, /* 0xd2 'K' */
396 T_ALPHA, /* 0xd3 'L' */
397 T_ALPHA, /* 0xd4 'M' */
398 T_ALPHA, /* 0xd5 'N' */
399 T_ALPHA, /* 0xd6 'O' */
400 T_ALPHA, /* 0xd7 'P' */
401 T_ALPHA, /* 0xd8 'Q' */
402 T_ALPHA, /* 0xd9 'R' */
403 0, /* 0xda */
404 0, /* 0xdb */
405 0, /* 0xdc */
406 0, /* 0xdd */
407 0, /* 0xde */
408 0, /* 0xdf */
409 0, /* 0xe0 '\' */
410 0, /* 0xe1 */
411 T_ALPHA, /* 0xe2 'S' */
412 T_ALPHA, /* 0xe3 'T' */
413 T_ALPHA, /* 0xe4 'U' */
414 T_ALPHA, /* 0xe5 'V' */
415 T_ALPHA, /* 0xe6 'W' */
416 T_ALPHA, /* 0xe7 'X' */
417 T_ALPHA, /* 0xe8 'Y' */
418 T_ALPHA, /* 0xe9 'Z' */
419 0, /* 0xea */
420 0, /* 0xeb */
421 0, /* 0xec */
422 0, /* 0xed */
423 0, /* 0xee */
424 0, /* 0xef */
425 T_SCHEME, /* 0xf0 '0' */
426 T_SCHEME, /* 0xf1 '1' */
427 T_SCHEME, /* 0xf2 '2' */
428 T_SCHEME, /* 0xf3 '3' */
429 T_SCHEME, /* 0xf4 '4' */
430 T_SCHEME, /* 0xf5 '5' */
431 T_SCHEME, /* 0xf6 '6' */
432 T_SCHEME, /* 0xf7 '7' */
433 T_SCHEME, /* 0xf8 '8' */
434 T_SCHEME, /* 0xf9 '9' */
435 0, /* 0xfa */
436 0, /* 0xfb */
437 0, /* 0xfc */
438 0, /* 0xfd */
439 0, /* 0xfe */
440 0 /* 0xff */
441};
442#else
443/* Delimiter table for the ASCII character set.
 * Indexed by a character's value as an unsigned char; each entry holds the
 * T_* class bits for that character, or 0 if it belongs to none of them.
 */
444static const unsigned char uri_delims[256] = {
445 T_NUL, /* 0x00 */
446 0, /* 0x01 */
447 0, /* 0x02 */
448 0, /* 0x03 */
449 0, /* 0x04 */
450 0, /* 0x05 */
451 0, /* 0x06 */
452 0, /* 0x07 */
453 0, /* 0x08 */
454 0, /* 0x09 */
455 0, /* 0x0a */
456 0, /* 0x0b */
457 0, /* 0x0c */
458 0, /* 0x0d */
459 0, /* 0x0e */
460 0, /* 0x0f */
461 0, /* 0x10 */
462 0, /* 0x11 */
463 0, /* 0x12 */
464 0, /* 0x13 */
465 0, /* 0x14 */
466 0, /* 0x15 */
467 0, /* 0x16 */
468 0, /* 0x17 */
469 0, /* 0x18 */
470 0, /* 0x19 */
471 0, /* 0x1a */
472 0, /* 0x1b */
473 0, /* 0x1c */
474 0, /* 0x1d */
475 0, /* 0x1e */
476 0, /* 0x1f */
477 0, /* 0x20 ' ' */
478 0, /* 0x21 '!' */
479 0, /* 0x22 '"' */
480 T_HASH, /* 0x23 '#' */
481 0, /* 0x24 '$' */
482 0, /* 0x25 '%' */
483 0, /* 0x26 '&' */
484 0, /* 0x27 ''' */
485 0, /* 0x28 '(' */
486 0, /* 0x29 ')' */
487 0, /* 0x2a '*' */
488 T_SCHEME, /* 0x2b '+' */
489 0, /* 0x2c ',' */
490 T_SCHEME, /* 0x2d '-' */
491 T_SCHEME, /* 0x2e '.' */
492 T_SLASH, /* 0x2f '/' */
493 T_SCHEME, /* 0x30 '0' */
494 T_SCHEME, /* 0x31 '1' */
495 T_SCHEME, /* 0x32 '2' */
496 T_SCHEME, /* 0x33 '3' */
497 T_SCHEME, /* 0x34 '4' */
498 T_SCHEME, /* 0x35 '5' */
499 T_SCHEME, /* 0x36 '6' */
500 T_SCHEME, /* 0x37 '7' */
501 T_SCHEME, /* 0x38 '8' */
502 T_SCHEME, /* 0x39 '9' */
503 0, /* 0x3a ':' */
504 0, /* 0x3b ';' */
505 0, /* 0x3c '<' */
506 0, /* 0x3d '=' */
507 0, /* 0x3e '>' */
508 T_QUESTION, /* 0x3f '?' */
509 0, /* 0x40 '@' */
510 T_ALPHA, /* 0x41 'A' */
511 T_ALPHA, /* 0x42 'B' */
512 T_ALPHA, /* 0x43 'C' */
513 T_ALPHA, /* 0x44 'D' */
514 T_ALPHA, /* 0x45 'E' */
515 T_ALPHA, /* 0x46 'F' */
516 T_ALPHA, /* 0x47 'G' */
517 T_ALPHA, /* 0x48 'H' */
518 T_ALPHA, /* 0x49 'I' */
519 T_ALPHA, /* 0x4a 'J' */
520 T_ALPHA, /* 0x4b 'K' */
521 T_ALPHA, /* 0x4c 'L' */
522 T_ALPHA, /* 0x4d 'M' */
523 T_ALPHA, /* 0x4e 'N' */
524 T_ALPHA, /* 0x4f 'O' */
525 T_ALPHA, /* 0x50 'P' */
526 T_ALPHA, /* 0x51 'Q' */
527 T_ALPHA, /* 0x52 'R' */
528 T_ALPHA, /* 0x53 'S' */
529 T_ALPHA, /* 0x54 'T' */
530 T_ALPHA, /* 0x55 'U' */
531 T_ALPHA, /* 0x56 'V' */
532 T_ALPHA, /* 0x57 'W' */
533 T_ALPHA, /* 0x58 'X' */
534 T_ALPHA, /* 0x59 'Y' */
535 T_ALPHA, /* 0x5a 'Z' */
536 0, /* 0x5b '[' */
537 0, /* 0x5c '\' */
538 0, /* 0x5d ']' */
539 0, /* 0x5e '^' */
540 0, /* 0x5f '_' */
541 0, /* 0x60 '`' */
542 T_ALPHA, /* 0x61 'a' */
543 T_ALPHA, /* 0x62 'b' */
544 T_ALPHA, /* 0x63 'c' */
545 T_ALPHA, /* 0x64 'd' */
546 T_ALPHA, /* 0x65 'e' */
547 T_ALPHA, /* 0x66 'f' */
548 T_ALPHA, /* 0x67 'g' */
549 T_ALPHA, /* 0x68 'h' */
550 T_ALPHA, /* 0x69 'i' */
551 T_ALPHA, /* 0x6a 'j' */
552 T_ALPHA, /* 0x6b 'k' */
553 T_ALPHA, /* 0x6c 'l' */
554 T_ALPHA, /* 0x6d 'm' */
555 T_ALPHA, /* 0x6e 'n' */
556 T_ALPHA, /* 0x6f 'o' */
557 T_ALPHA, /* 0x70 'p' */
558 T_ALPHA, /* 0x71 'q' */
559 T_ALPHA, /* 0x72 'r' */
560 T_ALPHA, /* 0x73 's' */
561 T_ALPHA, /* 0x74 't' */
562 T_ALPHA, /* 0x75 'u' */
563 T_ALPHA, /* 0x76 'v' */
564 T_ALPHA, /* 0x77 'w' */
565 T_ALPHA, /* 0x78 'x' */
566 T_ALPHA, /* 0x79 'y' */
567 T_ALPHA, /* 0x7a 'z' */
568 0, /* 0x7b '{' */
569 0, /* 0x7c '|' */
570 0, /* 0x7d '}' */
571 0, /* 0x7e '~' */
572 0, /* 0x7f */
573 0, /* 0x80 */
574 0, /* 0x81 */
575 0, /* 0x82 */
576 0, /* 0x83 */
577 0, /* 0x84 */
578 0, /* 0x85 */
579 0, /* 0x86 */
580 0, /* 0x87 */
581 0, /* 0x88 */
582 0, /* 0x89 */
583 0, /* 0x8a */
584 0, /* 0x8b */
585 0, /* 0x8c */
586 0, /* 0x8d */
587 0, /* 0x8e */
588 0, /* 0x8f */
589 0, /* 0x90 */
590 0, /* 0x91 */
591 0, /* 0x92 */
592 0, /* 0x93 */
593 0, /* 0x94 */
594 0, /* 0x95 */
595 0, /* 0x96 */
596 0, /* 0x97 */
597 0, /* 0x98 */
598 0, /* 0x99 */
599 0, /* 0x9a */
600 0, /* 0x9b */
601 0, /* 0x9c */
602 0, /* 0x9d */
603 0, /* 0x9e */
604 0, /* 0x9f */
605 0, /* 0xa0 */
606 0, /* 0xa1 */
607 0, /* 0xa2 */
608 0, /* 0xa3 */
609 0, /* 0xa4 */
610 0, /* 0xa5 */
611 0, /* 0xa6 */
612 0, /* 0xa7 */
613 0, /* 0xa8 */
614 0, /* 0xa9 */
615 0, /* 0xaa */
616 0, /* 0xab */
617 0, /* 0xac */
618 0, /* 0xad */
619 0, /* 0xae */
620 0, /* 0xaf */
621 0, /* 0xb0 */
622 0, /* 0xb1 */
623 0, /* 0xb2 */
624 0, /* 0xb3 */
625 0, /* 0xb4 */
626 0, /* 0xb5 */
627 0, /* 0xb6 */
628 0, /* 0xb7 */
629 0, /* 0xb8 */
630 0, /* 0xb9 */
631 0, /* 0xba */
632 0, /* 0xbb */
633 0, /* 0xbc */
634 0, /* 0xbd */
635 0, /* 0xbe */
636 0, /* 0xbf */
637 0, /* 0xc0 */
638 0, /* 0xc1 */
639 0, /* 0xc2 */
640 0, /* 0xc3 */
641 0, /* 0xc4 */
642 0, /* 0xc5 */
643 0, /* 0xc6 */
644 0, /* 0xc7 */
645 0, /* 0xc8 */
646 0, /* 0xc9 */
647 0, /* 0xca */
648 0, /* 0xcb */
649 0, /* 0xcc */
650 0, /* 0xcd */
651 0, /* 0xce */
652 0, /* 0xcf */
653 0, /* 0xd0 */
654 0, /* 0xd1 */
655 0, /* 0xd2 */
656 0, /* 0xd3 */
657 0, /* 0xd4 */
658 0, /* 0xd5 */
659 0, /* 0xd6 */
660 0, /* 0xd7 */
661 0, /* 0xd8 */
662 0, /* 0xd9 */
663 0, /* 0xda */
664 0, /* 0xdb */
665 0, /* 0xdc */
666 0, /* 0xdd */
667 0, /* 0xde */
668 0, /* 0xdf */
669 0, /* 0xe0 */
670 0, /* 0xe1 */
671 0, /* 0xe2 */
672 0, /* 0xe3 */
673 0, /* 0xe4 */
674 0, /* 0xe5 */
675 0, /* 0xe6 */
676 0, /* 0xe7 */
677 0, /* 0xe8 */
678 0, /* 0xe9 */
679 0, /* 0xea */
680 0, /* 0xeb */
681 0, /* 0xec */
682 0, /* 0xed */
683 0, /* 0xee */
684 0, /* 0xef */
685 0, /* 0xf0 */
686 0, /* 0xf1 */
687 0, /* 0xf2 */
688 0, /* 0xf3 */
689 0, /* 0xf4 */
690 0, /* 0xf5 */
691 0, /* 0xf6 */
692 0, /* 0xf7 */
693 0, /* 0xf8 */
694 0, /* 0xf9 */
695 0, /* 0xfa */
696 0, /* 0xfb */
697 0, /* 0xfc */
698 0, /* 0xfd */
699 0, /* 0xfe */
700 0 /* 0xff */
701};
702#endif
703
704
705/* it works like this:
706 if ((uri_delims[ch] & NOTEND_foobar) == 0) {
707 then we're not at a delimiter for foobar
708 }
709*/
/* Classes that stop the hostinfo scan: '/', '?', '#' or the terminating NUL. */
711#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL)
/* Classes that stop the path scan: '?', '#' or the terminating NUL. */
712#define NOTEND_PATH (T_QUESTION | T_HASH | T_NUL)
713
714/* parse_uri_components():
715 * Parse a given URI, fill in all supplied fields of a uri_components
716 * structure. This eliminates the necessity of extracting host, port,
717 * path, query info repeatedly in the modules.
718 * Side effects:
719 * - fills in fields of uri_components *uptr
720 * - none on any of the r->* fields
721 */
724{
 /* Splits `uri` into components and stores copies made with
  * apr_pstrdup()/apr_pstrmemdup() on pool `p` in *uptr.
  * Returns APR_SUCCESS, or APR_EGENERAL for input it rejects: a ':' in
  * the leading segment of a URI without a valid scheme, a '[' host with
  * no closing ']', or characters after the port that strtol() does not
  * consume.
  * NOTE(review): several lines of this function (goto labels and some
  * assignments) are not visible in this listing; the comments describe
  * only what is. */
725 const char *s;
726 const char *s1;
727 const char *hostinfo;
728 char *endstr;
729 int port;
730 int v6_offset1 = 0, v6_offset2 = 0;
731
732 /* Initialize the structure. parse_uri() and parse_uri_components()
733 * can be called more than once per request.
734 */
735 memset (uptr, '\0', sizeof(*uptr));
736 uptr->is_initialized = 1;
737
738 /* We assume the processor has a branch predictor like most --
739 * it assumes forward branches are untaken and backwards are taken. That's
740 * the reason for the gotos. -djg
741 */
742 if (uri[0] == '/') {
743 /* RFC2396 #4.3 says that two leading slashes mean we have an
744 * authority component, not a path! Fixing this looks scary
745 * with the gotos here. But if the existing logic is valid,
746 * then presumably a goto pointing to deal_with_authority works.
747 *
748 * RFC2396 describes this as resolving an ambiguity. In the
749 * case of three or more slashes there would seem to be no
750 * ambiguity, so it is a path after all.
751 */
752 if (uri[1] == '/' && uri[2] != '/') {
753 s = uri + 2 ;
755 }
756
758 /* we expect uri to point to first character of path ... remember
759 * that the path could be empty -- http://foobar?query for example
760 */
761 s = uri;
 /* advance to the first '?', '#' or NUL */
762 while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
763 ++s;
764 }
765 if (s != uri) {
766 uptr->path = apr_pstrmemdup(p, uri, s - uri);
767 }
768 if (*s == 0) {
769 return APR_SUCCESS;
770 }
771 if (*s == '?') {
772 ++s;
 /* the query runs up to the first '#'; everything after that '#'
  * is the fragment */
773 s1 = strchr(s, '#');
774 if (s1) {
775 uptr->fragment = apr_pstrdup(p, s1 + 1);
776 uptr->query = apr_pstrmemdup(p, s, s1 - s);
777 }
778 else {
779 uptr->query = apr_pstrdup(p, s);
780 }
781 return APR_SUCCESS;
782 }
783 /* otherwise it's a fragment */
784 uptr->fragment = apr_pstrdup(p, s + 1);
785 return APR_SUCCESS;
786 }
787
788 /* find the scheme: */
789 s = uri;
790 /* first char must be letter */
791 if (uri_delims[*(unsigned char *)s] & T_ALPHA) {
792 ++s;
793 while ((uri_delims[*(unsigned char *)s] & (T_ALPHA|T_SCHEME)))
794 ++s;
795 }
796 /* scheme must be non-empty and followed by : */
797 if (s != uri && s[0] == ':') {
799 s++;
800 }
801 else {
802 /* No valid scheme, restart from the beginning */
803 s = uri;
804 }
805
 /* no "//" here means there is no authority part */
806 if (s[0] != '/' || s[1] != '/') {
807 if (uri == s) {
808 /*
809 * RFC 3986 3.3: If we have no scheme and no authority,
810 * the leading segment of a relative path must not contain a ':'.
811 */
812 char *first_slash = strchr(uri, '/');
813 if (first_slash) {
814 while (s < first_slash) {
815 if (s[0] == ':')
816 return APR_EGENERAL;
817 ++s;
818 }
819 /* no scheme but relative path, e.g. '../image.jpg' */
820 }
821 else {
822 if (strchr(uri, ':') != NULL)
823 return APR_EGENERAL;
824 /* no scheme, no slash, but relative path, e.g. 'image.jpg' */
825 }
826 goto deal_with_path;
827 }
828 /* scheme and relative path */
829 uri = s;
830 goto deal_with_path;
831 }
832
 /* skip the "//" that introduces the authority part */
833 s += 2;
834
836 hostinfo = s;
837 while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
838 ++s;
839 }
840 uri = s; /* whatever follows hostinfo is start of uri */
842
843 /* If there's a username:password@host:port, the @ we want is the last @...
844 * too bad there's no memrchr()... For the C purists, note that hostinfo
845 * is definitely not the first character of the original uri so therefore
846 * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
847 */
848 do {
849 --s;
850 } while (s >= hostinfo && *s != '@');
 /* s < hostinfo: no '@' was found, so there is no user/password part */
851 if (s < hostinfo) {
852 /* again we want the common case to be fall through */
854 /* We expect hostinfo to point to the first character of
855 * the hostname. If there's a port it is the first colon,
856 * except with IPv6.
857 */
858 if (*hostinfo == '[') {
859 v6_offset1 = 1;
860 v6_offset2 = 2;
861 s = memchr(hostinfo, ']', uri - hostinfo);
862 if (s == NULL) {
863 return APR_EGENERAL;
864 }
865 if (*++s != ':') {
866 s = NULL; /* no port */
867 }
868 }
869 else {
870 s = memchr(hostinfo, ':', uri - hostinfo);
871 }
872 if (s == NULL) {
873 /* we expect the common case to have no port */
877 goto deal_with_path;
878 }
881 s - hostinfo - v6_offset2);
882 ++s;
884 if (uri != s) {
 /* NOTE(review): the strtol() result is stored without a range
  * check, so an out-of-range value would be silently narrowed
  * when assigned to `port` and then uptr->port -- confirm whether
  * this is intended. */
885 port = strtol(uptr->port_str, &endstr, 10);
886 uptr->port = port;
887 if (*endstr == '\0') {
888 goto deal_with_path;
889 }
890 /* Invalid characters after ':' found */
891 return APR_EGENERAL;
892 }
894 goto deal_with_path;
895 }
896
897 /* first colon delimits username:password */
898 s1 = memchr(hostinfo, ':', s - hostinfo);
899 if (s1) {
901 ++s1;
903 }
904 else {
906 }
 /* the host part starts right after the '@' */
907 hostinfo = s + 1;
908 goto deal_with_host;
909}
910
911/* Special case for CONNECT parsing: it comes with the hostinfo part only */
912/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
913 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
914 * for the format of the "CONNECT host:port HTTP/1.0" request
915 */
917 const char *hostinfo,
919{
 /* Parses a bare "host:port" or "[v6addr]:port" string into *uptr.
  * The port is mandatory: APR_EGENERAL is returned when there is no ':'
  * (or no "]:" after a bracketed address), when nothing follows the
  * ':', or when strtol() leaves unconsumed characters after the port.
  * Returns APR_SUCCESS otherwise.
  * NOTE(review): some assignment lines of this function are not
  * visible in this listing. */
920 const char *s;
921 char *endstr;
922 const char *rsb;
923 int v6_offset1 = 0;
924
925 /* Initialize the structure. parse_uri() and parse_uri_components()
926 * can be called more than once per request.
927 */
928 memset(uptr, '\0', sizeof(*uptr));
929 uptr->is_initialized = 1;
931
932 /* We expect hostinfo to point to the first character of
933 * the hostname. There must be a port, separated by a colon
934 */
935 if (*hostinfo == '[') {
 /* the closing ']' must exist and be followed directly by ':' */
936 if ((rsb = strchr(hostinfo, ']')) == NULL ||
937 *(rsb + 1) != ':') {
938 return APR_EGENERAL;
939 }
940 /* literal IPv6 address */
941 s = rsb + 1;
942 ++hostinfo;
943 v6_offset1 = 1;
944 }
945 else {
946 s = strchr(hostinfo, ':');
947 }
948 if (s == NULL) {
949 return APR_EGENERAL;
950 }
 /* step past the ':' so that s points at the port text */
952 ++s;
954 if (*s != '\0') {
955 uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10);
956 if (*endstr == '\0') {
957 return APR_SUCCESS;
958 }
959 /* Invalid characters after ':' found */
960 }
 /* reached for an empty port or a port with trailing characters */
961 return APR_EGENERAL;
962}
APR Miscellaneous library routines.
APU_DECLARE(void)
Computes SipHash-2-4, producing a 64bit (APR_SIPHASH_DSIZE) hash from a message and a 128bit (APR_SIP...
Definition apr_sha1.c:206
APR Strings library.
#define NOTEND_HOSTINFO
Definition apr_uri.c:709
static const unsigned char uri_delims[256]
Definition apr_uri.c:442
#define T_NUL
Definition apr_uri.c:178
static schemes_t schemes[]
Definition apr_uri.c:48
#define T_HASH
Definition apr_uri.c:175
#define T_ALPHA
Definition apr_uri.c:176
#define T_SCHEME
Definition apr_uri.c:177
#define T_SLASH
Definition apr_uri.c:173
#define NOTEND_PATH
Definition apr_uri.c:710
#define T_QUESTION
Definition apr_uri.c:174
APR-UTIL URI Routines.
APR Standard Headers Support.
const char apr_port_t port
Definition http_vhost.h:125
#define APR_EGENERAL
Definition apr_errno.h:313
const char apr_ssize_t int flags
Definition apr_encode.h:168
#define APR_URI_FTP_DEFAULT_PORT
Definition apr_uri.h:43
#define APR_URI_TIP_DEFAULT_PORT
Definition apr_uri.h:59
#define APR_URI_WAIS_DEFAULT_PORT
Definition apr_uri.h:52
#define APR_URI_TELNET_DEFAULT_PORT
Definition apr_uri.h:45
const char * hostinfo
Definition apr_uri.h:170
const apr_uri_t * uptr
Definition apr_uri.h:147
#define APR_URI_PROSPERO_DEFAULT_PORT
Definition apr_uri.h:51
#define APR_URI_UNP_OMITQUERY
Definition apr_uri.h:77
#define APR_URI_SSH_DEFAULT_PORT
Definition apr_uri.h:44
#define APR_URI_UNP_REVEALPASSWORD
Definition apr_uri.h:73
#define APR_URI_LDAP_DEFAULT_PORT
Definition apr_uri.h:53
#define APR_URI_SNEWS_DEFAULT_PORT
Definition apr_uri.h:56
const char * uri
Definition apr_uri.h:159
#define APR_URI_IMAP_DEFAULT_PORT
Definition apr_uri.h:50
#define APR_URI_HTTP_DEFAULT_PORT
Definition apr_uri.h:47
#define APR_URI_NNTP_DEFAULT_PORT
Definition apr_uri.h:49
#define APR_URI_UNP_OMITUSER
Definition apr_uri.h:66
#define APR_URI_NFS_DEFAULT_PORT
Definition apr_uri.h:58
#define APR_URI_UNP_OMITPATHINFO
Definition apr_uri.h:75
#define APR_URI_HTTPS_DEFAULT_PORT
Definition apr_uri.h:54
#define APR_URI_SIP_DEFAULT_PORT
Definition apr_uri.h:60
#define APR_URI_GOPHER_DEFAULT_PORT
Definition apr_uri.h:46
#define APR_URI_RTSP_DEFAULT_PORT
Definition apr_uri.h:55
#define APR_URI_UNP_OMITPASSWORD
Definition apr_uri.h:68
#define APR_URI_UNP_OMITSITEPART
Definition apr_uri.h:64
#define APR_URI_ACAP_DEFAULT_PORT
Definition apr_uri.h:57
#define APR_URI_POP_DEFAULT_PORT
Definition apr_uri.h:48
apr_size_t size
#define APR_SUCCESS
Definition apr_errno.h:225
int apr_status_t
Definition apr_errno.h:44
void * memchr(const void *s, int c, size_t n)
int strcasecmp(const char *a, const char *b)
apr_uint16_t apr_port_t
const char * s
Definition apr_strings.h:95
apr_pool_t * p
Definition md_event.c:32
return NULL
Definition mod_so.c:359
char * fragment
Definition apr_uri.h:103
char * user
Definition apr_uri.h:91
char * scheme
Definition apr_uri.h:87
char * path
Definition apr_uri.h:99
apr_port_t port
Definition apr_uri.h:109
unsigned is_initialized
Definition apr_uri.h:112
char * password
Definition apr_uri.h:93
char * query
Definition apr_uri.h:101
char * hostname
Definition apr_uri.h:95
char * hostinfo
Definition apr_uri.h:89
char * port_str
Definition apr_uri.h:97
apr_port_t default_port
Definition apr_uri.c:41
const char * name
Definition apr_uri.c:39