Apache HTTPD
mod_charset_lite.c
Go to the documentation of this file.
1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * simple hokey charset recoding configuration module
19 *
20 * See mod_ebcdic and mod_charset for more thought-out examples. This
21 * one is just so Jeff can learn how a module works and experiment with
22 * basic character set recoding configuration.
23 *
24 * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!!
25 */
26
27#include "httpd.h"
28#include "http_config.h"
29
30#include "http_core.h"
31#include "http_log.h"
32#include "http_main.h"
33#include "http_protocol.h"
34#include "http_request.h"
35#include "util_charset.h"
36#include "apr_buckets.h"
37#include "util_filter.h"
38#include "apr_strings.h"
39#include "apr_lib.h"
40#include "apr_xlate.h"
41#define APR_WANT_STRFUNC
42#include "apr_want.h"
43
44#define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size of translation buffer used on output */
45#define INPUT_XLATE_BUF_SIZE (8*1024) /* size of translation buffer used on input */
46
47#define XLATE_MIN_BUFF_LEFT 128 /* flush once there is no more than this much
48 * space left in the translation buffer
49 */
50
51#define FATTEST_CHAR 8 /* we don't handle chars wider than this that straddle
52 * two buckets
53 */
55/* extended error status codes; this is used in addition to an apr_status_t to
56 * track errors in the translation filter
57 */
58typedef enum {
59 EES_INIT = 0, /* no error info yet; value must be 0 for easy init */
60 EES_LIMIT, /* built-in restriction encountered */
61 EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */
63 EES_DOWNSTREAM, /* something bad happened in a filter below xlate */
64 EES_BAD_INPUT /* input data invalid */
65} ees_t;
67/* registered name of the output translation filter */
68#define XLATEOUT_FILTER_NAME "XLATEOUT"
69/* registered name of input translation filter */
70#define XLATEIN_FILTER_NAME "XLATEIN"
71
72typedef struct charset_dir_t {
73 const char *charset_source; /* source encoding */
74 const char *charset_default; /* how to ship on wire */
80
81/* charset_filter_ctx_t is created for each filter instance; because the same
82 * filter code is used for translating in both directions, we need this context
83 * data to tell the filter which translation handle to use; it also can hold a
84 * character which was split between buckets
85 */
86typedef struct charset_filter_ctx_t {
88 int is_sb; /* single-byte translation? */
90 ees_t ees; /* extended error status */
92 char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
93 int ran; /* has filter instance run before? */
94 int noop; /* should we pass brigades through unchanged? */
95 char *tmp; /* buffer for input filtering */
96 apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
97 apr_bucket_brigade *tmpbb; /* used for passing downstream */
100/* charset_req_t is available via r->request_config if any translation is
101 * being performed
102 */
103typedef struct charset_req_t {
107
108module AP_MODULE_DECLARE_DATA charset_lite_module;
109
110static void *create_charset_dir_conf(apr_pool_t *p,char *dummy)
111{
114 return dc;
115}
116
117static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
118{
122
123 /* If it is defined in the current container, use it. Otherwise, use the one
124 * from the enclosing container.
125 */
126
127 a->charset_default =
128 over->charset_default ? over->charset_default : base->charset_default;
129 a->charset_source =
130 over->charset_source ? over->charset_source : base->charset_source;
131 a->implicit_add =
132 over->implicit_add != IA_INIT ? over->implicit_add : base->implicit_add;
133 a->force_xlate=
134 over->force_xlate != FX_INIT ? over->force_xlate : base->force_xlate;
135 return a;
137
138/* CharsetSourceEnc charset
139 */
140static const char *add_charset_source(cmd_parms *cmd, void *in_dc,
141 const char *name)
142{
143 charset_dir_t *dc = in_dc;
144
145 dc->charset_source = name;
146 return NULL;
148
149/* CharsetDefault charset
150 */
151static const char *add_charset_default(cmd_parms *cmd, void *in_dc,
152 const char *name)
153{
154 charset_dir_t *dc = in_dc;
155
156 dc->charset_default = name;
157 return NULL;
159
160/* CharsetOptions optionflag...
161 */
162static const char *add_charset_options(cmd_parms *cmd, void *in_dc,
163 const char *flag)
164{
165 charset_dir_t *dc = in_dc;
166
167 if (!strcasecmp(flag, "ImplicitAdd")) {
168 dc->implicit_add = IA_IMPADD;
169 }
170 else if (!strcasecmp(flag, "NoImplicitAdd")) {
171 dc->implicit_add = IA_NOIMPADD;
172 }
173 else if (!strcasecmp(flag, "TranslateAllMimeTypes")) {
174 dc->force_xlate = FX_FORCE;
175 }
176 else if (!strcasecmp(flag, "NoTranslateAllMimeTypes")) {
177 dc->force_xlate = FX_NOFORCE;
178 }
179 else {
180 return apr_pstrcat(cmd->temp_pool,
181 "Invalid CharsetOptions option: ",
182 flag,
183 NULL);
184 }
185
186 return NULL;
187}
188
189/* find_code_page() is a fixup hook that checks if the module is
190 * configured and the input or output potentially need to be translated.
191 * If so, context is initialized for the filters.
192 */
193static int find_code_page(request_rec *r)
194{
196 &charset_lite_module);
198 charset_filter_ctx_t *input_ctx, *output_ctx;
199 apr_status_t rv;
200
202 "uri: %s file: %s method: %d "
203 "imt: %s flags: %s%s%s %s->%s",
204 r->uri,
205 r->filename ? r->filename : "(none)",
207 r->content_type ? r->content_type : "(unknown)",
208 r->main ? "S" : "", /* S if subrequest */
209 r->prev ? "R" : "", /* R if redirect */
210 r->proxyreq ? "P" : "", /* P if proxy */
212
213 /* If we don't have a full directory configuration, bail out.
214 */
215 if (!dc->charset_source || !dc->charset_default) {
217 "incomplete configuration: src %s, dst %s",
218 dc->charset_source ? dc->charset_source : "unspecified",
219 dc->charset_default ? dc->charset_default : "unspecified");
220 return DECLINED;
221 }
222
223 /* catch proxy requests */
224 if (r->proxyreq) {
225 return DECLINED;
226 }
227
228 /* mod_rewrite indicators */
229 if (r->filename
230 && (!strncmp(r->filename, "redirect:", 9)
231 || !strncmp(r->filename, "gone:", 5)
232 || !strncmp(r->filename, "passthrough:", 12)
233 || !strncmp(r->filename, "forbidden:", 10))) {
234 return DECLINED;
235 }
236
237 /* no translation when server and network charsets are set to the same value */
239 return DECLINED;
240 }
241
242 /* Get storage for the request data and the output filter context.
243 * We rarely need the input filter context, so allocate that separately.
244 */
246 sizeof(charset_req_t) +
247 sizeof(charset_filter_ctx_t));
248 output_ctx = (charset_filter_ctx_t *)(reqinfo + 1);
249
250 reqinfo->dc = dc;
251 output_ctx->dc = dc;
252 output_ctx->tmpbb = apr_brigade_create(r->pool,
254 ap_set_module_config(r->request_config, &charset_lite_module, reqinfo);
255
256 reqinfo->output_ctx = output_ctx;
257
258 switch (r->method_number) {
259 case M_PUT:
260 case M_POST:
261 /* Set up input translation. Note: A request body can be included
262 * with the OPTIONS method, but for now we don't set up translation
263 * of it.
264 */
265 input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t));
266 input_ctx->bb = apr_brigade_create(r->pool,
268 input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE);
269 input_ctx->dc = dc;
270 reqinfo->input_ctx = input_ctx;
271 rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source,
272 dc->charset_default, r->pool);
273 if (rv != APR_SUCCESS) {
275 "can't open translation %s->%s",
278 }
279 if (apr_xlate_sb_get(input_ctx->xlate, &input_ctx->is_sb) != APR_SUCCESS) {
280 input_ctx->is_sb = 0;
281 }
282 }
284 return DECLINED;
285}
286
287static int configured_in_list(request_rec *r, const char *filter_name,
288 struct ap_filter_t *filter_list)
289{
290 struct ap_filter_t *filter = filter_list;
291
292 while (filter) {
293 if (!strcasecmp(filter_name, filter->frec->name)) {
294 return 1;
295 }
296 filter = filter->next;
298 return 0;
299}
300
301static int configured_on_input(request_rec *r, const char *filter_name)
303 return configured_in_list(r, filter_name, r->input_filters);
304}
305
306static int configured_on_output(request_rec *r, const char *filter_name)
307{
308 return configured_in_list(r, filter_name, r->output_filters);
309}
311/* xlate_insert_filter() is a filter hook which decides whether or not
312 * to insert a translation filter for the current request.
313 */
315{
316 /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */
318 &charset_lite_module);
320 &charset_lite_module);
321
322 if (dc && (dc->implicit_add == IA_NOIMPADD)) {
324 "xlate output filter not added implicitly because "
325 "CharsetOptions included 'NoImplicitAdd'");
326 return;
327 }
328
329 if (reqinfo) {
330 if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) {
332 r->connection);
333 }
335 "xlate output filter not added implicitly because %s",
336 !reqinfo->output_ctx ?
337 "no output configuration available" :
338 "another module added the filter");
339
340 if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) {
342 r->connection);
343 }
345 "xlate input filter not added implicitly because %s",
346 !reqinfo->input_ctx ?
347 "no input configuration available" :
348 "another module added the filter");
349 }
350}
351
352/* stuff that sucks that I know of:
353 *
354 * bucket handling:
355 * why create an eos bucket when we see it come down the stream? just send the one
356 * passed as input... news flash: this will be fixed when xlate_out_filter() starts
357 * using the more generic xlate_brigade()
358 *
359 * translation mechanics:
360 * we don't handle characters that straddle more than two buckets; an error
361 * will be generated
362 */
363
365{
366 charset_filter_ctx_t *ctx = f->ctx;
367 apr_status_t rv;
368
370 rv = ap_pass_brigade(f->next, ctx->tmpbb);
371 if (rv != APR_SUCCESS) {
372 ctx->ees = EES_DOWNSTREAM;
373 }
374 apr_brigade_cleanup(ctx->tmpbb);
375 return rv;
376}
378/* send_downstream() is passed the translated data; it puts it in a single-
379 * bucket brigade and passes the brigade to the next filter
380 */
381static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
382{
383 request_rec *r = f->r;
385 apr_bucket *b;
386
387 b = apr_bucket_transient_create(tmp, len, c->bucket_alloc);
388 return send_bucket_downstream(f, b);
389}
390
392{
393 request_rec *r = f->r;
396 apr_bucket *b;
397 charset_filter_ctx_t *ctx = f->ctx;
398 apr_status_t rv;
399
400 bb = apr_brigade_create(r->pool, c->bucket_alloc);
401 b = apr_bucket_eos_create(c->bucket_alloc);
403 rv = ap_pass_brigade(f->next, bb);
404 if (rv != APR_SUCCESS) {
405 ctx->ees = EES_DOWNSTREAM;
407 return rv;
408}
409
411 const char *partial,
413{
414 apr_status_t rv;
415
416 if (sizeof(ctx->buf) > partial_len) {
417 ctx->saved = partial_len;
419 rv = APR_SUCCESS;
420 }
421 else {
422 rv = APR_INCOMPLETE;
423 ctx->ees = EES_LIMIT; /* we don't handle chars this wide which straddle
424 * buckets
425 */
427 return rv;
428}
429
431 /* input buffer: */
432 const char **cur_str,
433 apr_size_t *cur_len,
434 /* output buffer: */
435 char **out_str,
437{
438 apr_status_t rv;
440
441 /* Keep adding bytes from the input string to the saved string until we
442 * 1) finish the input char
443 * 2) get an error
444 * or 3) run out of bytes to add
445 */
446
447 do {
448 ctx->buf[ctx->saved] = **cur_str;
449 ++ctx->saved;
450 ++*cur_str;
451 --*cur_len;
452 tmp_input_len = ctx->saved;
453 rv = apr_xlate_conv_buffer(ctx->xlate,
454 ctx->buf,
456 *out_str,
457 out_len);
458 } while (rv == APR_INCOMPLETE && *cur_len);
459
460 if (rv == APR_SUCCESS) {
461 ctx->saved = 0;
462 }
463 else {
464 ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars
465 * straddling more than two buckets
466 */
467 }
469 return rv;
470}
471
473{
474 charset_filter_ctx_t *ctx = f->ctx;
475 const char *msg;
476 char msgbuf[100];
478
479 switch(ctx->ees) {
480 case EES_LIMIT:
481 rv = 0;
482 msg = APLOGNO(02193) "xlate filter - a built-in restriction was encountered";
483 break;
484 case EES_BAD_INPUT:
485 rv = 0;
486 msg = APLOGNO(02194) "xlate filter - an input character was invalid";
487 break;
488 case EES_BUCKET_READ:
489 rv = 0;
490 msg = APLOGNO(02195) "xlate filter - bucket read routine failed";
491 break;
493 rv = 0;
494 strcpy(msgbuf, APLOGNO(02196) "xlate filter - incomplete char at end of input - ");
495 len = ctx->saved;
496
497 /* We must ensure not to process more than what would fit in the
498 * remaining of the destination buffer, including terminating NULL */
499 if (len > (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2)
500 len = (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2;
501
502 ap_bin2hex(ctx->buf, len, msgbuf + strlen(msgbuf));
503 msg = msgbuf;
504 break;
505 case EES_DOWNSTREAM:
506 msg = APLOGNO(02197) "xlate filter - an error occurred in a lower filter";
507 break;
508 default:
509 msg = APLOGNO(02198) "xlate filter - returning error";
510 }
511 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(02997) "%s", msg);
512}
513
514/* chk_filter_chain() is called once per filter instance; it tries to
515 * determine if the current filter instance should be disabled because
516 * its translation is incompatible with the translation of an existing
517 * instance of the translate filter
518 *
519 * Example bad scenario:
520 *
521 * configured filter chain for the request:
522 * INCLUDES XLATEOUT(8859-1->UTF-16)
523 * configured filter chain for the subrequest:
524 * XLATEOUT(8859-1->UTF-16)
525 *
526 * When the subrequest is processed, the filter chain will be
527 * XLATEOUT(8859-1->UTF-16) XLATEOUT(8859-1->UTF-16)
528 * This makes no sense, so the instance of XLATEOUT added for the
529 * subrequest will be noop-ed.
530 *
531 * Example good scenario:
532 *
533 * configured filter chain for the request:
534 * INCLUDES XLATEOUT(8859-1->UTF-16)
535 * configured filter chain for the subrequest:
536 * XLATEOUT(IBM-1047->8859-1)
537 *
538 * When the subrequest is processed, the filter chain will be
539 * XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTF-16)
540 * This makes sense, so the instance of XLATEOUT added for the
541 * subrequest will be left alone and it will translate from
542 * IBM-1047->8859-1.
543 */
544static void chk_filter_chain(ap_filter_t *f)
545{
548 *ctx = f->ctx;
549 int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME);
550
551 if (ctx->noop) {
552 return;
553 }
554
555 /* walk the filter chain; see if it makes sense for our filter to
556 * do any translation
557 */
558 curf = output ? f->r->output_filters : f->r->input_filters;
559 while (curf) {
560 if (!strcasecmp(curf->frec->name, f->frec->name) &&
561 curf->ctx) {
563 if (!last_xlate_ctx) {
565 }
566 else {
567 if (strcmp(last_xlate_ctx->dc->charset_default,
568 curctx->dc->charset_source)) {
569 /* incompatible translation
570 * if our filter instance is incompatible with an instance
571 * already in place, noop our instance
572 * Notes:
573 * . We are only willing to noop our own instance.
574 * . It is possible to noop another instance which has not
575 * yet run, but this is not currently implemented.
576 * Hopefully it will not be needed.
577 * . It is not possible to noop an instance which has
578 * already run.
579 */
580 if (last_xlate_ctx == f->ctx) {
581 last_xlate_ctx->noop = 1;
582 if (APLOGrtrace1(f->r)) {
583 const char *symbol = output ? "->" : "<-";
584
586 0, f->r, APLOGNO(01451)
587 "%s %s - disabling "
588 "translation %s%s%s; existing "
589 "translation %s%s%s",
590 f->r->uri ? "uri" : "file",
591 f->r->uri ? f->r->uri : f->r->filename,
592 last_xlate_ctx->dc->charset_source,
593 symbol,
594 last_xlate_ctx->dc->charset_default,
595 curctx->dc->charset_source,
596 symbol,
597 curctx->dc->charset_default);
598 }
599 }
600 else {
601 const char *symbol = output ? "->" : "<-";
602
604 0, f->r, APLOGNO(01452)
605 "chk_filter_chain() - can't disable "
606 "translation %s%s%s; existing "
607 "translation %s%s%s",
608 last_xlate_ctx->dc->charset_source,
609 symbol,
610 last_xlate_ctx->dc->charset_default,
611 curctx->dc->charset_source,
612 symbol,
613 curctx->dc->charset_default);
614 }
615 break;
616 }
617 }
618 }
619 curf = curf->next;
620 }
621}
622
623/* xlate_brigade() is used to filter request and response bodies
624 *
625 * we'll stop when one of the following occurs:
626 * . we run out of buckets
627 * . we run out of space in the output buffer
628 * . we hit an error or metadata
629 *
630 * inputs:
631 * bb: brigade to process
632 * buffer: storage to hold the translated characters
633 * buffer_avail: size of buffer
634 * (and a few more uninteresting parms)
635 *
636 * outputs:
637 * return value: APR_SUCCESS or some error code
638 * bb: we've removed any buckets representing the
639 * translated characters; the eos bucket, if
640 * present, will be left in the brigade
641 * buffer: filled in with translated characters
642 * buffer_avail: updated with the bytes remaining
643 * hit_eos: did we hit an EOS bucket?
644 */
647 char *buffer,
649 int *hit_eos)
650{
651 apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */
653 const char *bucket;
654 apr_size_t bytes_in_bucket; /* total bytes read from current bucket */
655 apr_size_t bucket_avail; /* bytes left in current bucket */
657
658 *hit_eos = 0;
659 bucket_avail = 0;
661 while (1) {
662 if (!bucket_avail) { /* no bytes left to process in the current bucket... */
663 if (consumed_bucket) {
666 }
667 b = APR_BRIGADE_FIRST(bb);
668 if (b == APR_BRIGADE_SENTINEL(bb) ||
670 break;
671 }
673 if (rv != APR_SUCCESS) {
674 ctx->ees = EES_BUCKET_READ;
675 break;
676 }
678 consumed_bucket = b; /* for axing when we're done reading it */
679 }
680 if (bucket_avail) {
681 /* We've got data, so translate it. */
682 if (ctx->saved) {
683 /* Rats... we need to finish a partial character from the previous
684 * bucket.
685 *
686 * Strangely, finish_partial_char() increments the input buffer
687 * pointer but does not increment the output buffer pointer.
688 */
691 &bucket, &bucket_avail,
694 }
695 else {
698 rv = apr_xlate_conv_buffer(ctx->xlate,
699 bucket, &bucket_avail,
700 buffer,
703 bucket += old_bucket_avail - bucket_avail;
704
705 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
706 /* We need to save the final byte(s) for next time; we can't
707 * convert it until we look at the next bucket.
708 */
710 bucket_avail = 0;
711 }
712 }
713 if (rv != APR_SUCCESS) {
714 /* bad input byte or partial char too big to store */
715 break;
716 }
718 /* if any data remains in the current bucket, split there */
719 if (bucket_avail) {
721 }
723 break;
724 }
725 }
726 }
727
728 if (!APR_BRIGADE_EMPTY(bb)) {
729 b = APR_BRIGADE_FIRST(bb);
730 if (APR_BUCKET_IS_EOS(b)) {
731 /* Leave the eos bucket in the brigade for reporting to
732 * subsequent filters.
733 */
734 *hit_eos = 1;
735 if (ctx->saved) {
736 /* Oops... we have a partial char from the previous bucket
737 * that won't be completed because there's no more data.
738 */
739 rv = APR_INCOMPLETE;
741 }
742 }
743 }
744
745 return rv;
746}
747
748/* xlate_out_filter() handles (almost) arbitrary conversions from one charset
749 * to another...
750 * translation is determined in the fixup hook (find_code_page), which is
751 * where the filter's context data is set up... the context data gives us
752 * the translation handle
753 */
755{
756 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
757 &charset_lite_module);
758 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
759 &charset_lite_module);
760 charset_filter_ctx_t *ctx = f->ctx;
762 const char *cur_str;
763 apr_size_t cur_len, cur_avail;
764 char tmp[OUTPUT_XLATE_BUF_SIZE];
766 int done;
768
769 if (!ctx) {
770 /* this is SetOutputFilter path; grab the preallocated context,
771 * if any; note that if we decided not to do anything in an earlier
772 * handler, we won't even have a reqinfo
773 */
774 if (reqinfo) {
775 ctx = f->ctx = reqinfo->output_ctx;
776 reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice
777 * in the filter chain; we can't have two
778 * instances using the same context
779 */
780 }
781 if (!ctx) { /* no idea how to translate; don't do anything */
782 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
783 ctx->dc = dc;
784 ctx->noop = 1;
785 }
786 }
787
788 /* Check the mime type to see if translation should be performed.
789 */
790 if (!ctx->noop && ctx->xlate == NULL) {
791 const char *mime_type = f->r->content_type;
792
793 if (mime_type && (ap_cstr_casecmpn(mime_type, "text/", 5) == 0 ||
795 /* On an EBCDIC machine, be willing to translate mod_autoindex-
796 * generated output. Otherwise, it doesn't look too cool.
797 *
798 * XXX This isn't a perfect fix because this doesn't trigger us
799 * to convert from the charset of the source code to ASCII. The
800 * general solution seems to be to allow a generator to set an
801 * indicator in the r specifying that the body is coded in the
802 * implementation character set (i.e., the charset of the source
803 * code). This would get several different types of documents
804 * translated properly: mod_autoindex output, mod_status output,
805 * mod_info output, hard-coded error documents, etc.
806 */
807 strcmp(mime_type, DIR_MAGIC_TYPE) == 0 ||
808#endif
809 ap_cstr_casecmpn(mime_type, "message/", 8) == 0 ||
810 dc->force_xlate == FX_FORCE)) {
811
812 rv = apr_xlate_open(&ctx->xlate,
813 dc->charset_default, dc->charset_source, f->r->pool);
814 if (rv != APR_SUCCESS) {
816 "can't open translation %s->%s",
818 ctx->noop = 1;
819 }
820 else {
821 if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) {
822 ctx->is_sb = 0;
823 }
824 }
825 }
826 else {
827 ctx->noop = 1;
828 if (mime_type) {
830 "mime type is %s; no translation selected",
831 mime_type);
832 }
833 }
834 }
835
837 "xlate_out_filter() - "
838 "charset_source: %s charset_default: %s",
839 dc && dc->charset_source ? dc->charset_source : "(none)",
840 dc && dc->charset_default ? dc->charset_default : "(none)");
841
842 if (!ctx->ran) { /* filter never ran before */
844 ctx->ran = 1;
845 if (!ctx->noop && !ctx->is_sb) {
846 /* We're not converting between two single-byte charsets, so unset
847 * Content-Length since it is unlikely to remain the same.
848 */
849 apr_table_unset(f->r->headers_out, "Content-Length");
850 }
851 }
852
853 if (ctx->noop) {
854 return ap_pass_brigade(f->next, bb);
855 }
856
857 dptr = APR_BRIGADE_FIRST(bb);
858 done = 0;
859 cur_len = 0;
860 space_avail = sizeof(tmp);
862 while (!done) {
863 if (!cur_len) { /* no bytes left to process in the current bucket... */
864 if (consumed_bucket) {
867 }
868 if (dptr == APR_BRIGADE_SENTINEL(bb)) {
869 break;
870 }
871 if (APR_BUCKET_IS_EOS(dptr)) {
872 cur_len = -1; /* XXX yuck, but that tells us to send
873 * eos down; when we minimize our bb construction
874 * we'll fix this crap */
875 if (ctx->saved) {
876 /* Oops... we have a partial char from the previous bucket
877 * that won't be completed because there's no more data.
878 */
879 rv = APR_INCOMPLETE;
881 }
882 break;
883 }
884 if (APR_BUCKET_IS_METADATA(dptr)) {
886 metadata_bucket = dptr;
887 dptr = APR_BUCKET_NEXT(dptr);
890 if (rv != APR_SUCCESS) {
891 done = 1;
892 }
893 continue;
894 }
895 rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ);
896 if (rv != APR_SUCCESS) {
897 ctx->ees = EES_BUCKET_READ;
898 break;
899 }
900 consumed_bucket = dptr; /* for axing when we're done reading it */
901 dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
902 * next bucket */
903 }
904 /* Try to fill up our tmp buffer with translated data. */
905 cur_avail = cur_len;
906
907 if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
908 if (ctx->saved) {
909 /* Rats... we need to finish a partial character from the previous
910 * bucket.
911 */
912 char *tmp_tmp;
913
914 tmp_tmp = tmp + sizeof(tmp) - space_avail;
916 &cur_str, &cur_len,
918 }
919 else {
920 rv = apr_xlate_conv_buffer(ctx->xlate,
922 tmp + sizeof(tmp) - space_avail, &space_avail);
923
924 /* Update input ptr and len after consuming some bytes */
925 cur_str += cur_len - cur_avail;
926 cur_len = cur_avail;
927
928 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
929 /* We need to save the final byte(s) for next time; we can't
930 * convert it until we look at the next bucket.
931 */
932 rv = set_aside_partial_char(ctx, cur_str, cur_len);
933 cur_len = 0;
934 }
935 }
936 }
937
938 if (rv != APR_SUCCESS) {
939 /* bad input byte or partial char too big to store */
940 done = 1;
941 }
942
944 /* It is time to flush, as there is not enough space left in the
945 * current output buffer to bother with converting more data.
946 */
947 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
948 if (rv != APR_SUCCESS) {
949 done = 1;
950 }
951
952 /* tmp is now empty */
953 space_avail = sizeof(tmp);
954 }
955 }
956
957 if (rv == APR_SUCCESS) {
958 if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
959 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
960 }
961 }
962 if (rv == APR_SUCCESS) {
963 if (cur_len == -1) {
964 rv = send_eos(f);
965 }
966 }
967 else {
968 log_xlate_error(f, rv);
969 }
971 return rv;
972}
973
977{
978 apr_status_t rv;
979 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
980 &charset_lite_module);
981 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
982 &charset_lite_module);
983 charset_filter_ctx_t *ctx = f->ctx;
985 int hit_eos;
986
987 /* just get out of the way of things we don't want. */
988 if (mode != AP_MODE_READBYTES) {
989 return ap_get_brigade(f->next, bb, mode, block, readbytes);
990 }
991
992 if (!ctx) {
993 /* this is SetInputFilter path; grab the preallocated context,
994 * if any; note that if we decided not to do anything in an earlier
995 * handler, we won't even have a reqinfo
996 */
997 if (reqinfo) {
998 ctx = f->ctx = reqinfo->input_ctx;
999 reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice
1000 * in the filter chain; we can't have two
1001 * instances using the same context
1002 */
1003 }
1004 if (!ctx) { /* no idea how to translate; don't do anything */
1005 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
1006 ctx->dc = dc;
1007 ctx->noop = 1;
1008 }
1009 }
1010
1012 "xlate_in_filter() - "
1013 "charset_source: %s charset_default: %s",
1014 dc && dc->charset_source ? dc->charset_source : "(none)",
1015 dc && dc->charset_default ? dc->charset_default : "(none)");
1016
1017 if (!ctx->ran) { /* filter never ran before */
1019 ctx->ran = 1;
1020 if (!ctx->noop && !ctx->is_sb
1021 && apr_table_get(f->r->headers_in, "Content-Length")) {
1022 /* A Content-Length header is present, but it won't be valid after
1023 * conversion because we're not converting between two single-byte
1024 * charsets. This will affect most CGI scripts and may affect
1025 * some modules.
1026 * Content-Length can't be unset here because that would break
1027 * being able to read the request body.
1028 * Processing of chunked request bodies is not impacted by this
1029 * filter since the length was not declared anyway.
1030 */
1032 "Request body length may change, resulting in "
1033 "misprocessing by some modules or scripts");
1034 }
1035 }
1036
1037 if (ctx->noop) {
1038 return ap_get_brigade(f->next, bb, mode, block, readbytes);
1039 }
1040
1041 if (APR_BRIGADE_EMPTY(ctx->bb)) {
1042 if ((rv = ap_get_brigade(f->next, bb, mode, block,
1043 readbytes)) != APR_SUCCESS) {
1044 return rv;
1045 }
1046 }
1047 else {
1048 APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */
1049 }
1050
1052 rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos);
1053 if (rv == APR_SUCCESS) {
1054 if (!hit_eos) {
1055 /* move anything leftover into our context for next time;
1056 * we don't currently "set aside" since the data came from
1057 * down below, but I suspect that for long-term we need to
1058 * do that
1059 */
1060 APR_BRIGADE_CONCAT(ctx->bb, bb);
1061 }
1062 if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */
1063 apr_bucket *e;
1064
1067 NULL, f->r->connection->bucket_alloc);
1068 /* make sure we insert at the head, because there may be
1069 * an eos bucket already there, and the eos bucket should
1070 * come after the data
1071 */
1073 }
1074 else {
1075 /* XXX need to get some more data... what if the last brigade
1076 * we got had only the first byte of a multibyte char? we need
1077 * to grab more data from the network instead of returning an
1078 * empty brigade
1079 */
1080 }
1081 /* If we have any metadata at the head of ctx->bb, go ahead and move it
1082 * onto the end of bb to be returned to our caller.
1083 */
1084 if (!APR_BRIGADE_EMPTY(ctx->bb)) {
1086 while (b != APR_BRIGADE_SENTINEL(ctx->bb)
1090 b = APR_BRIGADE_FIRST(ctx->bb);
1091 }
1092 }
1093 }
1094 else {
1095 log_xlate_error(f, rv);
1096 }
1098 return rv;
1099}
1100
1101static const command_rec cmds[] =
1102{
1103 AP_INIT_TAKE1("CharsetSourceEnc",
1105 NULL,
1107 "source (html,cgi,ssi) file charset"),
1108 AP_INIT_TAKE1("CharsetDefault",
1110 NULL,
1112 "name of default charset"),
1113 AP_INIT_ITERATE("CharsetOptions",
1115 NULL,
1117 "valid options: ImplicitAdd, NoImplicitAdd, TranslateAllMimeTypes, "
1130}
1131
1133{
1137 NULL,
1138 NULL,
1139 cmds,
1141};
1142
const char apr_size_t len
Definition ap_regex.h:187
APR-UTIL Buckets/Bucket Brigades.
APR general purpose library routines.
APR Strings library.
APR Standard Headers Support.
APR I18N translation library.
#define AP_INIT_TAKE1(directive, func, mconfig, where, help)
#define ap_get_module_config(v, m)
#define AP_DECLARE_MODULE(foo)
ap_conf_vector_t * base
#define AP_INIT_ITERATE(directive, func, mconfig, where, help)
#define ap_set_module_config(v, m, val)
request_rec * r
#define DECLINED
Definition httpd.h:457
apr_status_t ap_pass_brigade(ap_filter_t *filter, apr_bucket_brigade *bucket)
ap_filter_t * ap_add_input_filter(const char *name, void *ctx, request_rec *r, conn_rec *c)
ap_filter_rec_t * ap_register_output_filter(const char *name, ap_out_filter_func filter_func, ap_init_filter_func filter_init, ap_filter_type ftype)
ap_filter_t * ap_add_output_filter(const char *name, void *ctx, request_rec *r, conn_rec *c)
apr_status_t ap_filter_rec_t * ap_register_input_filter(const char *name, ap_in_filter_func filter_func, ap_init_filter_func filter_init, ap_filter_type ftype)
apr_status_t ap_get_brigade(ap_filter_t *filter, apr_bucket_brigade *bucket, ap_input_mode_t mode, apr_read_type_e block, apr_off_t readbytes)
@ AP_FTYPE_RESOURCE
#define APLOGNO(n)
Definition http_log.h:117
#define ap_log_rerror
Definition http_log.h:454
#define APLOG_ERR
Definition http_log.h:67
#define APLOG_TRACE3
Definition http_log.h:74
#define APLOG_MARK
Definition http_log.h:283
#define APLOGrtrace1(r)
Definition http_log.h:246
#define APLOG_TRACE1
Definition http_log.h:72
#define APLOG_TRACE6
Definition http_log.h:77
#define APLOG_DEBUG
Definition http_log.h:71
void ap_hook_fixups(ap_HOOK_fixups_t *pf, const char *const *aszPre, const char *const *aszSucc, int nOrder)
Definition request.c:87
void ap_hook_insert_filter(ap_HOOK_insert_filter_t *pf, const char *const *aszPre, const char *const *aszSucc, int nOrder)
Definition request.c:96
void * dummy
Definition http_vhost.h:62
#define APR_INCOMPLETE
Definition apr_errno.h:452
unsigned int const unsigned char apr_size_t char apr_size_t out_len
Definition apr_md5.h:155
apr_file_t * f
#define APR_BUCKET_REMOVE(e)
#define APR_BUCKET_IS_METADATA(e)
#define APR_BRIGADE_PREPEND(a, b)
#define APR_BRIGADE_INSERT_TAIL(b, e)
#define apr_bucket_split(e, point)
#define APR_BRIGADE_INSERT_HEAD(b, e)
#define APR_BUCKET_NEXT(e)
apr_read_type_e
Definition apr_buckets.h:57
apr_bucket * e
#define APR_BRIGADE_CONCAT(a, b)
#define APR_BRIGADE_EMPTY(b)
#define APR_BRIGADE_SENTINEL(b)
#define apr_bucket_delete(e)
#define APR_BUCKET_IS_EOS(e)
apr_brigade_flush void * ctx
apr_bucket apr_bucket_brigade * a
#define APR_BRIGADE_FIRST(b)
#define apr_bucket_read(e, str, len, block)
@ APR_BLOCK_READ
Definition apr_buckets.h:58
apr_dbd_transaction_t int mode
Definition apr_dbd.h:261
const char apr_ssize_t int partial
Definition apr_escape.h:188
#define APR_HOOK_MIDDLE
Definition apr_hooks.h:303
#define APR_HOOK_REALLY_LAST
Definition apr_hooks.h:307
struct apr_xlate_t apr_xlate_t
Definition apr_xlate.h:39
#define OR_FILEINFO
#define HTTP_INTERNAL_SERVER_ERROR
Definition httpd.h:535
#define M_PUT
Definition httpd.h:593
#define M_POST
Definition httpd.h:594
#define STANDARD20_MODULE_STUFF
void ap_bin2hex(const void *src, apr_size_t srclen, char *dest)
Definition util.c:2314
int ap_cstr_casecmpn(const char *s1, const char *s2, apr_size_t n)
Definition util.c:3559
apr_size_t size
#define APR_SUCCESS
Definition apr_errno.h:225
int apr_status_t
Definition apr_errno.h:44
const char apr_int32_t flag
char * buffer
int strcasecmp(const char *a, const char *b)
apr_vformatter_buff_t * c
Definition apr_lib.h:175
apr_pool_t * b
Definition apr_pools.h:529
#define apr_pcalloc(p, size)
Definition apr_pools.h:465
apr_cmdtype_e cmd
#define DIR_MAGIC_TYPE
Definition httpd.h:711
Apache Configuration.
CORE HTTP Daemon.
Apache Logging library.
Command line options.
HTTP protocol handling.
Apache Request library.
HTTP Daemon routines.
apr_pool_t * p
Definition md_event.c:32
static const char * add_charset_source(cmd_parms *cmd, void *in_dc, const char *name)
static int configured_on_output(request_rec *r, const char *filter_name)
static void * merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
@ EES_LIMIT
@ EES_INIT
@ EES_INCOMPLETE_CHAR
@ EES_BUCKET_READ
@ EES_DOWNSTREAM
@ EES_BAD_INPUT
static int configured_on_input(request_rec *r, const char *filter_name)
static void xlate_insert_filter(request_rec *r)
#define FATTEST_CHAR
#define XLATEIN_FILTER_NAME
static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
static int configured_in_list(request_rec *r, const char *filter_name, struct ap_filter_t *filter_list)
#define OUTPUT_XLATE_BUF_SIZE
static void * create_charset_dir_conf(apr_pool_t *p, char *dummy)
static void chk_filter_chain(ap_filter_t *f)
static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx, const char *partial, apr_size_t partial_len)
static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx, const char **cur_str, apr_size_t *cur_len, char **out_str, apr_size_t *out_len)
static apr_status_t xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb, ap_input_mode_t mode, apr_read_type_e block, apr_off_t readbytes)
static apr_status_t send_bucket_downstream(ap_filter_t *f, apr_bucket *b)
static void charset_register_hooks(apr_pool_t *p)
static void log_xlate_error(ap_filter_t *f, apr_status_t rv)
static const command_rec cmds[]
#define INPUT_XLATE_BUF_SIZE
static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx, apr_bucket_brigade *bb, char *buffer, apr_size_t *buffer_avail, int *hit_eos)
static const char * add_charset_options(cmd_parms *cmd, void *in_dc, const char *flag)
static int find_code_page(request_rec *r)
static apr_status_t send_eos(ap_filter_t *f)
#define XLATE_MIN_BUFF_LEFT
#define XLATEOUT_FILTER_NAME
static const char * add_charset_default(cmd_parms *cmd, void *in_dc, const char *name)
static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
return NULL
Definition mod_so.c:359
char * name
const char * name
The representation of a filter chain.
ap_filter_rec_t * frec
ap_filter_t * next
apr_pool_t * pool
const char * charset_source
enum charset_dir_t::@13 implicit_add
enum charset_dir_t::@14 force_xlate
const char * charset_default
apr_bucket_brigade * bb
apr_bucket_brigade * tmpbb
charset_dir_t * dc
charset_filter_ctx_t * input_ctx
charset_filter_ctx_t * output_ctx
Structure to store things which are per connection.
Definition httpd.h:1152
struct apr_bucket_alloc_t * bucket_alloc
Definition httpd.h:1201
A structure that represents the current request.
Definition httpd.h:845
char * uri
Definition httpd.h:1016
const char * content_type
Definition httpd.h:992
struct ap_filter_t * output_filters
Definition httpd.h:1070
request_rec * prev
Definition httpd.h:856
int method_number
Definition httpd.h:898
apr_pool_t * pool
Definition httpd.h:847
char * filename
Definition httpd.h:1018
int proxyreq
Definition httpd.h:873
conn_rec * connection
Definition httpd.h:849
struct ap_filter_t * input_filters
Definition httpd.h:1072
struct ap_conf_vector_t * request_config
Definition httpd.h:1049
request_rec * main
Definition httpd.h:860
struct ap_conf_vector_t * per_dir_config
Definition httpd.h:1047
charset conversion
Apache filter library.
ap_input_mode_t
input filtering modes
Definition util_filter.h:41
@ AP_MODE_READBYTES
Definition util_filter.h:43