Apache HTTPD
mod_unique_id.c
Go to the documentation of this file.
1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * mod_unique_id.c: generate a unique identifier for each request
19 *
20 * Original author: Dean Gaudet <[email protected]>
21 * UUencoding modified by: Alvaro Martinez Echevarria <[email protected]>
22 */
23
24#define APR_WANT_BYTEFUNC /* for htons() et al */
25#include "apr_want.h"
26#include "apr_general.h" /* for APR_OFFSETOF */
27#include "apr_network_io.h"
28
29#ifdef APR_HAS_THREADS
30#include "apr_atomic.h" /* for apr_atomic_inc32 */
31#include "mpm_common.h" /* for ap_mpm_query */
32#endif
33
34#include "httpd.h"
35#include "http_config.h"
36#include "http_log.h"
37#include "http_protocol.h" /* for ap_hook_post_read_request */
38
39#define ROOT_SIZE 10
40
41typedef struct {
42 unsigned int stamp;
43 char root[ROOT_SIZE];
44 unsigned short counter;
45 unsigned int thread_index;
47
48/* We are using thread_index (the index into the scoreboard), because we
49 * cannot guarantee the thread_id will be an integer.
50 *
51 * This code looks like it won't give a unique ID with the new thread logic.
52 * It will. The reason is, we don't increment the counter in a thread_safe
53 * manner. Because the thread_index is also in the unique ID now, this does
54 * not matter. In order for the id to not be unique, the same thread would
55 * have to get the same counter twice in the same second.
56 */
57
58/* Comments:
59 *
60 * We want an identifier which is unique across all hits, everywhere.
61 * "everywhere" includes multiple httpd instances on the same machine, or on
62 * multiple machines. Essentially "everywhere" should include all possible
63 * httpds across all servers at a particular "site". We make some assumptions
64 * that if the site has a cluster of machines then their time is relatively
65 * synchronized. We also assume that the first address returned by a
66 * gethostbyname (gethostname()) is unique across all the machines at the
67 * "site".
68 *
69 * The root is assumed to absolutely uniquely identify this one child
70 * from all other currently running children on all servers (including
71 * this physical server if it is running multiple httpds) from each
72 * other.
73 *
74 * The stamp and counter are used to distinguish all hits for a
75 * particular root. The stamp is updated using r->request_time,
76 * saving cpu cycles. The counter is never reset, and is used to
77 * permit up to 64k requests in a single second by a single child.
78 *
79 * The 160 bits of unique_id_rec (20 bytes, assuming a 4-byte int and a
80 * 2-byte short) are encoded using the alphabet [A-Za-z0-9_-], resulting in
81 * 27 printable characters. That is then stuffed into the environment
82 * variable UNIQUE_ID so that it is available to other modules. The alphabet
83 * differs from normal base64 [A-Za-z0-9+/] because + and / are special
84 * characters in URLs and we want to make it easy to use UNIQUE_ID in URLs.
85 *
86 * Note that UNIQUE_ID should be considered an opaque token by other
87 * applications. No attempt should be made to dissect its internal components.
88 * It is an abstraction that may change in the future as the needs of this
89 * module change.
90 *
91 * It is highly desirable that identifiers exist for "eternity". But future
92 * needs (such as much faster webservers, or moving to a
93 * multithreaded server) may dictate a need to change the contents of
94 * unique_id_rec. Such a future implementation should ensure that the first
95 * field is still a time_t stamp. By doing that, it is possible for a site to
96 * have a "flag second" in which they stop all of their old-format servers,
97 * wait one entire second, and then start all of their new-servers. This
98 * procedure will ensure that the new space of identifiers is completely unique
99 * from the old space. (Since the first four unencoded bytes always differ.)
100 *
101 * Note: previous implementations used 32-bits of IP address plus pid
102 * in place of the PRNG output in the "root" field. This was
103 * insufficient for IPv6-only hosts, required working DNS to determine
104 * a unique IP address (fragile), and needed a [0, 1) second sleep
105 * call at startup to avoid pid reuse. Use of the PRNG avoids all
106 * these issues.
107 */
108
109/*
110 * Sun Jun 7 05:43:49 CEST 1998 -- Alvaro
111 * More comments:
112 * 1) The UUencoding procedure is now done in a general way, avoiding the problems
113 * with sizes and paddings that can arise depending on the architecture. Now the
114 * offsets and sizes of the elements of the unique_id_rec structure are calculated
115 * in unique_id_global_init; and then used to duplicate the structure without the
116 * paddings that might exist. The multithreaded server fix should be now very easy:
117 * just add a new "tid" field to the unique_id_rec structure, and increase by one
118 * UNIQUE_ID_REC_MAX.
119 * 2) unique_id_rec.stamp has been changed from "time_t" to "unsigned int", because
120 * its size is 64bits on some platforms (linux/alpha), and this caused problems with
121 * htonl/ntohl. Well, this shouldn't be a problem till year 2106.
122 */
123
124/*
125 * XXX: We should have a per-thread counter and not use cur_unique_id.counter
126 * XXX: in all threads, because this is bad for performance on multi-processor
127 * XXX: systems: Writing to the same address from several CPUs causes cache
128 * XXX: thrashing.
129 */
132#ifdef APR_HAS_THREADS
133static int is_threaded_mpm;
134#endif
135
136/*
137 * Number of elements in the structure unique_id_rec.
138 */
139#define UNIQUE_ID_REC_MAX 4
140
145
147{
148 /*
149 * Calculate the sizes and offsets in cur_unique_id.
150 */
161
162 /*
163 * Calculate the size of the structure when encoded.
164 */
166
167 return OK;
168}
169
171{
172#ifdef APR_HAS_THREADS
173 is_threaded_mpm = 0;
175#endif
176
178 sizeof(cur_unique_id.root));
179
180 /*
181 * If we use 0 as the initial counter we have a little less protection
182 * against restart problems, and a little less protection against a clock
183 * going backwards in time.
184 */
186 sizeof(cur_unique_counter));
187}
188
/* Encoding alphabet, indexed by 6-bit value: the URL-safe "base64url"
 * character set of RFC 4648, in which '-' and '_' stand in for the '+' and
 * '/' of plain base64 because those are not safe in URLs.
 * ### TODO: can switch to apr_encode_*. */
static const char uuencoder[64] = {
    /* 0x00 */ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    /* 0x08 */ 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    /* 0x10 */ 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    /* 0x18 */ 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    /* 0x20 */ 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    /* 0x28 */ 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    /* 0x30 */ 'w', 'x', 'y', 'z', '0', '1', '2', '3',
    /* 0x38 */ '4', '5', '6', '7', '8', '9', '-', '_',
};
198
199#ifndef APR_UINT16_MAX
200#define APR_UINT16_MAX 0xffffu
201#endif
202
/*
 * Build the unique identifier string for request r.
 *
 * The stamp field is taken from the seconds of r->request_time and the
 * thread_index field from r->connection->id, both converted with htonl().
 * The counter field is derived from cur_unique_counter and stored with
 * htons().  The fields are then copied, without any structure padding,
 * into paddedbuf using the unique_id_rec_offset[] / unique_id_rec_size[]
 * tables, two zero bytes are appended, and the bytes are encoded three at
 * a time through uuencoder[] until unique_id_rec_size_uu characters have
 * been produced.
 *
 * Returns a NUL-terminated string allocated from r->pool.
 */
203static const char *gen_unique_id(const request_rec *r)
204{
205 char *str;
206 /*
207 * Buffer padded with two final bytes, used to copy the unique_id_rec
208 * structure without the internal paddings that it could have.
209 */
211 struct {
213 unsigned char pad[2];
214 } paddedbuf;
215 apr_uint32_t counter;
216 unsigned char *x,*y;
217 int i,j,k;
218
220 new_unique_id.stamp = htonl((unsigned int)apr_time_sec(r->request_time));
221 new_unique_id.thread_index = htonl((unsigned int)r->connection->id);
/* Non-threaded MPMs use a plain post-increment of the shared counter.
 * NOTE(review): the threaded branch presumably uses an atomic increment
 * (apr_atomic.h is included for apr_atomic_inc32) -- confirm. */
222#ifdef APR_HAS_THREADS
223 if (is_threaded_mpm)
225 else
226#endif
227 counter = cur_unique_counter++;
228
229 /* The counter is two bytes for the uuencoded unique id, in network
230 * byte order.
231 */
/* NOTE(review): "% APR_UINT16_MAX" reduces modulo 0xffff, so the encoded
 * counter only takes the values 0..0xfffe, and the 32-bit counter values
 * 0xffffffff and 0 (adjacent across wraparound) both encode as 0.  If the
 * full 16-bit range is intended, masking with "& APR_UINT16_MAX" would
 * provide it -- confirm before changing. */
232 new_unique_id.counter = htons(counter % APR_UINT16_MAX);
233
234 /* we'll use a temporary buffer to avoid uuencoding the possible internal
235 * paddings of the original structure */
236 x = (unsigned char *) &paddedbuf;
237 k = 0;
238 for (i = 0; i < UNIQUE_ID_REC_MAX; i++) {
239 y = ((unsigned char *) &new_unique_id) + unique_id_rec_offset[i];
240 for (j = 0; j < unique_id_rec_size[i]; j++, k++) {
241 x[k] = y[j];
242 }
243 }
244 /*
245 * We reset two more bytes just in case padding is needed for the uuencoding.
246 */
247 x[k++] = '\0';
248 x[k++] = '\0';
249
250 /* alloc str and do the uuencoding */
251 str = (char *)apr_palloc(r->pool, unique_id_rec_size_uu + 1);
252 k = 0;
/* Each pass turns 3 input bytes into up to 4 output characters; the two
 * early breaks stop a final partial group once the expected output length
 * has been reached. */
253 for (i = 0; i < unique_id_rec_total_size; i += 3) {
254 y = x + i;
255 str[k++] = uuencoder[y[0] >> 2];
256 str[k++] = uuencoder[((y[0] & 0x03) << 4) | ((y[1] & 0xf0) >> 4)];
257 if (k == unique_id_rec_size_uu) break;
258 str[k++] = uuencoder[((y[1] & 0x0f) << 2) | ((y[2] & 0xc0) >> 6)];
259 if (k == unique_id_rec_size_uu) break;
260 str[k++] = uuencoder[y[2] & 0x3f];
261 }
262 str[k++] = '\0';
263
264 return str;
265}
266
267/*
268 * There are two ways the generation of a unique id can be triggered:
269 *
270 * - from the post_read_request hook which calls set_unique_id()
271 * - from error logging via the generate_log_id hook which calls
272 * generate_log_id(). This may happen before or after set_unique_id()
273 * has been called, or not at all.
274 */
275
276static int generate_log_id(const conn_rec *c, const request_rec *r,
277 const char **id)
278{
279 /* we do not care about connection ids */
280 if (r == NULL)
281 return DECLINED;
282
283 /* XXX: do we need special handling for internal redirects? */
284
285 /* if set_unique_id() has been called for this request, use it */
286 *id = apr_table_get(r->subprocess_env, "UNIQUE_ID");
287
288 if (!*id)
289 *id = gen_unique_id(r);
290 return OK;
291}
292
294{
295 const char *id = NULL;
296 /* copy the unique_id if this is an internal redirect (we're never
297 * actually called for sub requests, so we don't need to test for
298 * them) */
299 if (r->prev) {
300 id = apr_table_get(r->subprocess_env, "REDIRECT_UNIQUE_ID");
301 }
302
303 if (!id) {
304 /* if we have a log id, it was set by our generate_log_id() function
305 * and we should reuse the same id
306 */
307 id = r->log_id;
308 }
309
310 if (!id) {
311 id = gen_unique_id(r);
312 }
313
314 /* set the environment variable */
315 apr_table_setn(r->subprocess_env, "UNIQUE_ID", id);
316
317 return DECLINED;
318}
319
327
330 NULL, /* dir config creater */
331 NULL, /* dir merger --- default is to override */
332 NULL, /* server config */
333 NULL, /* merge server configs */
334 NULL, /* command apr_table_t */
335 register_hooks /* register hooks */
336};
APR Atomic Operations.
APR Miscellaneous library routines.
APR Network library.
APR Standard Headers Support.
apr_uint32_t apr_atomic_inc32(volatile apr_uint32_t *mem)
Definition atomic.c:51
void ap_hook_post_config(ap_HOOK_post_config_t *pf, const char *const *aszPre, const char *const *aszSucc, int nOrder)
Definition config.c:105
#define AP_DECLARE_MODULE(foo)
request_rec * r
void ap_hook_child_init(ap_HOOK_child_init_t *pf, const char *const *aszPre, const char *const *aszSucc, int nOrder)
Definition config.c:167
const char server_rec * main_server
#define DECLINED
Definition httpd.h:457
#define OK
Definition httpd.h:456
void ap_hook_generate_log_id(ap_HOOK_generate_log_id_t *pf, const char *const *aszPre, const char *const *aszSucc, int nOrder)
Definition log.c:2009
void ap_hook_post_read_request(ap_HOOK_post_read_request_t *pf, const char *const *aszPre, const char *const *aszSucc, int nOrder)
Definition protocol.c:2585
#define APR_HOOK_MIDDLE
Definition apr_hooks.h:303
#define STANDARD20_MODULE_STUFF
void ap_random_insecure_bytes(void *buf, apr_size_t size)
Definition core.c:5455
apr_size_t size
apr_vformatter_buff_t * c
Definition apr_lib.h:175
const char * s
Definition apr_strings.h:95
#define apr_time_sec(time)
Definition apr_time.h:63
apr_status_t ap_mpm_query(int query_code, int *result)
Definition mpm_common.c:421
#define AP_MPMQ_IS_THREADED
Definition ap_mpm.h:152
Apache Configuration.
Apache Logging library.
HTTP protocol handling.
HTTP Daemon routines.
apr_pool_t * p
Definition md_event.c:32
return NULL
Definition mod_so.c:359
int i
Definition mod_so.c:347
static unique_id_rec cur_unique_id
static unsigned short unique_id_rec_total_size
#define APR_UINT16_MAX
static unsigned short unique_id_rec_offset[4]
static const char * gen_unique_id(const request_rec *r)
static int unique_id_global_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
static unsigned short unique_id_rec_size[4]
#define ROOT_SIZE
static void unique_id_child_init(apr_pool_t *p, server_rec *s)
static int generate_log_id(const conn_rec *c, const request_rec *r, const char **id)
static void register_hooks(apr_pool_t *p)
static const char uuencoder[64]
static unsigned short unique_id_rec_size_uu
#define UNIQUE_ID_REC_MAX
static apr_uint32_t cur_unique_counter
static int set_unique_id(request_rec *r)
Multi-Processing Modules functions.
Structure to store things which are per connection.
Definition httpd.h:1152
long id
Definition httpd.h:1187
A structure that represents the current request.
Definition httpd.h:845
request_rec * prev
Definition httpd.h:856
apr_pool_t * pool
Definition httpd.h:847
apr_time_t request_time
Definition httpd.h:886
conn_rec * connection
Definition httpd.h:849
const char * log_id
Definition httpd.h:1059
apr_table_t * subprocess_env
Definition httpd.h:983
A structure to store information for each virtual server.
Definition httpd.h:1322
unsigned int thread_index
unsigned int stamp
unsigned short counter
#define str