Discussion:
[nginx] Sub filter: support of multiple strings to replace.
Dmitry Volyntsev
2015-08-17 14:44:21 UTC
Permalink
details: http://hg.nginx.org/nginx/rev/b9447fc457b4
branches:
changeset: 6228:b9447fc457b4
user: Dmitry Volyntsev <***@nginx.com>
date: Mon Aug 17 17:42:02 2015 +0300
description:
Sub filter: support of multiple strings to replace.

diffstat:

src/http/modules/ngx_http_sub_filter_module.c | 433 +++++++++++++++----------
1 files changed, 257 insertions(+), 176 deletions(-)

diffs (truncated from 638 to 300 lines):

diff -r bd55d75a1410 -r b9447fc457b4 src/http/modules/ngx_http_sub_filter_module.c
--- a/src/http/modules/ngx_http_sub_filter_module.c Sun Aug 16 10:51:34 2015 +0300
+++ b/src/http/modules/ngx_http_sub_filter_module.c Mon Aug 17 17:42:02 2015 +0300
@@ -13,6 +13,20 @@
typedef struct {
ngx_str_t match;
ngx_http_complex_value_t value;
+} ngx_http_sub_match_t;
+
+
+typedef struct {
+ ngx_uint_t min_match_len;
+ ngx_uint_t max_match_len;
+
+ u_char index[257];
+ u_char shift[256];
+} ngx_http_sub_tables_t;
+
+
+typedef struct {
+ ngx_http_sub_tables_t *tables;

ngx_hash_t types;

@@ -20,17 +34,11 @@ typedef struct {
ngx_flag_t last_modified;

ngx_array_t *types_keys;
+ ngx_array_t *matches;
} ngx_http_sub_loc_conf_t;


-typedef enum {
- sub_start_state = 0,
- sub_match_state,
-} ngx_http_sub_state_e;
-
-
typedef struct {
- ngx_str_t match;
ngx_str_t saved;
ngx_str_t looked;

@@ -48,12 +56,17 @@ typedef struct {
ngx_chain_t *busy;
ngx_chain_t *free;

- ngx_str_t sub;
+ ngx_str_t *sub;
+ ngx_uint_t applied;

- ngx_uint_t state;
+ ngx_int_t offset;
+ ngx_uint_t index;
} ngx_http_sub_ctx_t;


+static ngx_uint_t ngx_http_sub_cmp_index;
+
+
static ngx_int_t ngx_http_sub_output(ngx_http_request_t *r,
ngx_http_sub_ctx_t *ctx);
static ngx_int_t ngx_http_sub_parse(ngx_http_request_t *r,
@@ -64,6 +77,9 @@ static char * ngx_http_sub_filter(ngx_co
static void *ngx_http_sub_create_conf(ngx_conf_t *cf);
static char *ngx_http_sub_merge_conf(ngx_conf_t *cf,
void *parent, void *child);
+static void ngx_http_sub_init_tables(ngx_http_sub_tables_t *tables,
+ ngx_http_sub_match_t *match, ngx_uint_t n);
+static ngx_int_t ngx_http_sub_cmp_matches(const void *one, const void *two);
static ngx_int_t ngx_http_sub_filter_init(ngx_conf_t *cf);


@@ -144,7 +160,7 @@ ngx_http_sub_header_filter(ngx_http_requ

slcf = ngx_http_get_module_loc_conf(r, ngx_http_sub_filter_module);

- if (slcf->match.len == 0
+ if (slcf->matches == NULL
|| r->headers_out.content_length_n == 0
|| ngx_http_test_content_type(r, &slcf->types) == NULL)
{
@@ -156,19 +172,19 @@ ngx_http_sub_header_filter(ngx_http_requ
return NGX_ERROR;
}

- ctx->saved.data = ngx_pnalloc(r->pool, slcf->match.len);
+ ctx->saved.data = ngx_pnalloc(r->pool, slcf->tables->max_match_len - 1);
if (ctx->saved.data == NULL) {
return NGX_ERROR;
}

- ctx->looked.data = ngx_pnalloc(r->pool, slcf->match.len);
+ ctx->looked.data = ngx_pnalloc(r->pool, slcf->tables->max_match_len - 1);
if (ctx->looked.data == NULL) {
return NGX_ERROR;
}

ngx_http_set_ctx(r, ctx, ngx_http_sub_filter_module);

- ctx->match = slcf->match;
+ ctx->offset = slcf->tables->min_match_len - 1;
ctx->last_out = &ctx->out;

r->filter_need_in_memory = 1;
@@ -194,8 +210,10 @@ ngx_http_sub_body_filter(ngx_http_reques
{
ngx_int_t rc;
ngx_buf_t *b;
+ ngx_str_t *sub;
ngx_chain_t *cl;
ngx_http_sub_ctx_t *ctx;
+ ngx_http_sub_match_t *match;
ngx_http_sub_loc_conf_t *slcf;

ctx = ngx_http_get_module_ctx(r, ngx_http_sub_filter_module);
@@ -242,18 +260,10 @@ ngx_http_sub_body_filter(ngx_http_reques
ctx->pos = ctx->buf->pos;
}

- if (ctx->state == sub_start_state) {
- ctx->copy_start = ctx->pos;
- ctx->copy_end = ctx->pos;
- }
-
b = NULL;

while (ctx->pos < ctx->buf->last) {

- ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
- "saved: \"%V\" state: %d", &ctx->saved, ctx->state);
-
rc = ngx_http_sub_parse(r, ctx);

ngx_log_debug4(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
@@ -320,20 +330,6 @@ ngx_http_sub_body_filter(ngx_http_reques
ctx->last_out = &cl->next;
}

- if (ctx->state == sub_start_state) {
- ctx->copy_start = ctx->pos;
- ctx->copy_end = ctx->pos;
-
- } else {
- ctx->copy_start = NULL;
- ctx->copy_end = NULL;
- }
-
- if (ctx->looked.len > (size_t) (ctx->pos - ctx->buf->pos)) {
- ctx->saved.len = ctx->looked.len - (ctx->pos - ctx->buf->pos);
- ngx_memcpy(ctx->saved.data, ctx->looked.data, ctx->saved.len);
- }
-
if (rc == NGX_AGAIN) {
continue;
}
@@ -352,19 +348,30 @@ ngx_http_sub_body_filter(ngx_http_reques

slcf = ngx_http_get_module_loc_conf(r, ngx_http_sub_filter_module);

- if (ctx->sub.data == NULL) {
+ if (ctx->sub == NULL) {
+ ctx->sub = ngx_pcalloc(r->pool, sizeof(ngx_str_t)
+ * slcf->matches->nelts);
+ if (ctx->sub == NULL) {
+ return NGX_ERROR;
+ }
+ }

- if (ngx_http_complex_value(r, &slcf->value, &ctx->sub)
+ sub = &ctx->sub[ctx->index];
+
+ if (sub->data == NULL) {
+ match = slcf->matches->elts;
+
+ if (ngx_http_complex_value(r, &match[ctx->index].value, sub)
!= NGX_OK)
{
return NGX_ERROR;
}
}

- if (ctx->sub.len) {
+ if (sub->len) {
b->memory = 1;
- b->pos = ctx->sub.data;
- b->last = ctx->sub.data + ctx->sub.len;
+ b->pos = sub->data;
+ b->last = sub->data + sub->len;

} else {
b->sync = 1;
@@ -373,7 +380,8 @@ ngx_http_sub_body_filter(ngx_http_reques
*ctx->last_out = cl;
ctx->last_out = &cl->next;

- ctx->once = slcf->once;
+ ctx->index = 0;
+ ctx->once = slcf->once && (++ctx->applied == slcf->matches->nelts);

continue;
}
@@ -428,9 +436,6 @@ ngx_http_sub_body_filter(ngx_http_reques
}

ctx->buf = NULL;
-
- ctx->saved.len = ctx->looked.len;
- ngx_memcpy(ctx->saved.data, ctx->looked.data, ctx->looked.len);
}

if (ctx->out == NULL && ctx->busy == NULL) {
@@ -513,158 +518,142 @@ ngx_http_sub_output(ngx_http_request_t *
static ngx_int_t
ngx_http_sub_parse(ngx_http_request_t *r, ngx_http_sub_ctx_t *ctx)
{
- u_char *p, *last, *copy_end, ch, match;
- size_t looked, i;
- ngx_http_sub_state_e state;
+ u_char *p, *last, *pat, *pat_end, c;
+ ngx_str_t *m;
+ ngx_int_t offset, start, next, end, len, rc;
+ ngx_uint_t shift, i, j;
+ ngx_http_sub_match_t *match;
+ ngx_http_sub_tables_t *tables;
+ ngx_http_sub_loc_conf_t *slcf;
+
+ slcf = ngx_http_get_module_loc_conf(r, ngx_http_sub_filter_module);
+ tables = slcf->tables;
+
+ offset = ctx->offset;
+ end = ctx->buf->last - ctx->pos;

if (ctx->once) {
- ctx->copy_start = ctx->pos;
- ctx->copy_end = ctx->buf->last;
- ctx->pos = ctx->buf->last;
- ctx->looked.len = 0;
-
- ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "once");
-
- return NGX_AGAIN;
+ /* sets start and next to end */
+ offset = end + (ngx_int_t) tables->min_match_len - 1;
+ goto again;
}

- state = ctx->state;
- looked = ctx->looked.len;
- last = ctx->buf->last;
- copy_end = ctx->copy_end;
+ while (offset < end) {

- for (p = ctx->pos; p < last; p++) {
+ c = offset < 0 ? ctx->looked.data[ctx->looked.len + offset]
+ : ctx->pos[offset];

- ch = *p;
- ch = ngx_tolower(ch);
+ c = ngx_tolower(c);

- if (state == sub_start_state) {
-
- /* the tight loop */
-
- match = ctx->match.data[0];
-
- for ( ;; ) {
- if (ch == match) {
-
- if (ctx->match.len == 1) {
- ctx->pos = p + 1;
- ctx->copy_end = p;
-
- return NGX_OK;
- }
-
- copy_end = p;
- ctx->looked.data[0] = *p;
- looked = 1;
- state = sub_match_state;
-
- goto match_started;
- }
-
- if (++p == last) {
- break;
- }
-
- ch = *p;
- ch = ngx_tolower(ch);
- }
-
- ctx->state = state;
- ctx->pos = p;
- ctx->looked.len = looked;
- ctx->copy_end = p;
-
- if (ctx->copy_start == NULL) {
- ctx->copy_start = ctx->buf->pos;
Jim Popovitch
2015-08-18 16:08:12 UTC
Permalink
Post by Dmitry Volyntsev
details: http://hg.nginx.org/nginx/rev/b9447fc457b4
changeset: 6228:b9447fc457b4
date: Mon Aug 17 17:42:02 2015 +0300
Sub filter: support of multiple strings to replace.
Hello,

Can you please provide an example of use.

Thank you!

-Jim P.
Valentin V. Bartenev
2015-08-18 16:37:38 UTC
Permalink
Post by Jim Popovitch
Post by Dmitry Volyntsev
details: http://hg.nginx.org/nginx/rev/b9447fc457b4
changeset: 6228:b9447fc457b4
date: Mon Aug 17 17:42:02 2015 +0300
Sub filter: support of multiple strings to replace.
Hello,
Can you please provide an example of use.
Thank you!
See an example in the module documentation:
http://nginx.org/en/docs/http/ngx_http_sub_module.html

wbr, Valentin V. Bartenev
Jim Popovitch
2015-08-18 16:44:06 UTC
Permalink
Post by Valentin V. Bartenev
Post by Jim Popovitch
Post by Dmitry Volyntsev
details: http://hg.nginx.org/nginx/rev/b9447fc457b4
changeset: 6228:b9447fc457b4
date: Mon Aug 17 17:42:02 2015 +0300
Sub filter: support of multiple strings to replace.
Hello,
Can you please provide an example of use.
Thank you!
http://nginx.org/en/docs/http/ngx_http_sub_module.html
Thanks, I am aware of that. What is missing from the docs is the
format for multiple sub_filter entries.

Is it:

a) sub_filter "matchA" "replaceA" "matchB" "replaceB";

b) sub_filter "matchA" "replaceA"
sub_filter "matchB" "replaceB"

c) sub_filter "match1" "match2" "match3" "replaceAll"

d) Only 1 sub_filter allowed per location or server

And then there's the issue of sub_filter_last_modified and
sub_filter_once applying to all, or just the preceding or the
following entry?

Multiple sub_filters is a great addition, docs are unclear on usage. :-)

-Jim P.
Dmitry
2015-08-18 16:56:14 UTC
Permalink
Post by Jim Popovitch
Post by Valentin V. Bartenev
Post by Jim Popovitch
Post by Dmitry Volyntsev
details: http://hg.nginx.org/nginx/rev/b9447fc457b4
changeset: 6228:b9447fc457b4
date: Mon Aug 17 17:42:02 2015 +0300
Sub filter: support of multiple strings to replace.
Hello,
Can you please provide an example of use.
Thank you!
http://nginx.org/en/docs/http/ngx_http_sub_module.html
Thanks, I am aware of that. What is missing from the docs is the
format for multiple sub_filter entries.
a) sub_filter "matchA" "replaceA" "matchB" "replaceB";
b) sub_filter "matchA" "replaceA"
sub_filter "matchB" "replaceB"
c) sub_filter "match1" "match2" "match3" "replaceAll"
d) Only 1 sub_filter allowed per location or server
Only this option [b] is applicable here.
Post by Jim Popovitch
And then there's the issue of sub_filter_last_modified and
sub_filter_once applying to all, or just the preceding or the
following entry?
sub_filter_once on; is designed to support configurations like
this one:

sub_filter '<head>' '<head><link ...>';
sub_filter '<body>' '<body><script ...>';

Here, each replacement will be applied only once.
Post by Jim Popovitch
Multiple sub_filters is a great addition, docs are unclear on usage. :-)
-Jim P.
_______________________________________________
nginx-devel mailing list
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Jim Popovitch
2015-08-18 16:59:05 UTC
Permalink
Post by Dmitry
Post by Jim Popovitch
Post by Valentin V. Bartenev
Post by Jim Popovitch
Post by Dmitry Volyntsev
details: http://hg.nginx.org/nginx/rev/b9447fc457b4
changeset: 6228:b9447fc457b4
date: Mon Aug 17 17:42:02 2015 +0300
Sub filter: support of multiple strings to replace.
Hello,
Can you please provide an example of use.
Thank you!
http://nginx.org/en/docs/http/ngx_http_sub_module.html
Thanks, I am aware of that. What is missing from the docs is the
format for multiple sub_filter entries.
a) sub_filter "matchA" "replaceA" "matchB" "replaceB";
b) sub_filter "matchA" "replaceA"
sub_filter "matchB" "replaceB"
c) sub_filter "match1" "match2" "match3" "replaceAll"
d) Only 1 sub_filter allowed per location or server
Only this option [b] is applicable here.
Post by Jim Popovitch
And then there's the issue of sub_filter_last_modified and
sub_filter_once applying to all, or just the preceding or the
following entry?
sub_filter_once on; is designed to support configurations like this
sub_filter '<head>' '<head><link ...>';
sub_filter '<body>' '<body><script ...>';
Here, each replacement will be applied only once.
Thank you very much Dmitry

-Jim P.
Valentin V. Bartenev
2015-08-18 17:07:11 UTC
Permalink
Post by Jim Popovitch
Post by Valentin V. Bartenev
Post by Jim Popovitch
Post by Dmitry Volyntsev
details: http://hg.nginx.org/nginx/rev/b9447fc457b4
changeset: 6228:b9447fc457b4
date: Mon Aug 17 17:42:02 2015 +0300
Sub filter: support of multiple strings to replace.
Hello,
Can you please provide an example of use.
Thank you!
http://nginx.org/en/docs/http/ngx_http_sub_module.html
Thanks, I am aware of that. What is missing from the docs is the
format for multiple sub_filter entries.
a) sub_filter "matchA" "replaceA" "matchB" "replaceB";
b) sub_filter "matchA" "replaceA"
sub_filter "matchB" "replaceB"
c) sub_filter "match1" "match2" "match3" "replaceAll"
d) Only 1 sub_filter allowed per location or server
And then there's the issue of sub_filter_last_modified and
sub_filter_once applying to all, or just the preceding or the
following entry?
Multiple sub_filters is a great addition, docs are unclear on usage. :-)
Could you elaborate what exactly is unclear from the docs
with citations? Then we will be able to improve it.

The "sub_filter" directive in the docs has specified syntax:

sub_filter string replacement;

and an explanation how it can be used several times:

| "Several sub_filter directives can be specified on one
| configuration level"

The description of the "sub_filter_once" directive specifies
whether to "look for each string to replace" once or repeatedly.

And I don't really understand the confusion about the
"sub_filter_last_modified" directive, since it relates to
the response header, not "sub_filter" directives.

wbr, Valentin V. Bartenev
Jim Popovitch
2015-08-18 17:47:18 UTC
Permalink
Post by Valentin V. Bartenev
Could you elaborate what exactly is unclear from the docs
with citations? Then we will be able to improve it.
It looks like the docs were just updated, or perhaps I didn't clear my
browser cache often enough. :-) I don't believe any changes are
necessary now, and thank you for providing an example in the docs.
Post by Valentin V. Bartenev
And I don't really understand the confusion about the
"sub_filter_last_modified" directive, since it relates to
the response header, not "sub_filter" directives.
That was just an oversight on my part, I was mostly concerned with how
sub_filter_once would apply to multiple sub_filter entries and that
was cleared up by Dmitry.

-Jim P.

Continue reading on narkive:
Loading...