mirror of
https://github.com/coredns/coredns.git
synced 2026-05-09 03:33:30 -04:00
feat(cache): add optional verify timeout to serve_stale (#8070)
This commit is contained in:
6
plugin/cache/README.md
vendored
6
plugin/cache/README.md
vendored
@@ -40,7 +40,7 @@ cache [TTL] [ZONES...] {
|
|||||||
success CAPACITY [TTL] [MINTTL]
|
success CAPACITY [TTL] [MINTTL]
|
||||||
denial CAPACITY [TTL] [MINTTL]
|
denial CAPACITY [TTL] [MINTTL]
|
||||||
prefetch AMOUNT [[DURATION] [PERCENTAGE%]]
|
prefetch AMOUNT [[DURATION] [PERCENTAGE%]]
|
||||||
serve_stale [DURATION] [REFRESH_MODE]
|
serve_stale [DURATION] [REFRESH_MODE [VERIFY_TIMEOUT]]
|
||||||
servfail DURATION
|
servfail DURATION
|
||||||
disable success|denial [ZONES...]
|
disable success|denial [ZONES...]
|
||||||
keepttl
|
keepttl
|
||||||
@@ -74,6 +74,10 @@ cache [TTL] [ZONES...] {
|
|||||||
from ever being served if an updated response can be retrieved from the source.
|
from ever being served if an updated response can be retrieved from the source.
|
||||||
In `immediate` mode, concurrent requests for the same expired entry dispatch at most one
|
In `immediate` mode, concurrent requests for the same expired entry dispatch at most one
|
||||||
background refresh.
|
background refresh.
|
||||||
|
**VERIFY_TIMEOUT** is only valid with `verify` and bounds how long the cache waits for the upstream
|
||||||
|
verify before falling back to the stale entry. The verify continues in the background and refreshes the
|
||||||
|
cache when it eventually succeeds, so subsequent queries see the fresh entry. The default of `0` means
|
||||||
|
wait until the upstream's own timeout (the original `verify` behavior). Example: `serve_stale 1h verify 100ms`.
|
||||||
* `servfail` cache SERVFAIL responses for **DURATION**. Setting **DURATION** to 0 will disable caching of SERVFAIL
|
* `servfail` cache SERVFAIL responses for **DURATION**. Setting **DURATION** to 0 will disable caching of SERVFAIL
|
||||||
responses. If this option is not set, SERVFAIL responses will be cached for 5 seconds. **DURATION** may not be
|
responses. If this option is not set, SERVFAIL responses will be cached for 5 seconds. **DURATION** may not be
|
||||||
greater than 5 minutes.
|
greater than 5 minutes.
|
||||||
|
|||||||
1
plugin/cache/cache.go
vendored
1
plugin/cache/cache.go
vendored
@@ -44,6 +44,7 @@ type Cache struct {
|
|||||||
// Stale serve
|
// Stale serve
|
||||||
staleUpTo time.Duration
|
staleUpTo time.Duration
|
||||||
verifyStale bool
|
verifyStale bool
|
||||||
|
verifyStaleTimeout time.Duration // 0 means wait for upstream until its own timeout (current default).
|
||||||
|
|
||||||
// Positive/negative zone exceptions
|
// Positive/negative zone exceptions
|
||||||
pexcept []string
|
pexcept []string
|
||||||
|
|||||||
119
plugin/cache/cache_test.go
vendored
119
plugin/cache/cache_test.go
vendored
@@ -501,6 +501,103 @@ func TestServeFromStaleCacheFetchVerify(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestServeFromStaleCacheFetchVerifyTimeout(t *testing.T) {
|
||||||
|
// Verify that when verifyStaleTimeout is set and the upstream is slow,
|
||||||
|
// the client gets the stale entry within ~timeout, while the in-flight
|
||||||
|
// verify continues in the background and refreshes the cache.
|
||||||
|
c := New()
|
||||||
|
c.staleUpTo = 1 * time.Hour
|
||||||
|
c.verifyStale = true
|
||||||
|
c.verifyStaleTimeout = 50 * time.Millisecond
|
||||||
|
c.Next = ttlBackend(120)
|
||||||
|
|
||||||
|
req := new(dns.Msg)
|
||||||
|
req.SetQuestion("cached.org.", dns.TypeA)
|
||||||
|
ctx := context.TODO()
|
||||||
|
|
||||||
|
// Prime the cache with a 120s TTL entry.
|
||||||
|
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||||
|
c.ServeDNS(ctx, rec, req)
|
||||||
|
if c.pcache.Len() != 1 {
|
||||||
|
t.Fatalf("Msg with > 0 TTL should have been cached")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move forward past the TTL so the entry is stale.
|
||||||
|
c.now = func() time.Time { return time.Now().Add(3 * time.Minute) }
|
||||||
|
|
||||||
|
// Swap in a slow backend that takes longer than the verify timeout.
|
||||||
|
bgDone := make(chan struct{})
|
||||||
|
c.Next = slowTTLBackend(60, 200*time.Millisecond, bgDone)
|
||||||
|
|
||||||
|
rec = dnstest.NewRecorder(&test.ResponseWriter{})
|
||||||
|
start := time.Now()
|
||||||
|
ret, err := c.ServeDNS(ctx, rec, req.Copy())
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if ret != dns.RcodeSuccess {
|
||||||
|
t.Fatalf("expected RcodeSuccess, got %d", ret)
|
||||||
|
}
|
||||||
|
if elapsed > 150*time.Millisecond {
|
||||||
|
t.Errorf("expected response within ~timeout (50ms); took %v", elapsed)
|
||||||
|
}
|
||||||
|
if rec.Msg == nil || len(rec.Msg.Answer) == 0 {
|
||||||
|
t.Fatalf("expected an answer, got %+v", rec.Msg)
|
||||||
|
}
|
||||||
|
// Stale serve sets TTL to 0.
|
||||||
|
if got := rec.Msg.Answer[0].Header().Ttl; got != 0 {
|
||||||
|
t.Errorf("expected stale TTL=0, got %d", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the background verify to complete.
|
||||||
|
select {
|
||||||
|
case <-bgDone:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatalf("background verify never completed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestServeFromStaleCacheFetchVerifyTimeoutFastUpstream(t *testing.T) {
|
||||||
|
// When the upstream answers within the verify timeout, the client should
|
||||||
|
// receive the freshly verified response (not a stale one).
|
||||||
|
c := New()
|
||||||
|
c.staleUpTo = 1 * time.Hour
|
||||||
|
c.verifyStale = true
|
||||||
|
c.verifyStaleTimeout = 500 * time.Millisecond
|
||||||
|
c.Next = ttlBackend(120)
|
||||||
|
|
||||||
|
req := new(dns.Msg)
|
||||||
|
req.SetQuestion("cached.org.", dns.TypeA)
|
||||||
|
ctx := context.TODO()
|
||||||
|
|
||||||
|
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||||
|
c.ServeDNS(ctx, rec, req)
|
||||||
|
if c.pcache.Len() != 1 {
|
||||||
|
t.Fatalf("Msg with > 0 TTL should have been cached")
|
||||||
|
}
|
||||||
|
|
||||||
|
c.now = func() time.Time { return time.Now().Add(3 * time.Minute) }
|
||||||
|
// Fast upstream returning fresh TTL=200.
|
||||||
|
c.Next = ttlBackend(200)
|
||||||
|
|
||||||
|
rec = dnstest.NewRecorder(&test.ResponseWriter{})
|
||||||
|
ret, err := c.ServeDNS(ctx, rec, req.Copy())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if ret != dns.RcodeSuccess {
|
||||||
|
t.Fatalf("expected RcodeSuccess, got %d", ret)
|
||||||
|
}
|
||||||
|
if rec.Msg == nil || len(rec.Msg.Answer) == 0 {
|
||||||
|
t.Fatalf("expected an answer, got %+v", rec.Msg)
|
||||||
|
}
|
||||||
|
if got := rec.Msg.Answer[0].Header().Ttl; got != 200 {
|
||||||
|
t.Errorf("expected fresh TTL=200, got %d", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNegativeStaleMaskingPositiveCache(t *testing.T) {
|
func TestNegativeStaleMaskingPositiveCache(t *testing.T) {
|
||||||
c := New()
|
c := New()
|
||||||
c.staleUpTo = time.Minute * 10
|
c.staleUpTo = time.Minute * 10
|
||||||
@@ -672,6 +769,28 @@ func servFailBackend(ttl int) plugin.Handler {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// slowTTLBackend wraps ttlBackend with a fixed delay to simulate a slow upstream.
|
||||||
|
// done is closed once the response is written so callers can synchronise with the
|
||||||
|
// background goroutine.
|
||||||
|
func slowTTLBackend(ttl int, delay time.Duration, done chan<- struct{}) plugin.Handler {
|
||||||
|
return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
|
||||||
|
select {
|
||||||
|
case <-time.After(delay):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return dns.RcodeServerFailure, ctx.Err()
|
||||||
|
}
|
||||||
|
m := new(dns.Msg)
|
||||||
|
m.SetReply(r)
|
||||||
|
m.Response, m.RecursionAvailable = true, true
|
||||||
|
m.Answer = []dns.RR{test.A(fmt.Sprintf("example.org. %d IN A 127.0.0.53", ttl))}
|
||||||
|
w.WriteMsg(m)
|
||||||
|
if done != nil {
|
||||||
|
close(done)
|
||||||
|
}
|
||||||
|
return dns.RcodeSuccess, nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestComputeTTL(t *testing.T) {
|
func TestComputeTTL(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
msgTTL time.Duration
|
msgTTL time.Duration
|
||||||
|
|||||||
51
plugin/cache/handler.go
vendored
51
plugin/cache/handler.go
vendored
@@ -45,11 +45,20 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||||||
// serve stale behavior
|
// serve stale behavior
|
||||||
if c.verifyStale {
|
if c.verifyStale {
|
||||||
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do, cd: cd}
|
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do, cd: cd}
|
||||||
|
if c.verifyStaleTimeout > 0 {
|
||||||
|
// Background verify: cache the response but do not write to the wire.
|
||||||
|
// On timeout, we serve the stale entry below and let the goroutine continue.
|
||||||
|
crr.prefetch = true
|
||||||
|
}
|
||||||
cw := newVerifyStaleResponseWriter(crr)
|
cw := newVerifyStaleResponseWriter(crr)
|
||||||
|
if c.verifyStaleTimeout == 0 {
|
||||||
ret, err := c.doRefresh(ctx, state, cw)
|
ret, err := c.doRefresh(ctx, state, cw)
|
||||||
if cw.refreshed {
|
if cw.refreshed {
|
||||||
return ret, err
|
return ret, err
|
||||||
}
|
}
|
||||||
|
} else if served, ret, err := c.verifyWithTimeout(ctx, state, w, cw, r, do, ad); served {
|
||||||
|
return ret, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Adjust the time to get a 0 TTL in the reply built from a stale item.
|
// Adjust the time to get a 0 TTL in the reply built from a stale item.
|
||||||
@@ -121,6 +130,48 @@ func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw dns.Res
|
|||||||
return plugin.NextOrFailure(c.Name(), c.Next, ctx, cw, state.Req)
|
return plugin.NextOrFailure(c.Name(), c.Next, ctx, cw, state.Req)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// verifyWithTimeout runs the upstream verify in a background goroutine and races it
|
||||||
|
// against verifyStaleTimeout. If the verify completes within the timeout and the
|
||||||
|
// response is cacheable (NoError or NXDomain), the freshly cached entry is served
|
||||||
|
// to the client and served is true. Otherwise served is false and the caller falls
|
||||||
|
// through to serve stale; the goroutine continues to run and any successful response
|
||||||
|
// will update the cache without writing to the (now-detached) client connection.
|
||||||
|
func (c *Cache) verifyWithTimeout(ctx context.Context, state request.Request, w dns.ResponseWriter, cw *verifyStaleResponseWriter, r *dns.Msg, do, ad bool) (served bool, code int, err error) {
|
||||||
|
type result struct {
|
||||||
|
code int
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
done := make(chan result, 1)
|
||||||
|
go func() {
|
||||||
|
rc, re := c.doRefresh(ctx, state, cw)
|
||||||
|
done <- result{rc, re}
|
||||||
|
}()
|
||||||
|
timer := time.NewTimer(c.verifyStaleTimeout)
|
||||||
|
defer timer.Stop()
|
||||||
|
select {
|
||||||
|
case res := <-done:
|
||||||
|
if !cw.refreshed {
|
||||||
|
return false, 0, nil
|
||||||
|
}
|
||||||
|
fresh := c.exists(state.Name(), state.QType(), state.Do(), state.Req.CheckingDisabled)
|
||||||
|
if fresh == nil {
|
||||||
|
// Should not happen: refreshed=true means the upstream response was cacheable.
|
||||||
|
return true, res.code, res.err
|
||||||
|
}
|
||||||
|
now := c.now().UTC()
|
||||||
|
if c.keepttl {
|
||||||
|
now = fresh.stored
|
||||||
|
}
|
||||||
|
resp := fresh.toMsg(r, now, do, ad)
|
||||||
|
if err := w.WriteMsg(resp); err != nil {
|
||||||
|
return true, dns.RcodeServerFailure, err
|
||||||
|
}
|
||||||
|
return true, dns.RcodeSuccess, nil
|
||||||
|
case <-timer.C:
|
||||||
|
return false, 0, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Cache) shouldPrefetch(i *item, now time.Time) bool {
|
func (c *Cache) shouldPrefetch(i *item, now time.Time) bool {
|
||||||
if c.prefetch <= 0 {
|
if c.prefetch <= 0 {
|
||||||
return false
|
return false
|
||||||
|
|||||||
16
plugin/cache/setup.go
vendored
16
plugin/cache/setup.go
vendored
@@ -172,7 +172,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
|||||||
|
|
||||||
case "serve_stale":
|
case "serve_stale":
|
||||||
args := c.RemainingArgs()
|
args := c.RemainingArgs()
|
||||||
if len(args) > 2 {
|
if len(args) > 3 {
|
||||||
return nil, c.ArgErr()
|
return nil, c.ArgErr()
|
||||||
}
|
}
|
||||||
ca.staleUpTo = 1 * time.Hour
|
ca.staleUpTo = 1 * time.Hour
|
||||||
@@ -187,6 +187,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
|||||||
ca.staleUpTo = d
|
ca.staleUpTo = d
|
||||||
}
|
}
|
||||||
ca.verifyStale = false
|
ca.verifyStale = false
|
||||||
|
ca.verifyStaleTimeout = 0
|
||||||
if len(args) > 1 {
|
if len(args) > 1 {
|
||||||
mode := strings.ToLower(args[1])
|
mode := strings.ToLower(args[1])
|
||||||
if mode != "immediate" && mode != "verify" {
|
if mode != "immediate" && mode != "verify" {
|
||||||
@@ -194,6 +195,19 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
|||||||
}
|
}
|
||||||
ca.verifyStale = mode == "verify"
|
ca.verifyStale = mode == "verify"
|
||||||
}
|
}
|
||||||
|
if len(args) > 2 {
|
||||||
|
if !ca.verifyStale {
|
||||||
|
return nil, errors.New("serve_stale timeout is only valid with the verify refresh mode")
|
||||||
|
}
|
||||||
|
t, err := time.ParseDuration(args[2])
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid serve_stale verify timeout: %w", err)
|
||||||
|
}
|
||||||
|
if t < 0 {
|
||||||
|
return nil, errors.New("invalid negative timeout for serve_stale verify")
|
||||||
|
}
|
||||||
|
ca.verifyStaleTimeout = t
|
||||||
|
}
|
||||||
case "servfail":
|
case "servfail":
|
||||||
args := c.RemainingArgs()
|
args := c.RemainingArgs()
|
||||||
if len(args) != 1 {
|
if len(args) != 1 {
|
||||||
|
|||||||
40
plugin/cache/setup_test.go
vendored
40
plugin/cache/setup_test.go
vendored
@@ -121,21 +121,29 @@ func TestServeStale(t *testing.T) {
|
|||||||
shouldErr bool
|
shouldErr bool
|
||||||
staleUpTo time.Duration
|
staleUpTo time.Duration
|
||||||
verifyStale bool
|
verifyStale bool
|
||||||
|
verifyStaleTimeout time.Duration
|
||||||
}{
|
}{
|
||||||
{"serve_stale", false, 1 * time.Hour, false},
|
{"serve_stale", false, 1 * time.Hour, false, 0},
|
||||||
{"serve_stale 20m", false, 20 * time.Minute, false},
|
{"serve_stale 20m", false, 20 * time.Minute, false, 0},
|
||||||
{"serve_stale 1h20m", false, 80 * time.Minute, false},
|
{"serve_stale 1h20m", false, 80 * time.Minute, false, 0},
|
||||||
{"serve_stale 0m", false, 0, false},
|
{"serve_stale 0m", false, 0, false, 0},
|
||||||
{"serve_stale 0", false, 0, false},
|
{"serve_stale 0", false, 0, false, 0},
|
||||||
{"serve_stale 0 verify", false, 0, true},
|
{"serve_stale 0 verify", false, 0, true, 0},
|
||||||
{"serve_stale 0 immediate", false, 0, false},
|
{"serve_stale 0 immediate", false, 0, false, 0},
|
||||||
{"serve_stale 0 VERIFY", false, 0, true},
|
{"serve_stale 0 VERIFY", false, 0, true, 0},
|
||||||
|
{"serve_stale 1h verify 100ms", false, 1 * time.Hour, true, 100 * time.Millisecond},
|
||||||
|
{"serve_stale 1h verify 0", false, 1 * time.Hour, true, 0},
|
||||||
|
{"serve_stale 1h VERIFY 250ms", false, 1 * time.Hour, true, 250 * time.Millisecond},
|
||||||
// fails
|
// fails
|
||||||
{"serve_stale 20", true, 0, false},
|
{"serve_stale 20", true, 0, false, 0},
|
||||||
{"serve_stale -20m", true, 0, false},
|
{"serve_stale -20m", true, 0, false, 0},
|
||||||
{"serve_stale aa", true, 0, false},
|
{"serve_stale aa", true, 0, false, 0},
|
||||||
{"serve_stale 1m nono", true, 0, false},
|
{"serve_stale 1m nono", true, 0, false, 0},
|
||||||
{"serve_stale 0 after nono", true, 0, false},
|
{"serve_stale 0 after nono", true, 0, false, 0},
|
||||||
|
{"serve_stale 1h immediate 100ms", true, 0, false, 0},
|
||||||
|
{"serve_stale 1h verify -1ms", true, 0, false, 0},
|
||||||
|
{"serve_stale 1h verify garbage", true, 0, false, 0},
|
||||||
|
{"serve_stale 1h verify 100ms extra", true, 0, false, 0},
|
||||||
}
|
}
|
||||||
for i, test := range tests {
|
for i, test := range tests {
|
||||||
c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input))
|
c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input))
|
||||||
@@ -153,6 +161,12 @@ func TestServeStale(t *testing.T) {
|
|||||||
if ca.staleUpTo != test.staleUpTo {
|
if ca.staleUpTo != test.staleUpTo {
|
||||||
t.Errorf("Test %v: Expected stale %v but found: %v", i, test.staleUpTo, ca.staleUpTo)
|
t.Errorf("Test %v: Expected stale %v but found: %v", i, test.staleUpTo, ca.staleUpTo)
|
||||||
}
|
}
|
||||||
|
if ca.verifyStale != test.verifyStale {
|
||||||
|
t.Errorf("Test %v: Expected verifyStale %v but found: %v", i, test.verifyStale, ca.verifyStale)
|
||||||
|
}
|
||||||
|
if ca.verifyStaleTimeout != test.verifyStaleTimeout {
|
||||||
|
t.Errorf("Test %v: Expected verifyStaleTimeout %v but found: %v", i, test.verifyStaleTimeout, ca.verifyStaleTimeout)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user