fix(cache): prefer positive cache over SERVFAIL in ncache (#8003)

When serve_stale is enabled, a cached SERVFAIL in ncache shadows a valid
positive entry in pcache because ncache is always checked first. SERVFAIL
is transient and should not mask a known-good answer.

When the ncache hit is a SERVFAIL, check pcache for a valid entry before
returning the SERVFAIL. NXDOMAIN and NODATA are unaffected and still
follow the existing ncache-first lookup per RFC 2308.

Fixes #7956

Signed-off-by: umut-polat <52835619+umut-polat@users.noreply.github.com>
This commit is contained in:
Umut Polat
2026-05-20 06:28:35 +03:00
committed by GitHub
parent 35391dd8a9
commit b1a7fc8db1
2 changed files with 50 additions and 0 deletions

View File

@@ -1027,3 +1027,42 @@ func wildcardMetadataBackend(qname, wildcard string) plugin.Handler {
return dns.RcodeSuccess, nil
})
}
func TestServfailDoesNotShadowPositiveCache(t *testing.T) {
c := New()
c.staleUpTo = time.Hour // enable serve_stale
c.failttl = 5 * time.Second
now := time.Now()
c.now = func() time.Time { return now }
// Manually insert a positive entry in pcache (stored 30s ago, TTL 120s -> still valid).
posMsg := new(dns.Msg)
posMsg.SetQuestion("example.org.", dns.TypeA)
posMsg.Response = true
posMsg.Answer = []dns.RR{test.A("example.org. 120 IN A 127.0.0.53")}
posItem := newItem(posMsg, now.Add(-30*time.Second), 120*time.Second)
k := hash("example.org.", dns.TypeA, false, false)
c.pcache.Add(k, posItem)
// Manually insert a SERVFAIL entry in ncache (stored just now, TTL 5s).
failMsg := new(dns.Msg)
failMsg.SetQuestion("example.org.", dns.TypeA)
failMsg.Response = true
failMsg.Rcode = dns.RcodeServerFailure
failMsg.Ns = []dns.RR{test.SOA("example.org. 5 IN SOA sns.dns.icann.org. noc.dns.icann.org. 2016082540 7200 3600 1209600 3600")}
failItem := newItem(failMsg, now, 5*time.Second)
c.ncache.Add(k, failItem)
// Lookup should prefer the positive entry over the SERVFAIL.
req := new(dns.Msg)
req.SetQuestion("example.org.", dns.TypeA)
state := request.Request{W: &test.ResponseWriter{}, Req: req}
got := c.getIfNotStale(now.Add(time.Second), state, "test")
if got == nil {
t.Fatal("expected a cached item, got nil")
}
if got.Rcode != dns.RcodeSuccess {
t.Fatalf("expected positive cache entry (rcode 0), got rcode %d", got.Rcode)
}
}

View File

@@ -192,6 +192,17 @@ func (c *Cache) getIfNotStale(now time.Time, state request.Request, server strin
if i, ok := c.ncache.Get(k); ok {
ttl := i.ttl(now)
if i.matches(state) && (ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds()))) {
// SERVFAIL is transient; prefer a valid positive cache entry if one
// exists, so a cached SERVFAIL does not shadow a previously good answer.
if i.Rcode == dns.RcodeServerFailure {
if p, pok := c.pcache.Get(k); pok {
pttl := p.ttl(now)
if p.matches(state) && (pttl > 0 || (c.staleUpTo > 0 && -pttl < int(c.staleUpTo.Seconds()))) {
cacheHits.WithLabelValues(server, Success, c.zonesMetricLabel, c.viewMetricLabel).Inc()
return p
}
}
}
cacheHits.WithLabelValues(server, Denial, c.zonesMetricLabel, c.viewMetricLabel).Inc()
return i
}