mirror of
https://github.com/coredns/coredns.git
synced 2026-01-16 13:51:19 -05:00
perf(proxy): use mutex-based connection pool (#7790)
* perf(proxy): use mutex-based connection pool The proxy package (used for example by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck). Changes: - Removed dial, yield, and ret channels from the Transport struct. - Removed the connManager goroutine's request processing loop. - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching. - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker. - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends. - Updated tests to reflect the removal of internal channels. Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport). Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address PR review for persistent.go - Named mutex field instead of embedding, to not expose Lock() and Unlock() - Move stop check outside of lock in Yield() - Close() without a separate goroutine - Change stop channel to struct Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address code review feedback for conn pool - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452). - Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections. - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: remove unused error consts No longer utilised after refactoring the channel based approach. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * feat(forward): add max_idle_conns option Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option. Changes: - Add SetMaxIdleConns to proxy - Add maxIdleConns field to Forward struct - Add max_idle_conns parsing in forward plugin setup - Apply setting to each proxy during configuration - Update forward plugin README with new option By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached. Also add a yield related test. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore(proxy): simple Dial by closing conns inline Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore: fewer explicit Unlock calls Cleaner and less chance of forgetting to unlock on new possible code paths. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> --------- Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
This commit is contained in:
@@ -44,6 +44,7 @@ forward FROM TO... {
|
||||
force_tcp
|
||||
prefer_udp
|
||||
expire DURATION
|
||||
max_idle_conns INTEGER
|
||||
max_fails INTEGER
|
||||
max_connect_attempts INTEGER
|
||||
tls CERT KEY CA
|
||||
@@ -71,6 +72,8 @@ forward FROM TO... {
|
||||
performed for a single incoming DNS request. Default value of 0 means no per-request
|
||||
cap.
|
||||
* `expire` **DURATION**, expire (cached) connections after this time, the default is 10s.
|
||||
* `max_idle_conns` **INTEGER**, maximum number of idle connections to cache per upstream for reuse.
|
||||
Default is 0, which means unlimited.
|
||||
* `tls` **CERT** **KEY** **CA** define the TLS properties for TLS connection. From 0 to 3 arguments can be
|
||||
provided with the meaning as described below
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ type Forward struct {
|
||||
tlsServerName string
|
||||
maxfails uint32
|
||||
expire time.Duration
|
||||
maxIdleConns int
|
||||
maxConcurrent int64
|
||||
failfastUnhealthyUpstreams bool
|
||||
failoverRcodes []int
|
||||
|
||||
@@ -196,6 +196,7 @@ func parseStanza(c *caddy.Controller) (*Forward, error) {
|
||||
}
|
||||
}
|
||||
f.proxies[i].SetExpire(f.expire)
|
||||
f.proxies[i].SetMaxIdleConns(f.maxIdleConns)
|
||||
f.proxies[i].GetHealthchecker().SetRecursionDesired(f.opts.HCRecursionDesired)
|
||||
// when TLS is used, checks are set to tcp-tls
|
||||
if f.opts.ForceTCP && transports[i] != transport.TLS {
|
||||
@@ -311,6 +312,18 @@ func parseBlock(c *caddy.Controller, f *Forward) error {
|
||||
return fmt.Errorf("expire can't be negative: %s", dur)
|
||||
}
|
||||
f.expire = dur
|
||||
case "max_idle_conns":
|
||||
if !c.NextArg() {
|
||||
return c.ArgErr()
|
||||
}
|
||||
n, err := strconv.Atoi(c.Val())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if n < 0 {
|
||||
return fmt.Errorf("max_idle_conns can't be negative: %d", n)
|
||||
}
|
||||
f.maxIdleConns = n
|
||||
case "policy":
|
||||
if !c.NextArg() {
|
||||
return c.ArgErr()
|
||||
|
||||
@@ -365,6 +365,48 @@ func TestSetupMaxConnectAttempts(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetupMaxIdleConns(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
shouldErr bool
|
||||
expectedVal int
|
||||
expectedErr string
|
||||
}{
|
||||
{"forward . 127.0.0.1\n", false, 0, ""},
|
||||
{"forward . 127.0.0.1 {\nmax_idle_conns 10\n}\n", false, 10, ""},
|
||||
{"forward . 127.0.0.1 {\nmax_idle_conns 0\n}\n", false, 0, ""},
|
||||
{"forward . 127.0.0.1 {\nmax_idle_conns many\n}\n", true, 0, "invalid"},
|
||||
{"forward . 127.0.0.1 {\nmax_idle_conns -1\n}\n", true, 0, "negative"},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
c := caddy.NewTestController("dns", test.input)
|
||||
fs, err := parseForward(c)
|
||||
|
||||
if test.shouldErr && err == nil {
|
||||
t.Errorf("Test %d: expected error but found none for input %s", i, test.input)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if !test.shouldErr {
|
||||
t.Errorf("Test %d: expected no error but found one for input %s, got: %v", i, test.input, err)
|
||||
}
|
||||
|
||||
if !strings.Contains(err.Error(), test.expectedErr) {
|
||||
t.Errorf("Test %d: expected error to contain: %v, found error: %v, input: %s", i, test.expectedErr, err, test.input)
|
||||
}
|
||||
}
|
||||
|
||||
if test.shouldErr {
|
||||
continue
|
||||
}
|
||||
f := fs[0]
|
||||
if f.maxIdleConns != test.expectedVal {
|
||||
t.Errorf("Test %d: expected: %d, got: %d", i, test.expectedVal, f.maxIdleConns)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetupHealthCheck(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
|
||||
Reference in New Issue
Block a user