mirror of
https://github.com/coredns/coredns.git
synced 2026-01-17 22:31:18 -05:00
perf(proxy): use mutex-based connection pool (#7790)
* perf(proxy): use mutex-based connection pool

  The proxy package (used, for example, by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck).

  Changes:
  - Removed dial, yield, and ret channels from the Transport struct.
  - Removed the connManager goroutine's request processing loop.
  - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching.
  - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker.
  - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends.
  - Updated tests to reflect the removal of internal channels.

  Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport).

  Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* fix: address PR review for persistent.go

  - Named mutex field instead of embedding, to not expose Lock() and Unlock()
  - Move stop check outside of lock in Yield()
  - Close() without a separate goroutine
  - Change stop channel to struct

  Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* fix: address code review feedback for conn pool

  - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452).
  - Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections.
  - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock.

  Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* fix: remove unused error consts

  No longer utilized after refactoring the channel-based approach.

  Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* feat(forward): add max_idle_conns option

  Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option.

  Changes:
  - Add SetMaxIdleConns to proxy
  - Add maxIdleConns field to Forward struct
  - Add max_idle_conns parsing in forward plugin setup
  - Apply setting to each proxy during configuration
  - Update forward plugin README with new option

  By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached.

  Also add a yield related test.

  Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* chore(proxy): simplify Dial by closing conns inline

  Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact.

  Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* chore: fewer explicit Unlock calls

  Cleaner and less chance of forgetting to unlock on new possible code paths.

  Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

---------

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"math"
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -73,30 +74,66 @@ func TestProxyTLSFail(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestProtocolSelection(t *testing.T) {
|
||||
p := NewProxy("TestProtocolSelection", "bad_address", transport.DNS)
|
||||
p.readTimeout = 10 * time.Millisecond
|
||||
testCases := []struct {
|
||||
name string
|
||||
requestTCP bool // true = TCP request, false = UDP request
|
||||
opts Options
|
||||
expectedProto string
|
||||
}{
|
||||
{"UDP request, no options", false, Options{}, "udp"},
|
||||
{"UDP request, ForceTCP", false, Options{ForceTCP: true}, "tcp"},
|
||||
{"UDP request, PreferUDP", false, Options{PreferUDP: true}, "udp"},
|
||||
{"UDP request, ForceTCP+PreferUDP", false, Options{ForceTCP: true, PreferUDP: true}, "tcp"},
|
||||
{"TCP request, no options", true, Options{}, "tcp"},
|
||||
{"TCP request, ForceTCP", true, Options{ForceTCP: true}, "tcp"},
|
||||
{"TCP request, PreferUDP", true, Options{PreferUDP: true}, "udp"},
|
||||
{"TCP request, ForceTCP+PreferUDP", true, Options{ForceTCP: true, PreferUDP: true}, "tcp"},
|
||||
}
|
||||
|
||||
stateUDP := request.Request{W: &test.ResponseWriter{}, Req: new(dns.Msg)}
|
||||
stateTCP := request.Request{W: &test.ResponseWriter{TCP: true}, Req: new(dns.Msg)}
|
||||
ctx := context.TODO()
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Track which protocol the server received (use channel to avoid data race)
|
||||
protoChan := make(chan string, 1)
|
||||
s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
|
||||
// Determine protocol from the connection type
|
||||
if _, ok := w.RemoteAddr().(*net.TCPAddr); ok {
|
||||
protoChan <- "tcp"
|
||||
} else {
|
||||
protoChan <- "udp"
|
||||
}
|
||||
ret := new(dns.Msg)
|
||||
ret.SetReply(r)
|
||||
ret.Answer = append(ret.Answer, test.A("example.org. IN A 127.0.0.1"))
|
||||
w.WriteMsg(ret)
|
||||
})
|
||||
defer s.Close()
|
||||
|
||||
go func() {
|
||||
p.Connect(ctx, stateUDP, Options{})
|
||||
p.Connect(ctx, stateUDP, Options{ForceTCP: true})
|
||||
p.Connect(ctx, stateUDP, Options{PreferUDP: true})
|
||||
p.Connect(ctx, stateUDP, Options{PreferUDP: true, ForceTCP: true})
|
||||
p.Connect(ctx, stateTCP, Options{})
|
||||
p.Connect(ctx, stateTCP, Options{ForceTCP: true})
|
||||
p.Connect(ctx, stateTCP, Options{PreferUDP: true})
|
||||
p.Connect(ctx, stateTCP, Options{PreferUDP: true, ForceTCP: true})
|
||||
}()
|
||||
p := NewProxy("TestProtocolSelection", s.Addr, transport.DNS)
|
||||
p.readTimeout = 1 * time.Second
|
||||
p.Start(5 * time.Second)
|
||||
defer p.Stop()
|
||||
|
||||
for i, exp := range []string{"udp", "tcp", "udp", "tcp", "tcp", "tcp", "udp", "tcp"} {
|
||||
proto := <-p.transport.dial
|
||||
p.transport.ret <- nil
|
||||
if proto != exp {
|
||||
t.Errorf("Unexpected protocol in case %d, expected %q, actual %q", i, exp, proto)
|
||||
}
|
||||
m := new(dns.Msg)
|
||||
m.SetQuestion("example.org.", dns.TypeA)
|
||||
|
||||
req := request.Request{
|
||||
W: &test.ResponseWriter{TCP: tc.requestTCP},
|
||||
Req: m,
|
||||
}
|
||||
|
||||
resp, err := p.Connect(context.Background(), req, tc.opts)
|
||||
if err != nil {
|
||||
t.Fatalf("Connect failed: %v", err)
|
||||
}
|
||||
if resp == nil {
|
||||
t.Fatal("Expected response, got nil")
|
||||
}
|
||||
|
||||
receivedProto := <-protoChan
|
||||
if receivedProto != tc.expectedProto {
|
||||
t.Errorf("Expected protocol %q, but server received %q", tc.expectedProto, receivedProto)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user