correct EDNS responses (#96)

Tests updated as well and all the middleware. And Prometheus renamed to
metrics (directive is still prometheus).
This commit is contained in:
Miek Gieben
2016-04-09 16:17:53 +01:00
parent db3d689a8a
commit ad221f4b2a
19 changed files with 192 additions and 143 deletions

View File

@@ -0,0 +1,34 @@
# prometheus
This module enables prometheus metrics for CoreDNS.
The following metrics are exported:
* coredns_dns_request_count_total
* coredns_dns_request_duration_seconds
* coredns_dns_response_size_bytes
* coredns_dns_response_rcode_count_total
Each metric has a label `zone`, which is the zone name used for the request/response,
and a label `qtype`, which holds the query type.
The `response_rcode_count_total` has an extra label `rcode` which holds the rcode
of the response.
If monitoring is enabled queries that do not enter the middleware chain are exported
under the fake domain "dropped" (without a closing dot).
Restarting CoreDNS will stop the monitoring. This is a bug. See also [this upstream
Caddy bug](https://github.com/mholt/caddy/issues/675).
## Syntax
~~~
prometheus
~~~
Add the directive to each zone that you want to see metrics for.
It optionally takes an address where the metrics are exported, the default
is `localhost:9135`. The metrics path is fixed to `/metrics`.
## Examples

View File

@@ -0,0 +1,42 @@
package metrics
import (
"time"
"golang.org/x/net/context"
"github.com/miekg/coredns/middleware"
"github.com/miekg/dns"
)
// ServeDNS passes the query on to the next handler through a recording
// response writer and, once that handler returns, reports the rcode, size and
// start time captured by the recorder. The metrics are attributed to the zone
// from m.ZoneNames that matches the query name, or to "." when none matches.
// The next handler's status and error are returned unchanged.
func (m Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
	req := middleware.State{W: w, Req: r}
	name, qtype := req.Name(), req.Type()

	// Fall back to the root zone when no configured zone matches.
	zone := "."
	if match := middleware.Zones(m.ZoneNames).Matches(name); match != "" {
		zone = match
	}

	// The recorder lets us see the status code and size of the reply.
	recorder := middleware.NewResponseRecorder(w)
	code, err := m.Next.ServeDNS(ctx, recorder, r)

	Report(zone, qtype, recorder.Rcode(), recorder.Size(), recorder.Start())
	return code, err
}
// Report is a plain reporting function that the server can use for REFUSED and other
// queries that are turned down because they don't match any middleware.
//
// It increments the request counter, observes the time elapsed since start (in
// seconds) and the response size (in bytes), and increments the rcode counter,
// all labelled with zone and qtype (the rcode counter additionally with rcode).
// It does nothing when the metrics have not been defined (requestCount is nil).
func Report(zone, qtype, rcode string, size int, start time.Time) {
	if requestCount == nil {
		// no metrics are enabled
		return
	}

	requestCount.WithLabelValues(zone, qtype).Inc()
	// Use Duration.Seconds to keep the fractional part: dividing the Duration
	// by time.Second is an integer division that truncates every sub-second
	// request to 0, leaving the sub-second histogram buckets useless.
	requestDuration.WithLabelValues(zone, qtype).Observe(time.Since(start).Seconds())
	responseSize.WithLabelValues(zone, qtype).Observe(float64(size))
	responseRcode.WithLabelValues(zone, rcode, qtype).Inc()
}

View File

@@ -0,0 +1,86 @@
package metrics
import (
"log"
"net/http"
"sync"
"github.com/miekg/coredns/middleware"
"github.com/prometheus/client_golang/prometheus"
)
// namespace is the Namespace given to every metric created by define.
const namespace = "coredns"
// Metric vectors created by define. They are nil until define has been
// called; Report treats a nil requestCount as "no metrics are enabled".
var (
requestCount *prometheus.CounterVec // requests, labelled by zone and qtype
requestDuration *prometheus.HistogramVec // request duration in seconds, labelled by zone and qtype
responseSize *prometheus.HistogramVec // response size in bytes, labelled by zone and qtype
responseRcode *prometheus.CounterVec // responses, labelled by zone, rcode and qtype
)
// path is the fixed HTTP path on which Start registers the prometheus handler.
const path = "/metrics"
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics.
type Metrics struct {
Next middleware.Handler // handler that ServeDNS hands the query to
Addr string // address the metrics HTTP server listens on
Once sync.Once // makes sure Start performs its setup only once
ZoneNames []string // zones the query name is matched against in ServeDNS
}
// Start defines the metrics, registers them with prometheus, installs the
// prometheus handler on the fixed metrics path and starts an HTTP server on
// m.Addr in a separate goroutine. All of this is guarded by m.Once, so calling
// Start again on the same Metrics does nothing.
//
// Start always returns nil: a failure of the HTTP server is only logged.
func (m *Metrics) Start() error {
	setup := func() {
		define("")

		collectors := []prometheus.Collector{
			requestCount,
			requestDuration,
			responseSize,
			responseRcode,
		}
		for _, c := range collectors {
			prometheus.MustRegister(c)
		}

		http.Handle(path, prometheus.Handler())

		// A nil handler makes the server use the default mux that the
		// metrics path was just registered on.
		go func() {
			err := http.ListenAndServe(m.Addr, nil)
			if err != nil {
				log.Printf("[ERROR] Failed to start prometheus handler: %s", err)
			}
		}()
	}

	m.Once.Do(setup)
	return nil
}
// define creates the package-level metric vectors (requestCount,
// requestDuration, responseSize and responseRcode) under the coredns namespace
// and the given subsystem. An empty subsystem defaults to "dns". The vectors
// are only constructed here; registering them is left to the caller.
func define(subsystem string) {
	if subsystem == "" {
		subsystem = "dns"
	}

	requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "request_count_total",
		// The labels are zone and qtype only; there is no opcode dimension.
		Help: "Counter of DNS requests made per zone and type.",
	}, []string{"zone", "qtype"})

	requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "request_duration_seconds",
		// Extra sub-2.5ms buckets are put in front of the prometheus defaults.
		Buckets: append([]float64{.0001, .0005, .001, .0025}, prometheus.DefBuckets...),
		Help:    "Histogram of the time (in seconds) each request took.",
	}, []string{"zone", "qtype"})

	responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "response_size_bytes",
		Help:      "Size of the returned response in bytes.",
		// NOTE(review): 8291 breaks the 2^n-1 pattern of its neighbours; 8191
		// may have been intended. Confirm before changing, since it alters
		// the exported bucket boundaries.
		Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
	}, []string{"zone", "qtype"})

	responseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "response_rcode_count_total",
		Help:      "Counter of response status codes.",
	}, []string{"zone", "rcode", "qtype"})
}
// Dropped indicates we dropped the query before any handling. It has no
// closing dot, so it cannot be a valid zone.
const Dropped = "dropped"