From 6c8c21618bb94b89be7e9208eb60ef87790e0a58 Mon Sep 17 00:00:00 2001 From: "Jonathan Leibiusky @xetorthio" Date: Wed, 14 Jun 2017 20:59:50 -0300 Subject: [PATCH] More prometheus metrics --- docker-compose.yml | 13 +++++++++++++ prometheus.yml | 24 ++++++++++++++++++++++++ pwd/client.go | 8 +++++++- pwd/instance.go | 11 +++++++++++ pwd/pwd.go | 11 +++++++++++ pwd/session.go | 6 ++++++ 6 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 prometheus.yml diff --git a/docker-compose.yml b/docker-compose.yml index 36f2f20..e779202 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -39,5 +39,18 @@ services: - sessions:/pwd environment: GOOGLE_RECAPTCHA_DISABLED: "true" + prometheus: + container_name: prometheus + image: prom/prometheus + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + grafana: + container_name: grafana + image: grafana/grafana + ports: + - "3000:3000" + volumes: + - grafana:/var/lib/grafana volumes: sessions: + grafana: diff --git a/prometheus.yml b/prometheus.yml new file mode 100644 index 0000000..9af86d3 --- /dev/null +++ b/prometheus.yml @@ -0,0 +1,24 @@ +# my global config +global: + scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute. + evaluation_interval: 5s # Evaluate rules every 5 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + # - "first.rules" + # - "second.rules" + +# A scrape configuration containing a single job with the endpoints to scrape: +# Here it's the pwd hosts. +scrape_configs: + # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. + - job_name: 'pwd' + + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. 
+ + static_configs: + - targets: ['pwd1:3000', 'pwd2:3000'] diff --git a/pwd/client.go b/pwd/client.go index 79fec01..f50d758 100644 --- a/pwd/client.go +++ b/pwd/client.go @@ -1,6 +1,9 @@ package pwd -import "log" +import ( + "log" + "time" +) type Client struct { Id string @@ -14,12 +17,14 @@ type ViewPort struct { } func (p *pwd) ClientNew(id string, session *Session) *Client { + defer observeAction("ClientNew", time.Now()) c := &Client{Id: id, session: session} session.clients = append(session.clients, c) return c } func (p *pwd) ClientResizeViewPort(c *Client, cols, rows uint) { + defer observeAction("ClientResizeViewPort", time.Now()) c.viewPort.Rows = rows c.viewPort.Cols = cols @@ -27,6 +32,7 @@ func (p *pwd) ClientResizeViewPort(c *Client, cols, rows uint) { } func (p *pwd) ClientClose(client *Client) { + defer observeAction("ClientClose", time.Now()) // Client has disconnected. Remove from session and recheck terminal sizes. session := client.session for i, cl := range session.clients { diff --git a/pwd/instance.go b/pwd/instance.go index d079929..e0807f5 100644 --- a/pwd/instance.go +++ b/pwd/instance.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" "sync" + "time" "github.com/play-with-docker/play-with-docker/config" "github.com/play-with-docker/play-with-docker/docker" @@ -90,6 +91,7 @@ func (i *Instance) SetSession(s *Session) { } func (p *pwd) InstanceResizeTerminal(instance *Instance, rows, cols uint) error { + defer observeAction("InstanceResizeTerminal", time.Now()) return p.docker.ContainerResize(instance.Name, rows, cols) } @@ -109,6 +111,7 @@ func (p *pwd) InstanceAttachTerminal(instance *Instance) error { } func (p *pwd) InstanceUploadFromUrl(instance *Instance, url string) error { + defer observeAction("InstanceUploadFromUrl", time.Now()) log.Printf("Downloading file [%s]\n", url) resp, err := http.Get(url) if err != nil { @@ -131,10 +134,12 @@ func (p *pwd) InstanceUploadFromUrl(instance *Instance, url string) error { } func (p *pwd) 
InstanceGet(session *Session, name string) *Instance { + defer observeAction("InstanceGet", time.Now()) return session.Instances[name] } func (p *pwd) InstanceFindByIP(ip string) *Instance { + defer observeAction("InstanceFindByIP", time.Now()) for _, s := range sessions { for _, i := range s.Instances { if i.IP == ip { @@ -146,6 +151,7 @@ func (p *pwd) InstanceFindByIP(ip string) *Instance { } func (p *pwd) InstanceFindByAlias(sessionPrefix, alias string) *Instance { + defer observeAction("InstanceFindByAlias", time.Now()) for id, s := range sessions { if strings.HasPrefix(id, sessionPrefix) { for _, i := range s.Instances { @@ -159,6 +165,7 @@ func (p *pwd) InstanceFindByAlias(sessionPrefix, alias string) *Instance { } func (p *pwd) InstanceDelete(session *Session, instance *Instance) error { + defer observeAction("InstanceDelete", time.Now()) if instance.conn != nil { instance.conn.Close() } @@ -193,6 +200,7 @@ func (p *pwd) checkHostnameExists(session *Session, hostname string) bool { } func (p *pwd) InstanceNew(session *Session, conf InstanceConfig) (*Instance, error) { + defer observeAction("InstanceNew", time.Now()) session.rw.Lock() defer session.rw.Unlock() @@ -274,12 +282,14 @@ func (p *pwd) InstanceNew(session *Session, conf InstanceConfig) (*Instance, err } func (p *pwd) InstanceWriteToTerminal(instance *Instance, data string) { + defer observeAction("InstanceWriteToTerminal", time.Now()) if instance != nil && instance.conn != nil && len(data) > 0 { instance.conn.Write([]byte(data)) } } func (p *pwd) InstanceAllowedImages() []string { + defer observeAction("InstanceAllowedImages", time.Now()) return []string{ config.GetDindImageName(), @@ -289,5 +299,6 @@ func (p *pwd) InstanceAllowedImages() []string { } func (p *pwd) InstanceExec(instance *Instance, cmd []string) (int, error) { + defer observeAction("InstanceExec", time.Now()) return p.docker.Exec(instance.Name, cmd) } diff --git a/pwd/pwd.go b/pwd/pwd.go index 6714b02..f001211 100644 --- a/pwd/pwd.go 
+++ b/pwd/pwd.go @@ -21,8 +21,18 @@ var ( Name: "instances", Help: "Instances", }) + + latencyHistogramVec = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Name: "pwd_action_duration_ms", + Help: "How long it took to process a specific action, in a specific host", + Buckets: []float64{300, 1200, 5000}, + }, []string{"action"}) ) +func observeAction(action string, start time.Time) { + latencyHistogramVec.WithLabelValues(action).Observe(float64(time.Since(start).Nanoseconds()) / 1000000) +} + var sessions map[string]*Session var sessionsMutex sync.Mutex @@ -30,6 +40,7 @@ func init() { prometheus.MustRegister(sessionsGauge) prometheus.MustRegister(clientsGauge) prometheus.MustRegister(instancesGauge) + prometheus.MustRegister(latencyHistogramVec) sessions = make(map[string]*Session) } diff --git a/pwd/session.go b/pwd/session.go index c6be9e4..e467065 100644 --- a/pwd/session.go +++ b/pwd/session.go @@ -52,6 +52,8 @@ type Session struct { } func (p *pwd) SessionNew(duration time.Duration, stack, stackName string) (*Session, error) { + defer observeAction("SessionNew", time.Now()) + sessionsMutex.Lock() defer sessionsMutex.Unlock() @@ -151,6 +153,7 @@ func (p *pwd) SessionGetSmallestViewPort(s *Session) ViewPort { } func (p *pwd) SessionDeployStack(s *Session) error { + defer observeAction("SessionDeployStack", time.Now()) if s.Ready { // a stack was already deployed on this session, just ignore return nil @@ -190,11 +193,13 @@ func (p *pwd) SessionDeployStack(s *Session) error { } func (p *pwd) SessionGet(sessionId string) *Session { + defer observeAction("SessionGet", time.Now()) s := sessions[sessionId] return s } func (p *pwd) SessionLoadAndPrepare() error { + defer observeAction("SessionLoadAndPrepare", time.Now()) err := p.storage.Load() if err != nil { return err @@ -232,6 +237,7 @@ func (p *pwd) SessionLoadAndPrepare() error { } func (p *pwd) SessionSetup(session *Session, conf SessionSetupConf) error { + defer observeAction("SessionSetup", time.Now()) 
var tokens *docker.SwarmTokens = nil var firstSwarmManager *Instance = nil