From 689d753264d34373b0329001c6d026e1c2636091 Mon Sep 17 00:00:00 2001 From: FreddleSpl0it <75116288+FreddleSpl0it@users.noreply.github.com> Date: Wed, 20 May 2026 20:54:51 +0200 Subject: [PATCH] [Agent] Replace dockerapi container with Redis-based control bus --- data/Dockerfiles/acme/Dockerfile | 16 +- data/Dockerfiles/acme/acme.sh | 6 +- .../Dockerfiles/acme/reload-configurations.sh | 56 +- data/Dockerfiles/agent/Dockerfile | 34 + data/Dockerfiles/agent/README.md | 16 + .../agent/cmd/mailcow-agent/main.go | 278 +++++++ data/Dockerfiles/agent/go.mod | 13 + data/Dockerfiles/agent/go.sum | 6 + data/Dockerfiles/agent/internal/bus/bus.go | 175 +++++ data/Dockerfiles/agent/internal/bus/lru.go | 38 + .../agent/internal/commands/commands.go | 83 +++ .../agent/internal/commands/shell.go | 60 ++ .../agent/internal/envelope/envelope.go | 34 + data/Dockerfiles/agent/internal/proc/proc.go | 253 +++++++ .../Dockerfiles/agent/internal/proc/signal.go | 14 + .../agent/internal/registry/registry.go | 97 +++ .../agent/internal/services/clock.go | 9 + .../agent/internal/services/dovecot.go | 294 ++++++++ .../agent/internal/services/generic.go | 37 + .../agent/internal/services/helpers.go | 79 ++ .../agent/internal/services/host.go | 236 ++++++ .../agent/internal/services/nginx.go | 39 + .../agent/internal/services/postfix.go | 142 ++++ .../agent/internal/services/probes.go | 81 ++ .../agent/internal/services/rspamd.go | 86 +++ .../agent/internal/services/services.go | 87 +++ .../agent/internal/services/sogo.go | 36 + .../agent/internal/services/unbound.go | 26 + .../Dockerfiles/agent/internal/stats/stats.go | 155 ++++ data/Dockerfiles/agent/mailcow-agent-cli | 58 ++ data/Dockerfiles/clamd/Dockerfile | 20 +- data/Dockerfiles/dockerapi/Dockerfile | 27 - .../dockerapi/docker-entrypoint.sh | 9 - data/Dockerfiles/dockerapi/main.py | 261 ------- .../dockerapi/modules/DockerApi.py | 626 ---------------- .../Dockerfiles/dockerapi/modules/__init__.py | 0 data/Dockerfiles/dovecot/Dockerfile 
| 17 +- data/Dockerfiles/dovecot/sa-rules.sh | 13 +- .../dovecot/syslog-ng-redis_slave.conf | 2 +- data/Dockerfiles/dovecot/syslog-ng.conf | 2 +- data/Dockerfiles/host-agent/Dockerfile | 21 + data/Dockerfiles/netfilter/Dockerfile | 16 +- data/Dockerfiles/nginx/Dockerfile | 17 +- data/Dockerfiles/olefy/Dockerfile | 16 +- data/Dockerfiles/phpfpm/Dockerfile | 18 +- data/Dockerfiles/phpfpm/docker-entrypoint.sh | 74 +- data/Dockerfiles/postfix-tlspol/Dockerfile | 16 +- .../postfix-tlspol/syslog-ng-redis_slave.conf | 2 +- .../Dockerfiles/postfix-tlspol/syslog-ng.conf | 2 +- data/Dockerfiles/postfix/Dockerfile | 16 +- .../postfix/syslog-ng-redis_slave.conf | 2 +- data/Dockerfiles/postfix/syslog-ng.conf | 2 +- data/Dockerfiles/rspamd/Dockerfile | 18 +- data/Dockerfiles/sogo/Dockerfile | 16 +- .../sogo/syslog-ng-redis_slave.conf | 2 +- data/Dockerfiles/sogo/syslog-ng.conf | 2 +- data/Dockerfiles/unbound/Dockerfile | 19 +- data/Dockerfiles/watchdog/Dockerfile | 16 +- data/Dockerfiles/watchdog/watchdog.sh | 692 ++---------------- data/web/admin/dashboard.php | 123 +++- data/web/inc/ajax/container_ctrl.php | 94 +-- data/web/inc/functions.agent.inc.php | 281 +++++++ data/web/inc/functions.docker.inc.php | 207 ------ data/web/inc/functions.fail2ban.inc.php | 4 +- data/web/inc/functions.inc.php | 31 +- data/web/inc/functions.mailbox.inc.php | 147 ++-- data/web/inc/functions.mailq.inc.php | 259 ++++--- data/web/inc/prerequisites.inc.php | 10 +- data/web/js/build/013-mailcow.js | 12 +- data/web/js/site/dashboard.js | 224 ++---- data/web/json_api.php | 62 +- data/web/lang/lang.de-de.json | 29 +- data/web/lang/lang.en-gb.json | 29 +- data/web/templates/dashboard.twig | 120 ++- docker-compose.yml | 82 ++- 75 files changed, 3740 insertions(+), 2462 deletions(-) create mode 100644 data/Dockerfiles/agent/Dockerfile create mode 100644 data/Dockerfiles/agent/README.md create mode 100644 data/Dockerfiles/agent/cmd/mailcow-agent/main.go create mode 100644 data/Dockerfiles/agent/go.mod create 
mode 100644 data/Dockerfiles/agent/go.sum create mode 100644 data/Dockerfiles/agent/internal/bus/bus.go create mode 100644 data/Dockerfiles/agent/internal/bus/lru.go create mode 100644 data/Dockerfiles/agent/internal/commands/commands.go create mode 100644 data/Dockerfiles/agent/internal/commands/shell.go create mode 100644 data/Dockerfiles/agent/internal/envelope/envelope.go create mode 100644 data/Dockerfiles/agent/internal/proc/proc.go create mode 100644 data/Dockerfiles/agent/internal/proc/signal.go create mode 100644 data/Dockerfiles/agent/internal/registry/registry.go create mode 100644 data/Dockerfiles/agent/internal/services/clock.go create mode 100644 data/Dockerfiles/agent/internal/services/dovecot.go create mode 100644 data/Dockerfiles/agent/internal/services/generic.go create mode 100644 data/Dockerfiles/agent/internal/services/helpers.go create mode 100644 data/Dockerfiles/agent/internal/services/host.go create mode 100644 data/Dockerfiles/agent/internal/services/nginx.go create mode 100644 data/Dockerfiles/agent/internal/services/postfix.go create mode 100644 data/Dockerfiles/agent/internal/services/probes.go create mode 100644 data/Dockerfiles/agent/internal/services/rspamd.go create mode 100644 data/Dockerfiles/agent/internal/services/services.go create mode 100644 data/Dockerfiles/agent/internal/services/sogo.go create mode 100644 data/Dockerfiles/agent/internal/services/unbound.go create mode 100644 data/Dockerfiles/agent/internal/stats/stats.go create mode 100644 data/Dockerfiles/agent/mailcow-agent-cli delete mode 100644 data/Dockerfiles/dockerapi/Dockerfile delete mode 100755 data/Dockerfiles/dockerapi/docker-entrypoint.sh delete mode 100644 data/Dockerfiles/dockerapi/main.py delete mode 100644 data/Dockerfiles/dockerapi/modules/DockerApi.py delete mode 100644 data/Dockerfiles/dockerapi/modules/__init__.py create mode 100644 data/Dockerfiles/host-agent/Dockerfile create mode 100644 data/web/inc/functions.agent.inc.php delete mode 100644 
data/web/inc/functions.docker.inc.php diff --git a/data/Dockerfiles/acme/Dockerfile b/data/Dockerfiles/acme/Dockerfile index 193ca7e00..ca096a216 100644 --- a/data/Dockerfiles/acme/Dockerfile +++ b/data/Dockerfiles/acme/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM alpine:3.23 LABEL maintainer = "The Infrastructure Company GmbH " @@ -35,4 +39,14 @@ COPY expand6.sh /srv/expand6.sh RUN chmod +x /srv/*.sh -CMD ["/sbin/tini", "-g", "--", "/srv/acme.sh"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=acme \ + MAILCOW_AGENT_MAIN_CMD="/sbin/tini -g -- /srv/acme.sh" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/acme/acme.sh b/data/Dockerfiles/acme/acme.sh index 271de4fc9..ca248cb67 100755 --- a/data/Dockerfiles/acme/acme.sh +++ b/data/Dockerfiles/acme/acme.sh @@ -63,11 +63,11 @@ if [[ "${SKIP_LETS_ENCRYPT}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then exec $(readlink -f "$0") fi -log_f "Waiting for Docker API..." -until ping dockerapi -c1 > /dev/null; do +log_f "Waiting for Redis control bus..." +until redis-cli -h "${REDIS_SLAVEOF_IP:-redis-mailcow}" -p "${REDIS_SLAVEOF_PORT:-6379}" -a "${REDISPASS}" --no-auth-warning ping > /dev/null 2>&1; do sleep 1 done -log_f "Docker API OK" +log_f "Redis control bus OK" log_f "Waiting for Postfix..." 
until ping postfix -c1 > /dev/null; do diff --git a/data/Dockerfiles/acme/reload-configurations.sh b/data/Dockerfiles/acme/reload-configurations.sh index 8d194b68b..2b0e4ee00 100644 --- a/data/Dockerfiles/acme/reload-configurations.sh +++ b/data/Dockerfiles/acme/reload-configurations.sh @@ -1,45 +1,29 @@ #!/bin/bash +# Tell every live replica of nginx / dovecot / postfix to reload (or restart +# on cert-amount change) via the mailcow-agent control bus. Replaces the old +# dockerapi-based container_id lookup + exec dance. -# Reading container IDs -# Wrapping as array to ensure trimmed content when calling $NGINX etc. -NGINX=($(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" | jq -rc "select( .name | tostring | contains(\"nginx-mailcow\")) | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id" | tr "\n" " ")) -DOVECOT=($(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" | jq -rc "select( .name | tostring | contains(\"dovecot-mailcow\")) | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id" | tr "\n" " ")) -POSTFIX=($(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" | jq -rc "select( .name | tostring | contains(\"postfix-mailcow\")) | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id" | tr "\n" " ")) - -reload_nginx(){ - echo "Reloading Nginx..." 
- NGINX_RELOAD_RET=$(curl -X POST --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${NGINX}/exec -d '{"cmd":"reload", "task":"nginx"}' --silent -H 'Content-type: application/json' | jq -r .type) - [[ ${NGINX_RELOAD_RET} != 'success' ]] && { echo "Could not reload Nginx, restarting container..."; restart_container ${NGINX} ; } +reload_service() { + local svc="$1" + echo "Reloading ${svc} via mailcow-agent..." + if ! mailcow-agent-cli send "${svc}" reload >/dev/null; then + echo "Could not publish reload to ${svc}, attempting restart..." + mailcow-agent-cli send "${svc}" restart >/dev/null || true + fi } -reload_dovecot(){ - echo "Reloading Dovecot..." - DOVECOT_RELOAD_RET=$(curl -X POST --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${DOVECOT}/exec -d '{"cmd":"reload", "task":"dovecot"}' --silent -H 'Content-type: application/json' | jq -r .type) - [[ ${DOVECOT_RELOAD_RET} != 'success' ]] && { echo "Could not reload Dovecot, restarting container..."; restart_container ${DOVECOT} ; } -} - -reload_postfix(){ - echo "Reloading Postfix..." - POSTFIX_RELOAD_RET=$(curl -X POST --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${POSTFIX}/exec -d '{"cmd":"reload", "task":"postfix"}' --silent -H 'Content-type: application/json' | jq -r .type) - [[ ${POSTFIX_RELOAD_RET} != 'success' ]] && { echo "Could not reload Postfix, restarting container..."; restart_container ${POSTFIX} ; } -} - -restart_container(){ - for container in $*; do - echo "Restarting ${container}..." - C_REST_OUT=$(curl -X POST --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${container}/restart --silent | jq -r '.msg') - echo "${C_REST_OUT}" - done +restart_service() { + local svc="$1" + echo "Restarting ${svc} via mailcow-agent..." 
+ mailcow-agent-cli send "${svc}" restart >/dev/null || true } if [[ "${CERT_AMOUNT_CHANGED}" == "1" ]]; then - restart_container ${NGINX} - restart_container ${DOVECOT} - restart_container ${POSTFIX} + restart_service nginx + restart_service dovecot + restart_service postfix else - reload_nginx - #reload_dovecot - restart_container ${DOVECOT} - #reload_postfix - restart_container ${POSTFIX} + reload_service nginx + restart_service dovecot + restart_service postfix fi diff --git a/data/Dockerfiles/agent/Dockerfile b/data/Dockerfiles/agent/Dockerfile new file mode 100644 index 000000000..5fb1b5af8 --- /dev/null +++ b/data/Dockerfiles/agent/Dockerfile @@ -0,0 +1,34 @@ +# Builder image for mailcow-agent. Each service Dockerfile pulls the static +# binary from here via: +# +# COPY --from=ghcr.io/mailcow/agent:VERSION /out/mailcow-agent /usr/local/bin/mailcow-agent +# +# For local development: build this image first. +# +# docker build -t ghcr.io/mailcow/agent:dev data/Dockerfiles/agent/ +# +# CI publishes a versioned tag and the service Dockerfiles pin against it via +# ARG AGENT_IMAGE. + +FROM golang:1.22-alpine AS build + +ENV CGO_ENABLED=0 \ + GOOS=linux + +WORKDIR /src + +COPY go.mod go.sum* ./ +RUN go mod download + +COPY . . + +RUN mkdir -p /out \ + && go build -trimpath -ldflags="-s -w" \ + -o /out/mailcow-agent ./cmd/mailcow-agent \ + && cp mailcow-agent-cli /out/mailcow-agent-cli \ + && chmod +x /out/mailcow-agent-cli + +# Final stage: tiny image whose only purpose is to be a COPY --from source. +FROM scratch +COPY --from=build /out/mailcow-agent /out/mailcow-agent +COPY --from=build /out/mailcow-agent-cli /out/mailcow-agent-cli diff --git a/data/Dockerfiles/agent/README.md b/data/Dockerfiles/agent/README.md new file mode 100644 index 000000000..e0e0b4750 --- /dev/null +++ b/data/Dockerfiles/agent/README.md @@ -0,0 +1,16 @@ +# mailcow-agent + +Each mailcow service container (postfix, dovecot, …) runs `mailcow-agent` as +ENTRYPOINT. 
It supervises the original service main process and exposes its +control commands over a Redis Pub/Sub bus: + +- `mailcow.control.<service>` — request channel (Backend → Agent) +- `mailcow.reply.<request_id>` — per-request reply channel +- `mailcow.events.<service>` — broadcast events +- `mailcow.nodes.<service>` (ZSET) + `mailcow.node.<service>.<node>` (HASH) — heartbeat registry +- `mailcow.stats.<service>.<node>` (HASH) — per-node cpu/memory stats + +Service behaviour is selected via `MAILCOW_AGENT_SERVICE=<name>`. The main +process command is configured via `MAILCOW_AGENT_MAIN_CMD` (string, executed via +`sh -c` so existing entrypoints/supervisord commands keep working). + diff --git a/data/Dockerfiles/agent/cmd/mailcow-agent/main.go b/data/Dockerfiles/agent/cmd/mailcow-agent/main.go new file mode 100644 index 000000000..b153d3e78 --- /dev/null +++ b/data/Dockerfiles/agent/cmd/mailcow-agent/main.go @@ -0,0 +1,278 @@ +// Per-container control-bus subscriber. Subscribes to mailcow.control.<service> +// on Redis, runs handlers from the per-service command table, publishes +// heartbeats and stats. Optionally supervises a child process. +package main + +import ( + "context" + "errors" + "fmt" + "log" + "os" + "os/signal" + "strings" + "sync" + "sync/atomic" + "syscall" + "time" + + "github.com/redis/go-redis/v9" + + "github.com/mailcow/mailcow-dockerized/agent/internal/bus" + "github.com/mailcow/mailcow-dockerized/agent/internal/commands" + "github.com/mailcow/mailcow-dockerized/agent/internal/proc" + "github.com/mailcow/mailcow-dockerized/agent/internal/registry" + "github.com/mailcow/mailcow-dockerized/agent/internal/services" + "github.com/mailcow/mailcow-dockerized/agent/internal/stats" +) + +const agentVersion = "0.1.0" + +// atomicSignal shares the last caught terminal signal between the handler +// goroutine and main() so it can be forwarded to the supervised child. 
+type atomicSignal struct{ v atomic.Int32 } + +func (a *atomicSignal) Store(s syscall.Signal) { a.v.Store(int32(s)) } +func (a *atomicSignal) Load() os.Signal { return syscall.Signal(a.v.Load()) } + +// healthState holds the latest health probe result. Written by the probe loop, +// read by the heartbeat loop. +type healthState struct { + mu sync.RWMutex + ok bool + detail string + at time.Time +} + +func (h *healthState) Set(ok bool, detail string) { + h.mu.Lock() + h.ok = ok + h.detail = detail + h.at = time.Now() + h.mu.Unlock() +} + +func (h *healthState) Snapshot() (ok bool, detail string, at time.Time) { + h.mu.RLock() + defer h.mu.RUnlock() + return h.ok, h.detail, h.at +} + +func main() { + service := strings.TrimSpace(os.Getenv("MAILCOW_AGENT_SERVICE")) + if service == "" { + fmt.Fprintf(os.Stderr, "mailcow-agent: MAILCOW_AGENT_SERVICE is required. Known: %v\n", services.Known()) + os.Exit(2) + } + + // `mailcow-agent healthcheck` runs the probe once and exits 0/1 + if len(os.Args) > 1 && os.Args[1] == "healthcheck" { + runHealthcheckOnce(service) + } + + nodeID := strings.TrimSpace(os.Getenv("MAILCOW_AGENT_NODE_ID")) + if nodeID == "" { + h, err := os.Hostname() + if err != nil { + log.Fatalf("mailcow-agent: hostname: %v", err) + } + nodeID = h + } + + mainCmd := strings.TrimSpace(os.Getenv("MAILCOW_AGENT_MAIN_CMD")) + // host-agent has no supervised child; everything else runs one. + wantsSupervisor := service != "host" && mainCmd != "" + + rdb, err := newRedis() + if err != nil { + log.Fatalf("mailcow-agent: redis: %v", err) + } + defer rdb.Close() + + // Start the supervised process before serving bus requests — restart/stop + // handlers assume something is already running. 
+ var sup *proc.Supervisor + if wantsSupervisor { + sup = proc.New(mainCmd) + if err := sup.Start(); err != nil { + log.Fatalf("mailcow-agent: start main: %v", err) + } + } + + table, err := services.Build(service, sup) + if err != nil { + log.Fatalf("mailcow-agent: %v", err) + } + + // We handle signals ourselves so we can (a) suppress the Go-runtime stack + // dump on SIGQUIT (php-fpm-alpine's STOPSIGNAL) and (b) remember which + // signal arrived to forward it to the child on shutdown. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, + syscall.SIGHUP, syscall.SIGUSR1, syscall.SIGUSR2) + defer signal.Stop(sigCh) + + stopSig := atomicSignal{} + stopSig.Store(syscall.SIGTERM) + + go func() { + for sig := range sigCh { + switch sig { + case syscall.SIGTERM, syscall.SIGINT, syscall.SIGQUIT: + stopSig.Store(sig.(syscall.Signal)) + log.Printf("mailcow-agent: caught %s, beginning graceful shutdown", sig) + cancel() + return + case syscall.SIGHUP, syscall.SIGUSR1, syscall.SIGUSR2: + if sup != nil { + sup.SignalChild(sig) + } + } + } + }() + + // Initial state is "ok" so the service isn't flagged unhealthy before the + // first probe has run. + health := &healthState{ok: true, detail: "", at: time.Now()} + if table.HealthProbe != nil { + go runHealthLoop(ctx, table.HealthProbe, health, 10*time.Second) + } + + hb := registry.Heartbeat{ + Service: service, + NodeID: nodeID, + Version: agentVersion, + StartedAt: time.Now(), + Image: os.Getenv("MAILCOW_AGENT_IMAGE"), + Health: health, + } + go registry.Loop(ctx, rdb, hb, 10*time.Second) + + // cgroup stats for this container. Host metrics come from exec.host-stats. 
+ pub := stats.NewPublisher(rdb, service, nodeID) + go pub.Run(ctx, 10*time.Second) + + srv := bus.NewServer(rdb, table, nodeID) + busErrCh := make(chan error, 1) + go func() { busErrCh <- srv.Run(ctx) }() + + log.Printf("mailcow-agent: service=%s node=%s ready (commands=%d)", service, nodeID, len(table.Handlers)) + + // Exit only on outside termination or fatal bus error. A crashed/stopped + // child should not tear down the container — the operator may want to + // issue `start` over the bus afterwards. + exitCode := 0 + select { + case <-ctx.Done(): + log.Println("mailcow-agent: shutdown signal received") + case err := <-busErrCh: + if err != nil && !errors.Is(err, context.Canceled) { + log.Printf("mailcow-agent: bus loop exited: %v", err) + exitCode = 1 + } + } + + // Graceful shutdown bounded at 35s. + shutCtx, shutCancel := context.WithTimeout(context.Background(), 35*time.Second) + defer shutCancel() + _ = srv.Shutdown(shutCtx) + _ = registry.Deregister(shutCtx, rdb, service, nodeID) + if sup != nil { + // Forward the exact signal we received so the child sees the same + // shutdown semantics it would without us in front (e.g. SIGQUIT → + // php-fpm graceful drain). + if err := sup.StopWithSignal(shutCtx, stopSig.Load()); err != nil { + log.Printf("mailcow-agent: stop main: %v", err) + } + } + os.Exit(exitCode) +} + +// runHealthcheckOnce runs the local probe with a tight deadline and exits 0/1. +// Used by the `healthcheck` sub-command path. +func runHealthcheckOnce(service string) { + table, err := services.Build(service, nil) + if err != nil { + fmt.Fprintln(os.Stderr, "mailcow-agent healthcheck:", err) + os.Exit(2) + } + if table.HealthProbe == nil { + // Services without a probe are considered healthy. 
+ os.Exit(0) + } + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + if err := table.HealthProbe(ctx); err != nil { + fmt.Fprintln(os.Stderr, "unhealthy:", err) + os.Exit(1) + } + os.Exit(0) +} + +// runHealthLoop ticks the probe and updates state. Same probe path as the +// healthcheck sub-command. +func runHealthLoop(ctx context.Context, probe commands.HealthProbe, state *healthState, interval time.Duration) { + t := time.NewTicker(interval) + defer t.Stop() + check := func() { + pctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + if err := probe(pctx); err != nil { + state.Set(false, err.Error()) + } else { + state.Set(true, "") + } + } + check() + for { + select { + case <-ctx.Done(): + return + case <-t.C: + check() + } + } +} + +func newRedis() (*redis.Client, error) { + addr := os.Getenv("REDIS_SLAVEOF_IP") + port := os.Getenv("REDIS_SLAVEOF_PORT") + if addr == "" { + addr = "redis-mailcow" + port = "6379" + } + if port == "" { + port = "6379" + } + pass := os.Getenv("REDISPASS") + cli := redis.NewClient(&redis.Options{ + Addr: addr + ":" + port, + Password: pass, + DB: 0, + MaxRetries: -1, + MinRetryBackoff: 200 * time.Millisecond, + MaxRetryBackoff: 5 * time.Second, + }) + // Wait up to 2 minutes for Redis to come up before giving up + deadline := time.Now().Add(2 * time.Minute) + var lastErr error + for attempt := 1; time.Now().Before(deadline); attempt++ { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + err := cli.Ping(ctx).Err() + cancel() + if err == nil { + return cli, nil + } + lastErr = err + wait := time.Duration(attempt) * time.Second + if wait > 10*time.Second { + wait = 10 * time.Second + } + log.Printf("mailcow-agent: waiting for redis %s (attempt %d): %v", addr, attempt, err) + time.Sleep(wait) + } + return nil, fmt.Errorf("ping %s after 2m: %w", addr, lastErr) +} diff --git a/data/Dockerfiles/agent/go.mod b/data/Dockerfiles/agent/go.mod new file mode 
100644 index 000000000..17e7d61c6 --- /dev/null +++ b/data/Dockerfiles/agent/go.mod @@ -0,0 +1,13 @@ +module github.com/mailcow/mailcow-dockerized/agent + +go 1.22 + +require ( + github.com/oklog/ulid/v2 v2.1.0 + github.com/redis/go-redis/v9 v9.7.0 +) + +require ( + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect +) diff --git a/data/Dockerfiles/agent/go.sum b/data/Dockerfiles/agent/go.sum new file mode 100644 index 000000000..ea3cb1443 --- /dev/null +++ b/data/Dockerfiles/agent/go.sum @@ -0,0 +1,6 @@ +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/redis/go-redis/v9 v9.7.0 h1:HhLSs+B6O021gwzl+locl0zEDnyNkxMtf/Z3NNBMa9E= +github.com/redis/go-redis/v9 v9.7.0/go.mod h1:f6zhXITC7JUJIlPEiBOTXxJgPLdZcA93GewI7inzyWw= diff --git a/data/Dockerfiles/agent/internal/bus/bus.go b/data/Dockerfiles/agent/internal/bus/bus.go new file mode 100644 index 000000000..19f3e6c81 --- /dev/null +++ b/data/Dockerfiles/agent/internal/bus/bus.go @@ -0,0 +1,175 @@ +// Package bus implements the Redis Pub/Sub control bus: subscribing to the +// service's control channel, dispatching envelopes to a commands.Table, and +// publishing responses back to env.ReplyTo. +package bus + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "sync" + "time" + + "github.com/redis/go-redis/v9" + + "github.com/mailcow/mailcow-dockerized/agent/internal/commands" + "github.com/mailcow/mailcow-dockerized/agent/internal/envelope" +) + +// ControlChannel assembles the per-service control channel. 
+func ControlChannel(service string) string { return "mailcow.control." + service } + +// Server subscribes to a control channel and dispatches commands. +type Server struct { + rdb *redis.Client + table *commands.Table + nodeID string + dedupe *lru + stop chan struct{} + stopped sync.Once + wg sync.WaitGroup +} + +// NewServer wires a fresh server. nodeID is stamped into every Response and is +// what the backend's fan-in aggregator uses to attribute results. +func NewServer(rdb *redis.Client, table *commands.Table, nodeID string) *Server { + return &Server{ + rdb: rdb, + table: table, + nodeID: nodeID, + dedupe: newLRU(1024), + stop: make(chan struct{}), + } +} + +// Run blocks, subscribing to ControlChannel(service) and dispatching incoming +// envelopes concurrently. It returns when ctx is done or Shutdown is called. +func (s *Server) Run(ctx context.Context) error { + channel := ControlChannel(s.table.Service) + sub := s.rdb.Subscribe(ctx, channel) + defer sub.Close() + if _, err := sub.Receive(ctx); err != nil { + return fmt.Errorf("bus: subscribe %s: %w", channel, err) + } + msgs := sub.Channel() + for { + select { + case <-ctx.Done(): + s.wg.Wait() + return ctx.Err() + case <-s.stop: + s.wg.Wait() + return nil + case m, ok := <-msgs: + if !ok { + s.wg.Wait() + return errors.New("bus: subscription channel closed") + } + s.wg.Add(1) + go func(payload string) { + defer s.wg.Done() + s.dispatch(ctx, payload) + }(m.Payload) + } + } +} + +// Shutdown signals Run to stop and waits for in-flight handlers (bounded by +// ctx). 
+func (s *Server) Shutdown(ctx context.Context) error { + s.stopped.Do(func() { close(s.stop) }) + done := make(chan struct{}) + go func() { s.wg.Wait(); close(done) }() + select { + case <-done: + return nil + case <-ctx.Done(): + return ctx.Err() + } +} + +func (s *Server) dispatch(parent context.Context, payload string) { + var req envelope.Request + if err := json.Unmarshal([]byte(payload), &req); err != nil { + // Malformed envelope: no RequestID/ReplyTo we can trust — drop. + return + } + if req.RequestID != "" && !s.dedupe.add(req.RequestID) { + // Duplicate (retry of an idempotent command): silently absorb. + return + } + // Per-node targeting: if args.target_node is set and doesn't match us, + // drop silently. The intended replica picks it up and replies. + if target, ok := req.Args["target_node"].(string); ok && target != "" && target != s.nodeID { + return + } + + ctx, cancel := handlerContext(parent, req.Deadline) + defer cancel() + + start := time.Now() + resp := envelope.Response{RequestID: req.RequestID, OK: true, Node: s.nodeID} + + if h := s.table.Lookup(req.Cmd); h == nil { + resp.OK = false + resp.Error = fmt.Sprintf("no handler for cmd %q", req.Cmd) + resp.ErrorCode = envelope.ErrCodeUnsupportedCommand + } else { + result, err := runWithRecover(ctx, h, req.Args) + switch { + case err == nil: + resp.Result = result + case errors.Is(err, commands.ErrNotFound): + resp.OK = false + resp.Error = err.Error() + resp.ErrorCode = envelope.ErrCodeNotFound + case errors.Is(err, commands.ErrValidation): + resp.OK = false + resp.Error = err.Error() + resp.ErrorCode = envelope.ErrCodeValidation + case errors.Is(err, context.DeadlineExceeded), errors.Is(ctx.Err(), context.DeadlineExceeded): + resp.OK = false + resp.Error = err.Error() + resp.ErrorCode = envelope.ErrCodeTimeout + default: + resp.OK = false + resp.Error = err.Error() + resp.ErrorCode = envelope.ErrCodeInternal + } + } + resp.DurationMS = time.Since(start).Milliseconds() + + if req.ReplyTo == "" 
{ + return + } + data, err := json.Marshal(resp) + if err != nil { + return + } + // Replies go through a List (RPUSH + EXPIRE), not Pub/Sub. This sidesteps + // the "subscribe-before-publish" race and lets the backend fan-in via + // BLPOP — important because PhpRedis's subscribe() blocks, so we can't + // publish on the same connection after subscribing. Use parent ctx so a + // per-handler deadline can't stop us from delivering the timeout response. + pipe := s.rdb.Pipeline() + pipe.RPush(parent, req.ReplyTo, data) + pipe.Expire(parent, req.ReplyTo, 60*time.Second) + _, _ = pipe.Exec(parent) +} + +func runWithRecover(ctx context.Context, h commands.Handler, args map[string]any) (out any, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("handler panic: %v", r) + } + }() + return h(ctx, args) +} + +func handlerContext(parent context.Context, deadline time.Time) (context.Context, context.CancelFunc) { + if deadline.IsZero() { + return context.WithCancel(parent) + } + return context.WithDeadline(parent, deadline) +} diff --git a/data/Dockerfiles/agent/internal/bus/lru.go b/data/Dockerfiles/agent/internal/bus/lru.go new file mode 100644 index 000000000..4414a6b88 --- /dev/null +++ b/data/Dockerfiles/agent/internal/bus/lru.go @@ -0,0 +1,38 @@ +package bus + +import ( + "container/list" + "sync" +) + +// lru is a tiny request-id deduplication cache. The bus treats Pub/Sub +// retries (same request_id) as no-ops. Not a security boundary — only a +// best-effort guard against accidental double-execution. +type lru struct { + mu sync.Mutex + cap int + idx map[string]*list.Element + list *list.List +} + +func newLRU(cap int) *lru { + return &lru{cap: cap, idx: make(map[string]*list.Element, cap), list: list.New()} +} + +// add returns true if id is new and was inserted; false if it was already +// known (caller should skip the duplicate). 
+func (l *lru) add(id string) bool { + l.mu.Lock() + defer l.mu.Unlock() + if _, ok := l.idx[id]; ok { + return false + } + e := l.list.PushFront(id) + l.idx[id] = e + for l.list.Len() > l.cap { + old := l.list.Back() + l.list.Remove(old) + delete(l.idx, old.Value.(string)) + } + return true +} diff --git a/data/Dockerfiles/agent/internal/commands/commands.go b/data/Dockerfiles/agent/internal/commands/commands.go new file mode 100644 index 000000000..4f8312922 --- /dev/null +++ b/data/Dockerfiles/agent/internal/commands/commands.go @@ -0,0 +1,83 @@ +// Package commands defines the per-service handler table. The bus dispatcher +// looks up handlers by name and wraps the result in an envelope.Response. +package commands + +import ( + "context" + "errors" +) + +// ErrNotFound signals that the target (queue id, mailbox, …) doesn't live on +// this node. For broadcast operations the aggregator still counts success if +// any other node returns ok. +var ErrNotFound = errors.New("not_found") + +// ErrValidation indicates a missing or malformed argument. +var ErrValidation = errors.New("validation") + +// Handler executes a single command for a service. +type Handler func(ctx context.Context, args map[string]any) (any, error) + +// HealthProbe returns nil if the supervised service is healthy, error otherwise. +// Shared between the `healthcheck` sub-command and the agent's heartbeat loop. +type HealthProbe func(ctx context.Context) error + +// Table is the per-service command registry built once at startup. +type Table struct { + Service string + Handlers map[string]Handler + HealthProbe HealthProbe +} + +// New constructs an empty table for a service. +func New(service string) *Table { + return &Table{Service: service, Handlers: make(map[string]Handler)} +} + +// Register adds a handler. Duplicate registration panics — wiring bugs should +// be loud. 
+func (t *Table) Register(cmd string, h Handler) { + if _, dup := t.Handlers[cmd]; dup { + panic("commands: duplicate handler " + t.Service + "/" + cmd) + } + t.Handlers[cmd] = h +} + +// Lookup returns the handler for cmd or nil. +func (t *Table) Lookup(cmd string) Handler { + return t.Handlers[cmd] +} + +// ArgString extracts a required string argument. +func ArgString(args map[string]any, key string) (string, error) { + v, ok := args[key] + if !ok { + return "", errArg(key, "missing") + } + s, ok := v.(string) + if !ok || s == "" { + return "", errArg(key, "must be non-empty string") + } + return s, nil +} + +// ArgStringOpt returns an optional string argument with a default. +func ArgStringOpt(args map[string]any, key, def string) string { + if v, ok := args[key]; ok { + if s, ok := v.(string); ok && s != "" { + return s + } + } + return def +} + +func errArg(key, reason string) error { + return &validationError{key: key, reason: reason} +} + +type validationError struct{ key, reason string } + +func (e *validationError) Error() string { return "arg " + e.key + ": " + e.reason } +func (e *validationError) Is(target error) bool { + return target == ErrValidation +} diff --git a/data/Dockerfiles/agent/internal/commands/shell.go b/data/Dockerfiles/agent/internal/commands/shell.go new file mode 100644 index 000000000..12f3e5f2c --- /dev/null +++ b/data/Dockerfiles/agent/internal/commands/shell.go @@ -0,0 +1,60 @@ +package commands + +import ( + "bytes" + "context" + "fmt" + "os/exec" +) + +// RunOptions configures a single Run invocation. +type RunOptions struct { + // Stdin, if non-nil, is written to the process stdin. + Stdin []byte + // CombinedOutputCap limits the captured output (truncated at the end). + // 0 means unlimited. The agent uses ~1 MiB for cat-queue, smaller for + // status-style commands. + OutputCap int +} + +// RunResult is what every shell-style command returns. 
+type RunResult struct {
+	Stdout   string `json:"stdout,omitempty"`
+	Stderr   string `json:"stderr,omitempty"`
+	ExitCode int    `json:"exit_code"`
+}
+
+// Run executes argv[0] argv[1:] under ctx (the bus deadline). A non-zero exit is not an
+// error — callers inspect r.ExitCode themselves (e.g. to map it to ErrNotFound) — but a
+// child that was killed because ctx ended is reported as ctx.Err(), not as a bare -1 exit.
+func Run(ctx context.Context, opts RunOptions, argv ...string) (*RunResult, error) {
+	if len(argv) == 0 {
+		return nil, fmt.Errorf("commands.Run: empty argv")
+	}
+	cmd := exec.CommandContext(ctx, argv[0], argv[1:]...)
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	if opts.Stdin != nil {
+		cmd.Stdin = bytes.NewReader(opts.Stdin)
+	}
+	err := cmd.Run()
+
+	out := stdout.String()
+	errOut := stderr.String()
+	if opts.OutputCap > 0 {
+		if len(out) > opts.OutputCap {
+			out = out[:opts.OutputCap] + "\n…(truncated)"
+		}
+		if len(errOut) > opts.OutputCap {
+			errOut = errOut[:opts.OutputCap] + "\n…(truncated)"
+		}
+	}
+
+	exit := 0
+	if exitErr, ok := err.(*exec.ExitError); ok {
+		exit = exitErr.ExitCode()
+		err = ctx.Err() // nil for an ordinary non-zero exit; the ctx error when the deadline/cancel ended the child
+	}
+	return &RunResult{Stdout: out, Stderr: errOut, ExitCode: exit}, err
+}
diff --git a/data/Dockerfiles/agent/internal/envelope/envelope.go b/data/Dockerfiles/agent/internal/envelope/envelope.go
new file mode 100644
index 000000000..27f5eb883
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/envelope/envelope.go
@@ -0,0 +1,34 @@
+// Package envelope defines the wire format for the mailcow-agent control bus.
+package envelope
+
+import "time"
+
+// Request is what the backend publishes on mailcow.control..
+type Request struct {
+	Cmd       string         `json:"cmd"`
+	RequestID string         `json:"request_id"`
+	Args      map[string]any `json:"args,omitempty"`
+	ReplyTo   string         `json:"reply_to,omitempty"`
+	Deadline  time.Time      `json:"deadline,omitempty"` // NOTE(review): omitempty never omits a struct value such as time.Time when marshaling
+	IssuedBy  string         `json:"issued_by,omitempty"`
+}
+
+// Response is what the agent publishes on the reply_to channel.
+type Response struct {
+	RequestID  string `json:"request_id"`
+	OK         bool   `json:"ok"`
+	Result     any    `json:"result,omitempty"`
+	Error      string `json:"error,omitempty"`
+	ErrorCode  string `json:"error_code,omitempty"`
+	DurationMS int64  `json:"duration_ms"`
+	Node       string `json:"node,omitempty"`
+}
+
+// Error codes returned in Response.ErrorCode. Keep in sync with the V2 schema.
+const (
+	ErrCodeValidation         = "validation"
+	ErrCodeNotFound           = "not_found"
+	ErrCodeTimeout            = "timeout"
+	ErrCodeUnsupportedCommand = "unsupported_command"
+	ErrCodeInternal           = "internal"
+)
diff --git a/data/Dockerfiles/agent/internal/proc/proc.go b/data/Dockerfiles/agent/internal/proc/proc.go
new file mode 100644
index 000000000..0ce007c98
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/proc/proc.go
@@ -0,0 +1,253 @@
+// Package proc supervises the service's main process — postfix, dovecot,
+// nginx, … — as a child of the agent. It exposes the high-level lifecycle
+// verbs (reload/restart/stop/start) used by the per-service command tables.
+//
+//	"reload"  → SIGHUP
+//	"restart" → SIGTERM, wait, exec again
+//	"stop"    → SIGTERM, leave stopped
+//	"start"   → exec again (only if currently stopped)
+package proc
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// Supervisor wraps a single child process.
+type Supervisor struct {
+	cmdLine    string // shell command (passed to `sh -c …`)
+	stopSignal os.Signal
+	stopGrace  time.Duration
+
+	mu       sync.Mutex    // held while reading or writing the three fields below
+	cmd      *exec.Cmd     // most recently started child; nil before the first Start
+	stopped  bool          // set by StopWithSignal, cleared by Start
+	exitedCh chan struct{} // closed after the child's Wait has returned
+}
+
+// New constructs a Supervisor (stop signal SIGTERM, 30s stop grace). cmdLine is executed via `sh -c` so existing
+// docker-entrypoint.sh scripts keep working without quoting headaches.
+func New(cmdLine string) *Supervisor {
+	return &Supervisor{
+		cmdLine:    cmdLine,
+		stopSignal: syscall.SIGTERM,
+		stopGrace:  30 * time.Second,
+	}
+}
+
+// Start launches the child process. Returns an error if it cannot be spawned.
+// The agent's main() also blocks on Wait() to surface exit status.
+func (s *Supervisor) Start() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.cmd != nil && s.cmd.Process != nil && !s.stopped {
+		return errors.New("proc: already running")
+	}
+	// `exec ` prefix tells the shell to replace itself with the command
+	// instead of forking and waiting. Without it, sh stays alive as the
+	// parent of the real service process, signals from us land on the
+	// shell instead of on the service, and SIGHUP for config reloads
+	// silently does nothing. With the prefix the supervised PID *is* the
+	// service after the script's own `exec "$@"` chains through.
+	cmd := exec.Command("/bin/sh", "-c", "exec "+s.cmdLine)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+	if err := cmd.Start(); err != nil {
+		return fmt.Errorf("proc: start: %w", err)
+	}
+	s.cmd = cmd
+	s.stopped = false
+	s.exitedCh = make(chan struct{})
+	go func(exited chan struct{}) { // gets its own channel: a later Start may replace s.exitedCh before this child exits
+		_ = cmd.Wait()
+		close(exited)
+	}(s.exitedCh)
+	return nil
+}
+
+// Wait blocks until the child that is current at call time exits and returns its exit code (-1 if none was started).
+func (s *Supervisor) Wait() int {
+	s.mu.Lock()
+	exited := s.exitedCh
+	cmd := s.cmd
+	s.mu.Unlock()
+	if exited == nil {
+		return -1
+	}
+	<-exited
+	if cmd == nil || cmd.ProcessState == nil {
+		return -1
+	}
+	return cmd.ProcessState.ExitCode()
+}
+
+// SignalChild forwards a single signal to the supervised child without
+// changing the supervisor's lifecycle state. Used to relay SIGHUP/USR1/USR2
+// from the agent's signal handler to the service so operators can still
+// `docker compose kill -s HUP postfix-mailcow` and see the expected effect.
+func (s *Supervisor) SignalChild(sig os.Signal) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.cmd == nil || s.cmd.Process == nil || s.stopped {
+		return errors.New("proc: not running")
+	}
+	return s.cmd.Process.Signal(sig)
+}
+
+// Reload sends SIGHUP. Returns nil if the signal was delivered.
+func (s *Supervisor) Reload() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.cmd == nil || s.cmd.Process == nil || s.stopped {
+		return errors.New("proc: not running")
+	}
+	return s.cmd.Process.Signal(syscall.SIGHUP)
+}
+
+// Stop sends the configured stop signal and waits for the process to exit
+// (bounded by stopGrace and by ctx). Marks the supervisor as stopped — Start
+// must be called again to relaunch.
+func (s *Supervisor) Stop(ctx context.Context) error {
+	return s.StopWithSignal(ctx, s.stopSignal)
+}
+
+// StopWithSignal is like Stop but lets the caller override the stop signal.
+// Used by main() to forward whatever signal Docker sent us (SIGTERM for
+// most containers, SIGQUIT for php-fpm-alpine which uses SIGQUIT for
+// graceful shutdown) so the child gets the same signal semantics it would
+// receive without the agent in front of it.
+func (s *Supervisor) StopWithSignal(ctx context.Context, sig os.Signal) error {
+	s.mu.Lock()
+	cmd := s.cmd
+	exited := s.exitedCh
+	if cmd == nil || cmd.Process == nil {
+		s.mu.Unlock()
+		return nil
+	}
+	s.stopped = true
+	s.mu.Unlock()
+
+	sysSig, ok := sig.(syscall.Signal)
+	if !ok {
+		sysSig = syscall.SIGTERM // non-syscall signal values fall back to SIGTERM
+	}
+	pgid, err := syscall.Getpgid(cmd.Process.Pid)
+	if err == nil {
+		_ = syscall.Kill(-pgid, sysSig) // negative pid: signal the whole process group (child was started with Setpgid)
+	} else {
+		_ = cmd.Process.Signal(sysSig)
+	}
+
+	timer := time.NewTimer(s.stopGrace)
+	defer timer.Stop()
+	select {
+	case <-exited:
+		return nil
+	case <-timer.C:
+		// Last resort: SIGKILL the whole process group.
+		if pgid, err := syscall.Getpgid(cmd.Process.Pid); err == nil {
+			_ = syscall.Kill(-pgid, syscall.SIGKILL)
+		} else {
+			_ = cmd.Process.Kill()
+		}
+		<-exited
+		return errors.New("proc: forced kill after grace period")
+	case <-ctx.Done(): // the child may still be alive here, yet it stays flagged as stopped
+		return ctx.Err()
+	}
+}
+
+// Restart performs Stop+Start using the supervisor's default stop signal. Different from a
+// Docker-initiated shutdown: here it's an explicit "restart this service" command, so we want the
+// standard SIGTERM semantics. NOTE(review): any Stop error (ctx expiry, forced kill) skips Start.
+func (s *Supervisor) Restart(ctx context.Context) error {
+	if err := s.Stop(ctx); err != nil {
+		return err
+	}
+	return s.Start()
+}
+
+// IsRunning reports whether the supervised child is currently alive (started
+// and not yet exited or stopped).
+func (s *Supervisor) IsRunning() bool {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.stopped || s.cmd == nil || s.cmd.Process == nil {
+		return false
+	}
+	// exitedCh is closed when the child exits. Non-blocking read.
+	select {
+	case <-s.exitedCh:
+		return false
+	default:
+		return true
+	}
+}
+
+// WaitStable blocks for `settle` and returns nil if the supervised child is
+// still running at the end, otherwise an error describing the exit. Used by
+// the `restart` command to give the operator real "did it come back up"
+// feedback instead of an immediate OK. Returns ctx.Err() if ctx ends first.
+func (s *Supervisor) WaitStable(ctx context.Context, settle time.Duration) error {
+	s.mu.Lock()
+	exited := s.exitedCh
+	s.mu.Unlock()
+	if exited == nil {
+		return errors.New("proc: not running")
+	}
+	select {
+	case <-exited:
+		// Child died within the settle window.
+		s.mu.Lock()
+		cmd := s.cmd
+		s.mu.Unlock()
+		code := -1
+		if cmd != nil && cmd.ProcessState != nil {
+			code = cmd.ProcessState.ExitCode()
+		}
+		return fmt.Errorf("proc: child exited within settle window (code=%d)", code)
+	case <-time.After(settle):
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}
+
+// Forward installs a signal forwarder: every signal listed in `signals` (typically
+// SIGINT/SIGTERM/SIGHUP/SIGUSR1/SIGUSR2) that the agent receives is relayed to the child.
+// Returns a cancel func to release the handler; the cancel func must be called at most once.
+func (s *Supervisor) Forward(signals ...os.Signal) func() {
+	ch := make(chan os.Signal, len(signals)+1)
+	signalNotify(ch, signals...)
+	done := make(chan struct{})
+	go func() {
+		for {
+			select {
+			case <-done:
+				return
+			case sig := <-ch:
+				s.mu.Lock()
+				cmd := s.cmd
+				s.mu.Unlock()
+				if cmd != nil && cmd.Process != nil {
+					_ = cmd.Process.Signal(sig) // best-effort relay; a child that is already gone is not an error here
+				}
+				if sig == syscall.SIGTERM || sig == syscall.SIGINT {
+					// On terminal signals propagate and let main exit.
+					return
+				}
+			}
+		}
+	}()
+	return func() {
+		close(done)
+		signalStop(ch)
+	}
+}
diff --git a/data/Dockerfiles/agent/internal/proc/signal.go b/data/Dockerfiles/agent/internal/proc/signal.go
new file mode 100644
index 000000000..da282d902
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/proc/signal.go
@@ -0,0 +1,14 @@
+package proc
+
+import (
+	"os"
+	"os/signal"
+)
+
+// Indirection so tests can stub these out if ever needed.
+var (
+	signalNotify = signal.Notify
+	signalStop   = signal.Stop
+)
+
+var _ = os.Stdout // keeps the "os" import referenced; nothing else in this file uses it
diff --git a/data/Dockerfiles/agent/internal/registry/registry.go b/data/Dockerfiles/agent/internal/registry/registry.go
new file mode 100644
index 000000000..0293a8e14
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/registry/registry.go
@@ -0,0 +1,97 @@
+// Package registry publishes per-node heartbeats to Redis so the backend can
+// enumerate live containers. Two keys per service:
+//
+//	ZSET mailcow.nodes.<service>           score=unix_ts member=node_id
+//	HASH mailcow.node.<service>.<node_id>  { version, started_at, image, health* }
+//
+// Every Publish resets the HASH TTL to 30s and the ZSET TTL to 5m. Deregister clears
+// this node's entries on graceful shutdown.
+package registry
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/redis/go-redis/v9"
+)
+
+// HealthSnapshotter returns the latest health probe result so the heartbeat
+// can attach it to each tick. Implemented by main.healthState.
+type HealthSnapshotter interface {
+	Snapshot() (ok bool, detail string, at time.Time)
+}
+
+// Heartbeat carries the metadata published with every refresh.
+type Heartbeat struct {
+	Service   string
+	NodeID    string
+	Version   string
+	StartedAt time.Time
+	Image     string
+	Health    HealthSnapshotter // optional; nil → omit health fields
+}
+
+func nodesKey(service string) string      { return "mailcow.nodes." + service }
+func nodeKey(service, node string) string { return "mailcow.node." + service + "." + node }
+
+// Publish writes one heartbeat tick (ZSET score + detail hash, sent as one pipeline). Callers run this in a loop.
+func Publish(ctx context.Context, rdb *redis.Client, h Heartbeat) error {
+	now := time.Now().Unix()
+	fields := map[string]any{
+		"version":    h.Version,
+		"started_at": h.StartedAt.UTC().Format(time.RFC3339),
+		"image":      h.Image,
+		"node_id":    h.NodeID,
+		"service":    h.Service,
+		"updated_at": strconv.FormatInt(now, 10),
+	}
+	if h.Health != nil {
+		ok, detail, at := h.Health.Snapshot()
+		if ok {
+			fields["health"] = "ok"
+		} else {
+			fields["health"] = "fail"
+		}
+		fields["health_detail"] = detail
+		fields["health_at"] = strconv.FormatInt(at.Unix(), 10)
+	}
+	pipe := rdb.Pipeline()
+	pipe.ZAdd(ctx, nodesKey(h.Service), redis.Z{Score: float64(now), Member: h.NodeID})
+	pipe.Expire(ctx, nodesKey(h.Service), 5*time.Minute) // whole-ZSET TTL; NOTE(review): members of crashed nodes are not pruned here — presumably readers filter by score, verify
+	pipe.HSet(ctx, nodeKey(h.Service, h.NodeID), fields)
+	pipe.Expire(ctx, nodeKey(h.Service, h.NodeID), 30*time.Second) // per-node detail hash expires 30s after the last tick
+	_, err := pipe.Exec(ctx)
+	if err != nil {
+		return fmt.Errorf("registry: heartbeat exec: %w", err)
+	}
+	return nil
+}
+
+// Deregister removes the node from the ZSET and deletes its detail hash.
+// Called on graceful shutdown so the dashboard reflects intentional stops
+// immediately rather than waiting for TTL.
+func Deregister(ctx context.Context, rdb *redis.Client, service, nodeID string) error {
+	pipe := rdb.Pipeline()
+	pipe.ZRem(ctx, nodesKey(service), nodeID)
+	pipe.Del(ctx, nodeKey(service, nodeID))
+	_, err := pipe.Exec(ctx)
+	return err
+}
+
+// Loop runs Publish on a ticker until ctx is done. It is the typical caller.
+func Loop(ctx context.Context, rdb *redis.Client, h Heartbeat, interval time.Duration) {
+	// Publish once immediately so the dashboard sees us right away.
+	_ = Publish(ctx, rdb, h) // best-effort: a failed tick is simply followed by the next one
+	t := time.NewTicker(interval)
+	defer t.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-t.C:
+			_ = Publish(ctx, rdb, h) // best-effort, see above
+		}
+	}
+}
diff --git a/data/Dockerfiles/agent/internal/services/clock.go b/data/Dockerfiles/agent/internal/services/clock.go
new file mode 100644
index 000000000..5782419f1
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/services/clock.go
@@ -0,0 +1,9 @@
+package services
+
+import "time"
+
+// nowStamp returns a sortable UTC timestamp (one-second resolution) used to suffix
+// moved/garbage maildirs so repeated cleanups don't collide.
+func nowStamp() string {
+	return time.Now().UTC().Format("20060102T150405Z")
+}
diff --git a/data/Dockerfiles/agent/internal/services/dovecot.go b/data/Dockerfiles/agent/internal/services/dovecot.go
new file mode 100644
index 000000000..b6aa5bef6
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/services/dovecot.go
@@ -0,0 +1,294 @@
+package services
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/mailcow/mailcow-dockerized/agent/internal/commands"
+	"github.com/mailcow/mailcow-dockerized/agent/internal/proc"
+)
+
+func init() { Register("dovecot", buildDovecot) }
+
+const vmailRoot = "/var/vmail"
+
+func dovecotHealthProbe(ctx context.Context) error {
+	// IMAP greeting on :143 — must be "* OK ..."; the dial honours ctx as well as its own 3s timeout.
+	conn, err := (&net.Dialer{Timeout: 3 * time.Second}).DialContext(ctx, "tcp", "127.0.0.1:143")
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+	buf := make([]byte, 64)
+	_ = conn.SetReadDeadline(time.Now().Add(3 * time.Second))
+	n, err := conn.Read(buf)
+	if err != nil {
+		return fmt.Errorf("read greeting: %w", err)
+	}
+	greeting := string(buf[:n])
+	if !strings.HasPrefix(greeting, "* OK") {
+		return fmt.Errorf("unexpected greeting: %s", strings.TrimSpace(greeting))
+	}
+	return nil
+}
+
+func buildDovecot(sup *proc.Supervisor) *commands.Table {
+	t := commands.New("dovecot")
+	t.HealthProbe = dovecotHealthProbe
+
+	// `dovecot reload` re-reads config without restarting the master process.
+	t.Register("reload", func(ctx context.Context, _ map[string]any) (any, error) {
+		r, err := commands.Run(ctx, commands.RunOptions{}, "dovecot", "reload")
+		return nil, asError(r, err)
+	})
+	addLifecycleExceptReload(t, sup)
+
+	t.Register("exec.fts-rescan", func(ctx context.Context, args map[string]any) (any, error) {
+		user := commands.ArgStringOpt(args, "user", "")
+		argv := []string{"doveadm", "fts", "rescan"}
+		if user != "" {
+			argv = append(argv, "-u", user)
+		} else {
+			argv = append(argv, "-A")
+		}
+		r, err := commands.Run(ctx, commands.RunOptions{}, argv...)
+		return nil, asError(r, err)
+	})
+
+	t.Register("exec.sieve-list", func(ctx context.Context, args map[string]any) (any, error) {
+		user, err := commands.ArgString(args, "user")
+		if err != nil {
+			return nil, err
+		}
+		r, err := commands.Run(ctx, commands.RunOptions{}, "doveadm", "sieve", "list", "-u", user)
+		if err != nil {
+			return nil, err
+		}
+		if r.ExitCode != 0 {
+			return nil, &runError{msg: strings.TrimSpace(r.Stderr)}
+		}
+		scripts := splitNonEmpty(r.Stdout)
+		return map[string]any{"scripts": scripts}, nil
+	})
+
+	t.Register("exec.sieve-print", func(ctx context.Context, args map[string]any) (any, error) {
+		user, err := commands.ArgString(args, "user")
+		if err != nil {
+			return nil, err
+		}
+		script, err := commands.ArgString(args, "script")
+		if err != nil {
+			return nil, err
+		}
+		r, err := commands.Run(ctx, commands.RunOptions{OutputCap: 1 << 20}, "doveadm", "sieve", "get", "-u", user, script)
+		if err != nil {
+			return nil, err
+		}
+		if r.ExitCode != 0 {
+			return nil, &runError{msg: strings.TrimSpace(r.Stderr)}
+		}
+		return map[string]any{"body": r.Stdout}, nil
+	})
+
+	t.Register("exec.acl-get", func(ctx context.Context, args map[string]any) (any, error) {
+		user, err := commands.ArgString(args, "user")
+		if err != nil {
+			return nil, err
+		}
+		// First enumerate mailboxes, then collect ACLs per mailbox.
+		boxes, err := commands.Run(ctx, commands.RunOptions{}, "doveadm", "mailbox", "list", "-u", user)
+		if err != nil {
+			return nil, err
+		}
+		if boxes.ExitCode != 0 {
+			return nil, &runError{msg: strings.TrimSpace(boxes.Stderr)}
+		}
+		out := []map[string]any{}
+		for _, mbx := range splitNonEmpty(boxes.Stdout) {
+			r, err := commands.Run(ctx, commands.RunOptions{}, "doveadm", "acl", "get", "-u", user, mbx)
+			if err != nil || r.ExitCode != 0 {
+				continue // best-effort: a mailbox whose ACL lookup fails is left out of the result
+			}
+			for _, line := range strings.Split(strings.TrimSpace(r.Stdout), "\n") {
+				line = strings.TrimSpace(line)
+				if line == "" || strings.HasPrefix(line, "ID") {
+					continue
+				}
+				fields := strings.Fields(line)
+				if len(fields) >= 2 {
+					out = append(out, map[string]any{
+						"mailbox":    mbx,
+						"identifier": fields[0],
+						"rights":     strings.Join(fields[1:], " "),
+					})
+				}
+			}
+		}
+		return map[string]any{"acls": out}, nil
+	})
+
+	t.Register("exec.acl-set", func(ctx context.Context, args map[string]any) (any, error) {
+		user, err := commands.ArgString(args, "user")
+		if err != nil {
+			return nil, err
+		}
+		mailbox, err := commands.ArgString(args, "mailbox")
+		if err != nil {
+			return nil, err
+		}
+		identifier, err := commands.ArgString(args, "identifier")
+		if err != nil {
+			return nil, err
+		}
+		rights, err := commands.ArgString(args, "rights")
+		if err != nil {
+			return nil, err
+		}
+		r, err := commands.Run(ctx, commands.RunOptions{}, "doveadm", "acl", "set", "-u", user, mailbox, identifier, rights)
+		return nil, asError(r, err)
+	})
+
+	t.Register("exec.acl-delete", func(ctx context.Context, args map[string]any) (any, error) {
+		user, err := commands.ArgString(args, "user")
+		if err != nil {
+			return nil, err
+		}
+		mailbox, err := commands.ArgString(args, "mailbox")
+		if err != nil {
+			return nil, err
+		}
+		identifier, err := commands.ArgString(args, "identifier")
+		if err != nil {
+			return nil, err
+		}
+		r, err := commands.Run(ctx, commands.RunOptions{}, "doveadm", "acl", "delete", "-u", user, mailbox, identifier)
+		return nil, asError(r, err)
+	})
+
+	t.Register("exec.maildir-cleanup", func(ctx context.Context, args map[string]any) (any, error) {
+		maildir, err := commands.ArgString(args, "maildir")
+		if err != nil {
+			return nil, err
+		}
+		if err := assertSafeMaildirPath(maildir); err != nil {
+			return nil, err
+		}
+		src := filepath.Join(vmailRoot, maildir)
+		dst := filepath.Join(vmailRoot, "_garbage", maildir+"_"+nowStamp())
+		if _, err := os.Stat(src); os.IsNotExist(err) {
+			return nil, commands.ErrNotFound
+		}
+		if err := os.MkdirAll(filepath.Dir(dst), 0o770); err != nil {
+			return nil, err
+		}
+		return nil, os.Rename(src, dst)
+	})
+
+	t.Register("exec.df", func(ctx context.Context, args map[string]any) (any, error) {
+		dir := commands.ArgStringOpt(args, "dir", vmailRoot)
+		var st syscall.Statfs_t
+		if err := syscall.Statfs(dir, &st); err != nil {
+			return nil, err
+		}
+		size := uint64(st.Blocks) * uint64(st.Bsize)
+		free := uint64(st.Bavail) * uint64(st.Bsize)
+		used := size - free
+		pct := 0
+		if size > 0 {
+			pct = int(float64(used) / float64(size) * 100)
+		}
+		// Format: Filesystem,Size,Used,Avail,Use%,Mounted-on
+		return fmt.Sprintf("%s,%s,%s,%s,%d%%,%s",
+			"local", humanBytes(size), humanBytes(used), humanBytes(free), pct, dir), nil
+	})
+
+	t.Register("exec.maildir-move", func(ctx context.Context, args map[string]any) (any, error) {
+		from, err := commands.ArgString(args, "from")
+		if err != nil {
+			return nil, err
+		}
+		to, err := commands.ArgString(args, "to")
+		if err != nil {
+			return nil, err
+		}
+		if err := assertSafeMaildirPath(from); err != nil {
+			return nil, err
+		}
+		if err := assertSafeMaildirPath(to); err != nil {
+			return nil, err
+		}
+		src := filepath.Join(vmailRoot, from)
+		dst := filepath.Join(vmailRoot, to)
+		if _, err := os.Stat(src); os.IsNotExist(err) {
+			return nil, commands.ErrNotFound
+		}
+		if err := os.MkdirAll(filepath.Dir(dst), 0o770); err != nil {
+			return nil, err
+		}
+		return nil, os.Rename(src, dst)
+	})
+
+	return t
+}
+
+// addLifecycleExceptReload wires restart/stop/start without overriding reload,
+// which postfix/dovecot define themselves (canonical CLI command). No-op when sup is nil.
+func addLifecycleExceptReload(t *commands.Table, sup *proc.Supervisor) {
+	if sup == nil {
+		return
+	}
+	t.Register("restart", func(ctx context.Context, _ map[string]any) (any, error) {
+		return nil, sup.Restart(ctx)
+	})
+	t.Register("stop", func(ctx context.Context, _ map[string]any) (any, error) {
+		return nil, sup.Stop(ctx)
+	})
+	t.Register("start", func(ctx context.Context, _ map[string]any) (any, error) {
+		return nil, sup.Start()
+	})
+}
+
+func splitNonEmpty(s string) []string {
+	out := []string{} // non-nil on purpose: an empty result stays an empty slice, not nil
+	for _, line := range strings.Split(strings.TrimSpace(s), "\n") {
+		line = strings.TrimSpace(line)
+		if line != "" {
+			out = append(out, line)
+		}
+	}
+	return out
+}
+
+// assertSafeMaildirPath blocks path traversal and absolute paths — relative
+// names under /var/vmail only. Any ".." substring is rejected, even inside a name.
+func assertSafeMaildirPath(p string) error {
+	if p == "" || strings.HasPrefix(p, "/") || strings.Contains(p, "..") {
+		return &validationErr{msg: "unsafe maildir path"}
+	}
+	return nil
+}
+
+type validationErr struct{ msg string }
+
+func (e *validationErr) Error() string         { return e.msg }
+func (e *validationErr) Is(target error) bool { return target == commands.ErrValidation }
+
+// humanBytes renders a byte count in `df -H` style (1000-based units).
+func humanBytes(n uint64) string {
+	const unit = 1000
+	if n < unit {
+		return fmt.Sprintf("%dB", n)
+	}
+	div, exp := uint64(unit), 0
+	for x := n / unit; x >= unit; x /= unit {
+		div *= unit
+		exp++
+	}
+	return fmt.Sprintf("%.1f%c", float64(n)/float64(div), "KMGTPE"[exp]) // exp stays ≤ 5 for any uint64, so the index is in range
+}
diff --git a/data/Dockerfiles/agent/internal/services/generic.go b/data/Dockerfiles/agent/internal/services/generic.go
new file mode 100644
index 000000000..666aee62f
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/services/generic.go
@@ -0,0 +1,37 @@
+package services
+
+import (
+	"context"
+	"time"
+
+	"github.com/mailcow/mailcow-dockerized/agent/internal/commands"
+	"github.com/mailcow/mailcow-dockerized/agent/internal/proc"
+)
+
+// Services without any exec.* commands of their own — lifecycle only. A nil probe means no health probe is set.
+func init() {
+	Register("clamd", genericBuilder("clamd", tcpProbe("127.0.0.1:3310", 2*time.Second)))
+	Register("olefy", genericBuilder("olefy", tcpProbe("127.0.0.1:10055", 2*time.Second)))
+	Register("postfix-tlspol", genericBuilder("postfix-tlspol", tcpProbe("127.0.0.1:8642", 2*time.Second)))
+	Register("php-fpm", genericBuilder("php-fpm", tcpProbe("127.0.0.1:9001", 2*time.Second)))
+	Register("acme", genericBuilder("acme", nil))
+	Register("watchdog", genericBuilder("watchdog", nil))
+	Register("netfilter", genericBuilder("netfilter", nil))
+	Register("ofelia", genericBuilder("ofelia", nil))
+	Register("dovecot-fts", genericBuilder("dovecot-fts", nil))
+}
+
+func genericBuilder(name string, probe commands.HealthProbe) Builder {
+	return func(sup *proc.Supervisor) *commands.Table {
+		t := commands.New(name)
+		t.HealthProbe = probe
+		addLifecycle(t, sup)
+		return t
+	}
+}
+
+func tcpProbe(addr string, timeout time.Duration) commands.HealthProbe {
+	return func(ctx context.Context) error { // ctx is not passed on; only timeout is handed to probeTCP
+		return probeTCP(addr, timeout)
+	}
+}
diff --git a/data/Dockerfiles/agent/internal/services/helpers.go b/data/Dockerfiles/agent/internal/services/helpers.go
new file mode 100644
index 000000000..510e8b6ee
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/services/helpers.go
@@ -0,0 +1,79 @@
+package services
+
+import (
+	"strings"
+
+	"github.com/mailcow/mailcow-dockerized/agent/internal/commands"
+)
+
+// runError is what we return when a shell command exited non-zero but the
+// failure is not a "target not found" case. The bus maps it to
+// ErrCodeInternal.
+type runError struct{ msg string }
+
+func (e *runError) Error() string { return e.msg }
+
+// asError converts a (RunResult, err) pair from commands.Run into a single error: pre-exec error → return as-is;
+// non-zero exit → wrap trimmed stderr, or "command exited N" when stderr is empty; exit 0 → nil.
+func asError(r *commands.RunResult, err error) error {
+	if err != nil {
+		return err
+	}
+	if r.ExitCode != 0 {
+		msg := strings.TrimSpace(r.Stderr)
+		if msg == "" {
+			msg = "command exited " + itoa(r.ExitCode)
+		}
+		return &runError{msg: msg}
+	}
+	return nil
+}
+
+// asNotFoundOrError is the variant for queue/mailbox operations that may fail
+// because the target doesn't live on this node. Maps known stderr fragments
+// to commands.ErrNotFound so broadcast aggregation works.
+func asNotFoundOrError(r *commands.RunResult, err error) error {
+	if err != nil {
+		return err
+	}
+	if r.ExitCode == 0 {
+		return nil
+	}
+	if matchesAny(r.Stderr, notFoundFragments) {
+		return commands.ErrNotFound
+	}
+	return asError(r, nil) // same non-zero-exit mapping as asError, including its "command exited N" fallback for empty stderr
+}
+
+func matchesAny(haystack string, fragments []string) bool {
+	for _, f := range fragments {
+		if strings.Contains(haystack, f) {
+			return true
+		}
+	}
+	return false
+}
+
+func itoa(i int) string {
+	// avoid strconv import for a one-shot; small ints only (the most negative int is not handled)
+	if i == 0 {
+		return "0"
+	}
+	neg := false
+	if i < 0 {
+		neg = true
+		i = -i
+	}
+	var b [20]byte
+	n := len(b)
+	for i > 0 {
+		n--
+		b[n] = byte('0' + i%10)
+		i /= 10
+	}
+	if neg {
+		n--
+		b[n] = '-'
+	}
+	return string(b[n:])
+}
diff --git a/data/Dockerfiles/agent/internal/services/host.go b/data/Dockerfiles/agent/internal/services/host.go
new file mode 100644
index 000000000..abc50278e
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/services/host.go
@@ -0,0 +1,236 @@
+package services
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/mailcow/mailcow-dockerized/agent/internal/commands"
+	"github.com/mailcow/mailcow-dockerized/agent/internal/proc"
+)
+
+func init() { Register("host", buildHost) }
+
+// hostProcRoot is where the host-agent container mounts /proc. If we're not
+// running as host-agent, falling back to /proc still produces sensible numbers
+// (the container's own view) so dashboards don't blank out in unit tests.
+var hostProcRoot = "/host/proc"
+
+func resolveProc(p string) string {
+	if _, err := os.Stat(hostProcRoot); err == nil {
+		return hostProcRoot + p
+	}
+	return "/proc" + p
+}
+
+func buildHost(_ *proc.Supervisor) *commands.Table {
+	t := commands.New("host")
+	// No lifecycle — the host-agent container has no main process to manage.
+
+	t.Register("exec.df", func(ctx context.Context, args map[string]any) (any, error) {
+		path := commands.ArgStringOpt(args, "path", "/")
+		var stat syscall.Statfs_t
+		if err := syscall.Statfs(path, &stat); err != nil {
+			return nil, fmt.Errorf("statfs %s: %w", path, err)
+		}
+		size := int64(stat.Blocks) * int64(stat.Bsize)
+		free := int64(stat.Bavail) * int64(stat.Bsize)
+		used := size - free
+		return map[string]any{
+			"path":      path,
+			"size":      size,
+			"used":      used,
+			"available": free,
+		}, nil
+	})
+
+	t.Register("exec.host-stats", func(ctx context.Context, _ map[string]any) (any, error) {
+		return readHostStats()
+	})
+
+	return t
+}
+
+func readHostStats() (map[string]any, error) {
+	out := map[string]any{
+		"system_time":  time.Now().Format("2006-01-02 15:04:05"),
+		"architecture": readArchitecture(),
+	}
+
+	if uptime, err := readUptime(); err == nil {
+		out["uptime"] = int64(uptime)
+	} else {
+		out["uptime"] = int64(0)
+	}
+
+	cores := readCPUCores()
+	cpuUsage, _ := sampleHostCPU(500 * time.Millisecond) // best-effort: a failed sample is reported as 0
+	out["cpu"] = map[string]any{
+		"cores": cores,
+		"usage": cpuUsage,
+	}
+
+	memTotal, memUsage := readMemoryTotalAndUsagePct()
+	out["memory"] = map[string]any{
+		"total": memTotal, // bytes
+		"usage": memUsage, // percent 0..100
+	}
+
+	return out, nil
+}
+
+// readArchitecture returns the host's machine architecture (e.g. "x86_64",
+// "aarch64"). Falls back to a single dash if syscall.Uname fails.
+func readArchitecture() string {
+	var u syscall.Utsname
+	if err := syscall.Uname(&u); err != nil {
+		return "-"
+	}
+	return charsToString(u.Machine[:])
+}
+
+func charsToString(b []int8) string {
+	out := make([]byte, 0, len(b))
+	for _, c := range b {
+		if c == 0 {
+			break
+		}
+		out = append(out, byte(c))
+	}
+	return string(out)
+}
+
+// readCPUCores counts `^processor` lines in /proc/cpuinfo. On a container
+// with /host/proc bind-mounted this gives the host's logical CPU count,
+// not the container's cgroup limits.
+func readCPUCores() int {
+	f, err := os.Open(resolveProc("/cpuinfo"))
+	if err != nil {
+		return 0 // unreadable cpuinfo is reported as zero cores
+	}
+	defer f.Close()
+	n := 0
+	sc := bufio.NewScanner(f)
+	for sc.Scan() {
+		if strings.HasPrefix(sc.Text(), "processor") {
+			n++
+		}
+	}
+	return n
+}
+
+// readMemoryTotalAndUsagePct reads /proc/meminfo and returns (total_bytes,
+// usage_pct_0_100). "Usage" is computed as (Total - Available)/Total which
+// matches what tools like `free` show as "used". Returns (0, 0) if the file or MemTotal is unavailable.
+func readMemoryTotalAndUsagePct() (int64, int) {
+	f, err := os.Open(resolveProc("/meminfo"))
+	if err != nil {
+		return 0, 0
+	}
+	defer f.Close()
+
+	var total, available int64
+	sc := bufio.NewScanner(f)
+	for sc.Scan() {
+		fields := strings.Fields(sc.Text())
+		if len(fields) < 2 {
+			continue
+		}
+		switch fields[0] {
+		case "MemTotal:":
+			total = parseInt64(fields[1]) * 1024 // value is multiplied by 1024 to get bytes
+		case "MemAvailable:":
+			available = parseInt64(fields[1]) * 1024
+		}
+	}
+	if total <= 0 {
+		return 0, 0
+	}
+	used := total - available
+	if available <= 0 { // MemAvailable missing or zero: everything is counted as used
+		used = total
+	}
+	pct := int(float64(used) / float64(total) * 100.0)
+	if pct < 0 {
+		pct = 0
+	}
+	if pct > 100 {
+		pct = 100
+	}
+	return total, pct
+}
+
+func readUptime() (float64, error) {
+	b, err := os.ReadFile(resolveProc("/uptime"))
+	if err != nil {
+		return 0, err
+	}
+	fields := strings.Fields(string(b))
+	if len(fields) < 1 {
+		return 0, fmt.Errorf("malformed uptime")
+	}
+	return strconv.ParseFloat(fields[0], 64) // only the first field is used
+}
+
+// sampleHostCPU returns CPU utilization (0..100) sampled over `window`.
+func sampleHostCPU(window time.Duration) (float64, error) {
+	a, err := readCPULine()
+	if err != nil {
+		return 0, err
+	}
+	time.Sleep(window) // blocks the caller for the whole sampling window
+	b, err := readCPULine()
+	if err != nil {
+		return 0, err
+	}
+	totalA, totalB := sum(a), sum(b)
+	idleA, idleB := a[3], b[3] // 4th counter of the "cpu" line; readCPULine guarantees at least 4 values
+	dTotal, dIdle := totalB-totalA, idleB-idleA
+	if dTotal == 0 {
+		return 0, nil
+	}
+	return 100.0 * float64(dTotal-dIdle) / float64(dTotal), nil
+}
+
+func readCPULine() ([]int64, error) {
+	f, err := os.Open(resolveProc("/stat"))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	sc := bufio.NewScanner(f)
+	if !sc.Scan() {
+		return nil, fmt.Errorf("empty /proc/stat")
+	}
+	fields := strings.Fields(sc.Text())
+	if len(fields) < 5 || fields[0] != "cpu" {
+		return nil, fmt.Errorf("unexpected /proc/stat first line")
+	}
+	out := make([]int64, 0, len(fields)-1)
+	for _, field := range fields[1:] { // named field so it no longer shadows the open file f
+		n, err := strconv.ParseInt(field, 10, 64)
+		if err != nil {
+			return nil, err
+		}
+		out = append(out, n)
+	}
+	return out, nil
+}
+
+func sum(xs []int64) int64 {
+	var s int64
+	for _, x := range xs {
+		s += x
+	}
+	return s
+}
+
+func parseInt64(s string) int64 {
+	n, _ := strconv.ParseInt(s, 10, 64) // unparsable input deliberately yields 0
+	return n
+}
diff --git a/data/Dockerfiles/agent/internal/services/nginx.go b/data/Dockerfiles/agent/internal/services/nginx.go
new file mode 100644
index 000000000..5cedd42e3
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/services/nginx.go
@@ -0,0 +1,39 @@
+package services
+
+import (
+	"context"
+	"time"
+
+	"github.com/mailcow/mailcow-dockerized/agent/internal/commands"
+	"github.com/mailcow/mailcow-dockerized/agent/internal/proc"
+)
+
+func init() { Register("nginx", buildNginx) }
+
+func nginxHealthProbe(ctx context.Context) error {
+	if err := probeShell(ctx, 3*time.Second, "nginx", "-t"); err != nil {
+		return err
+	}
+	return probeTCP("127.0.0.1:8081", 2*time.Second)
+}
+
+func buildNginx(sup *proc.Supervisor) *commands.Table {
+	t := commands.New("nginx")
+	t.HealthProbe = nginxHealthProbe
+	t.Register("reload", func(ctx context.Context, _ map[string]any) (any, error) {
+		r, err := commands.Run(ctx, commands.RunOptions{}, "nginx", "-s", "reload")
+		return nil, asError(r, err)
+	})
+	addLifecycleExceptReload(t, sup)
+	t.Register("exec.test-config", func(ctx context.Context, _ map[string]any) (any, error) {
+		r, err := commands.Run(ctx, commands.RunOptions{}, "nginx", "-t")
+		if err != nil {
+			return nil, err
+		}
+		return map[string]any{
+			"ok":     r.ExitCode == 0,
+			"output": r.Stderr + r.Stdout,
+		}, nil
+	})
+	return t
+}
diff --git a/data/Dockerfiles/agent/internal/services/postfix.go b/data/Dockerfiles/agent/internal/services/postfix.go
new file mode 100644
index 000000000..1b98a68bd
--- /dev/null
+++ b/data/Dockerfiles/agent/internal/services/postfix.go
@@ -0,0 +1,142 @@
+package services
+
+import (
+	"context"
+	"encoding/json"
+	"strings"
+	"time"
+
+	"github.com/mailcow/mailcow-dockerized/agent/internal/commands"
+	"github.com/mailcow/mailcow-dockerized/agent/internal/proc"
+)
+
+func init() { Register("postfix", buildPostfix) }
+
+// notFoundFragments are substrings emitted by postsuper/postqueue when the
+// requested queue id doesn't live on this node. Broadcast handlers map them
+// to commands.ErrNotFound so the backend can count partial success.
+var notFoundFragments = []string{
+	"No such file or directory",
+	"no such file",
+	"unknown",
+}
+
+func postfixHealthProbe(ctx context.Context) error {
+	if err := probeSMTPGreeting("127.0.0.1:25", 3*time.Second); err != nil {
+		return err
+	}
+	return probeShell(ctx, 5*time.Second, "postfix", "status")
+}
+
+func buildPostfix(sup *proc.Supervisor) *commands.Table {
+	t := commands.New("postfix")
+	t.HealthProbe = postfixHealthProbe
+
+	// Override generic reload — `postfix reload` is the canonical operation,
+	// not SIGHUP-to-supervisord (which would just rotate logs).
+ t.Register("reload", func(ctx context.Context, _ map[string]any) (any, error) { + r, err := commands.Run(ctx, commands.RunOptions{}, "postfix", "reload") + return nil, asError(r, err) + }) + // Lifecycle: stop/start/restart still go through the supervisor. + if sup != nil { + t.Register("restart", func(ctx context.Context, _ map[string]any) (any, error) { + return nil, sup.Restart(ctx) + }) + t.Register("stop", func(ctx context.Context, _ map[string]any) (any, error) { + return nil, sup.Stop(ctx) + }) + t.Register("start", func(ctx context.Context, _ map[string]any) (any, error) { + return nil, sup.Start() + }) + } + + t.Register("exec.mailq", func(ctx context.Context, _ map[string]any) (any, error) { + r, err := commands.Run(ctx, commands.RunOptions{OutputCap: 8 << 20}, "postqueue", "-j") + if err != nil { + return nil, err + } + if r.ExitCode != 0 { + return nil, &runError{msg: "postqueue failed: " + r.Stderr} + } + // postqueue -j prints one JSON object per line. + entries := make([]map[string]any, 0) + for _, line := range strings.Split(strings.TrimSpace(r.Stdout), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + var obj map[string]any + if err := json.Unmarshal([]byte(line), &obj); err == nil { + entries = append(entries, obj) + } + } + return map[string]any{"queue": entries}, nil + }) + + t.Register("exec.flush-queue", func(ctx context.Context, _ map[string]any) (any, error) { + r, err := commands.Run(ctx, commands.RunOptions{}, "postqueue", "-f") + return nil, asError(r, err) + }) + + t.Register("exec.delete-from-queue", func(ctx context.Context, args map[string]any) (any, error) { + qid, err := commands.ArgString(args, "queue_id") + if err != nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{}, "postsuper", "-d", qid) + return nil, asNotFoundOrError(r, err) + }) + + t.Register("exec.hold-queue", func(ctx context.Context, args map[string]any) (any, error) { + qid, err := commands.ArgString(args, 
"queue_id") + if err != nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{}, "postsuper", "-h", qid) + return nil, asNotFoundOrError(r, err) + }) + + t.Register("exec.unhold-queue", func(ctx context.Context, args map[string]any) (any, error) { + qid, err := commands.ArgString(args, "queue_id") + if err != nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{}, "postsuper", "-H", qid) + return nil, asNotFoundOrError(r, err) + }) + + t.Register("exec.deliver-now", func(ctx context.Context, args map[string]any) (any, error) { + qid, err := commands.ArgString(args, "queue_id") + if err != nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{}, "postqueue", "-i", qid) + return nil, asNotFoundOrError(r, err) + }) + + t.Register("exec.cat-queue", func(ctx context.Context, args map[string]any) (any, error) { + qid, err := commands.ArgString(args, "queue_id") + if err != nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{OutputCap: 2 << 20}, "postcat", "-q", qid) + if err != nil { + return nil, err + } + if r.ExitCode != 0 { + if matchesAny(r.Stderr, notFoundFragments) { + return nil, commands.ErrNotFound + } + return nil, &runError{msg: "postcat failed: " + r.Stderr} + } + return map[string]any{"body": r.Stdout}, nil + }) + + t.Register("exec.super-delete", func(ctx context.Context, _ map[string]any) (any, error) { + r, err := commands.Run(ctx, commands.RunOptions{}, "postsuper", "-d", "ALL") + return nil, asError(r, err) + }) + + return t +} diff --git a/data/Dockerfiles/agent/internal/services/probes.go b/data/Dockerfiles/agent/internal/services/probes.go new file mode 100644 index 000000000..c6350302b --- /dev/null +++ b/data/Dockerfiles/agent/internal/services/probes.go @@ -0,0 +1,81 @@ +package services + +import ( + "bufio" + "context" + "errors" + "fmt" + "net" + "net/http" + "strings" + "time" + + "github.com/mailcow/mailcow-dockerized/agent/internal/commands" 
// probeSMTPGreeting dials addr over TCP and inspects the first line the peer
// sends. It returns nil when that line begins with "220", the dial error when
// the connection cannot be established, and a descriptive error when the
// line cannot be read or carries a different prefix.
//
// timeout bounds the dial and, separately, the wait for the greeting line.
func probeSMTPGreeting(addr string, timeout time.Duration) error {
	conn, dialErr := net.DialTimeout("tcp", addr, timeout)
	if dialErr != nil {
		return dialErr
	}
	defer conn.Close()

	// Best effort: if the deadline cannot be set the read below simply runs
	// without one.
	_ = conn.SetReadDeadline(time.Now().Add(timeout))

	reader := bufio.NewReader(conn)
	greeting, readErr := reader.ReadString('\n')
	switch {
	case readErr != nil:
		return fmt.Errorf("read greeting: %w", readErr)
	case strings.HasPrefix(greeting, "220"):
		return nil
	default:
		return fmt.Errorf("unexpected greeting: %s", strings.TrimSpace(greeting))
	}
}
+ if err != nil { + return err + } + if r.ExitCode != 0 { + msg := strings.TrimSpace(r.Stderr) + if msg == "" { + msg = fmt.Sprintf("exit %d", r.ExitCode) + } + return errors.New(msg) + } + return nil +} diff --git a/data/Dockerfiles/agent/internal/services/rspamd.go b/data/Dockerfiles/agent/internal/services/rspamd.go new file mode 100644 index 000000000..035b3049c --- /dev/null +++ b/data/Dockerfiles/agent/internal/services/rspamd.go @@ -0,0 +1,86 @@ +package services + +import ( + "context" + "os" + "path/filepath" + "strings" + "time" + + "github.com/mailcow/mailcow-dockerized/agent/internal/commands" + "github.com/mailcow/mailcow-dockerized/agent/internal/proc" +) + +func init() { Register("rspamd", buildRspamd) } + +func rspamdHealthProbe(ctx context.Context) error { + return probeHTTP(ctx, "http://127.0.0.1:11334/ping", 3*time.Second) +} + +// Override file rspamd reads on startup for the controller's enable_password. +const rspamdWorkerPasswordPath = "/etc/rspamd/override.d/worker-controller-password.inc" + +func buildRspamd(sup *proc.Supervisor) *commands.Table { + t := commands.New("rspamd") + t.HealthProbe = rspamdHealthProbe + addLifecycle(t, sup) + + t.Register("exec.set-worker-password", func(ctx context.Context, args map[string]any) (any, error) { + password, err := commands.ArgString(args, "password") + if err != nil { + return nil, err + } + // rspamadm pw -e -p writes the hashed value to stdout. + r, err := commands.Run(ctx, commands.RunOptions{}, "rspamadm", "pw", "-e", "-p", password) + if err != nil { + return nil, err + } + if r.ExitCode != 0 { + return nil, &runError{msg: "rspamadm pw failed: " + strings.TrimSpace(r.Stderr)} + } + hash := strings.TrimSpace(r.Stdout) + // rspamd distinguishes `password` (read-only access to the controller) + // from `enable_password` (write access — restart, settings, learn). 
+ content := "enable_password = \"" + hash + "\";\n" + if err := os.MkdirAll(filepath.Dir(rspamdWorkerPasswordPath), 0o755); err != nil { + return nil, err + } + if err := os.WriteFile(rspamdWorkerPasswordPath, []byte(content), 0o644); err != nil { + return nil, err + } + // Must do a full re-fork of workers (SIGHUP to rspamd master), not + // `rspamadm control reload` + if sup != nil { + return nil, sup.Reload() + } + return nil, nil + }) + + t.Register("exec.relearn-spam", func(ctx context.Context, args map[string]any) (any, error) { + path, err := commands.ArgString(args, "file") + if err != nil { + return nil, err + } + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{Stdin: data}, "rspamc", "learn_spam") + return nil, asError(r, err) + }) + + t.Register("exec.relearn-ham", func(ctx context.Context, args map[string]any) (any, error) { + path, err := commands.ArgString(args, "file") + if err != nil { + return nil, err + } + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{Stdin: data}, "rspamc", "learn_ham") + return nil, asError(r, err) + }) + + return t +} diff --git a/data/Dockerfiles/agent/internal/services/services.go b/data/Dockerfiles/agent/internal/services/services.go new file mode 100644 index 000000000..408b1b1f3 --- /dev/null +++ b/data/Dockerfiles/agent/internal/services/services.go @@ -0,0 +1,87 @@ +// Package services registers per-service command tables. The agent selects +// the right table at startup via MAILCOW_AGENT_SERVICE. +// +// A service "builder" receives a Supervisor for lifecycle commands; services +// that don't supervise a main process (currently just "host") pass nil and +// the generic lifecycle commands are skipped. 
+package services + +import ( + "context" + "fmt" + "time" + + "github.com/mailcow/mailcow-dockerized/agent/internal/commands" + "github.com/mailcow/mailcow-dockerized/agent/internal/proc" +) + +// Builder constructs a command table for a service. sup may be nil for +// services without a supervised main process. +type Builder func(sup *proc.Supervisor) *commands.Table + +var registry = map[string]Builder{} + +// Register installs a builder for a service name. Called from init() in each +// per-service file. +func Register(service string, b Builder) { + if _, dup := registry[service]; dup { + panic("services: duplicate registration for " + service) + } + registry[service] = b +} + +// Build returns the table for service, or an error if no builder exists. +func Build(service string, sup *proc.Supervisor) (*commands.Table, error) { + b, ok := registry[service] + if !ok { + return nil, fmt.Errorf("services: unknown service %q (set MAILCOW_AGENT_SERVICE correctly)", service) + } + return b(sup), nil +} + +// Known returns the list of registered service names (sorted-ish, depends on +// map iteration — for help output only). +func Known() []string { + out := make([]string, 0, len(registry)) + for k := range registry { + out = append(out, k) + } + return out +} + +// restartSettle is how long we wait after a Start to verify the new child +// didn't immediately crash. Gives the operator real "did the service come +// back up?" feedback instead of an instant OK that hides flapping services. +const restartSettle = 3 * time.Second + +// addLifecycle wires reload/restart/stop/start onto t backed by sup. Services +// override these (e.g. postfix overrides reload to run `postfix reload`). 
+func addLifecycle(t *commands.Table, sup *proc.Supervisor) { + if sup == nil { + return + } + t.Register("reload", func(ctx context.Context, _ map[string]any) (any, error) { + return nil, sup.Reload() + }) + t.Register("restart", func(ctx context.Context, _ map[string]any) (any, error) { + if err := sup.Restart(ctx); err != nil { + return nil, err + } + if err := sup.WaitStable(ctx, restartSettle); err != nil { + return nil, err + } + return map[string]any{"status": "restarted", "settled_ms": int(restartSettle / time.Millisecond)}, nil + }) + t.Register("stop", func(ctx context.Context, _ map[string]any) (any, error) { + return nil, sup.Stop(ctx) + }) + t.Register("start", func(ctx context.Context, _ map[string]any) (any, error) { + if err := sup.Start(); err != nil { + return nil, err + } + if err := sup.WaitStable(ctx, restartSettle); err != nil { + return nil, err + } + return map[string]any{"status": "started", "settled_ms": int(restartSettle / time.Millisecond)}, nil + }) +} diff --git a/data/Dockerfiles/agent/internal/services/sogo.go b/data/Dockerfiles/agent/internal/services/sogo.go new file mode 100644 index 000000000..7f1dfc03d --- /dev/null +++ b/data/Dockerfiles/agent/internal/services/sogo.go @@ -0,0 +1,36 @@ +package services + +import ( + "context" + "time" + + "github.com/mailcow/mailcow-dockerized/agent/internal/commands" + "github.com/mailcow/mailcow-dockerized/agent/internal/proc" +) + +func init() { Register("sogo", buildSogo) } + +func sogoHealthProbe(ctx context.Context) error { + return probeHTTP(ctx, "http://127.0.0.1:20000/SOGo.index/", 3*time.Second) +} + +func buildSogo(sup *proc.Supervisor) *commands.Table { + t := commands.New("sogo") + t.HealthProbe = sogoHealthProbe + addLifecycle(t, sup) + + t.Register("exec.rename-user", func(ctx context.Context, args map[string]any) (any, error) { + oldName, err := commands.ArgString(args, "old") + if err != nil { + return nil, err + } + newName, err := commands.ArgString(args, "new") + if err != 
nil { + return nil, err + } + r, err := commands.Run(ctx, commands.RunOptions{}, "sogo-tool", "rename-user", oldName, newName) + return nil, asError(r, err) + }) + + return t +} diff --git a/data/Dockerfiles/agent/internal/services/unbound.go b/data/Dockerfiles/agent/internal/services/unbound.go new file mode 100644 index 000000000..2b9821c57 --- /dev/null +++ b/data/Dockerfiles/agent/internal/services/unbound.go @@ -0,0 +1,26 @@ +package services + +import ( + "context" + "time" + + "github.com/mailcow/mailcow-dockerized/agent/internal/commands" + "github.com/mailcow/mailcow-dockerized/agent/internal/proc" +) + +func init() { Register("unbound", buildUnbound) } + +func unboundHealthProbe(ctx context.Context) error { + return probeShell(ctx, 3*time.Second, "dig", "+time=2", "+tries=1", "@127.0.0.1", "mailcow.email", "A") +} + +func buildUnbound(sup *proc.Supervisor) *commands.Table { + t := commands.New("unbound") + t.HealthProbe = unboundHealthProbe + addLifecycle(t, sup) + t.Register("exec.flush-cache", func(ctx context.Context, _ map[string]any) (any, error) { + r, err := commands.Run(ctx, commands.RunOptions{}, "unbound-control", "flush_zone", ".") + return nil, asError(r, err) + }) + return t +} diff --git a/data/Dockerfiles/agent/internal/stats/stats.go b/data/Dockerfiles/agent/internal/stats/stats.go new file mode 100644 index 000000000..2272252fa --- /dev/null +++ b/data/Dockerfiles/agent/internal/stats/stats.go @@ -0,0 +1,155 @@ +// Package stats reads cgroup CPU + memory usage and publishes them to +// +// HASH mailcow.stats.. +// +// with a 30s TTL. Supports both cgroup v1 and v2. The numbers are intentionally +// approximate — they replace what dockerapi exposed via /containers//stats. +package stats + +import ( + "context" + "os" + "strconv" + "strings" + "time" + + "github.com/redis/go-redis/v9" +) + +// Sample is one observation. CPUPercent is the share of one host CPU consumed +// since the previous sample (range 0..100*numCPU). 
+type Sample struct { + CPUPercent float64 + MemoryBytes int64 + MemoryLimit int64 + Timestamp time.Time +} + +func statsKey(service, node string) string { return "mailcow.stats." + service + "." + node } + +// Publisher reads cgroup metrics and pushes them to Redis on a ticker. +type Publisher struct { + rdb *redis.Client + service string + node string + + // previous CPU sample to derive a delta-based percent + prevCPUNanos int64 + prevAt time.Time +} + +// NewPublisher constructs a publisher. Caller drives it via Run. +func NewPublisher(rdb *redis.Client, service, node string) *Publisher { + return &Publisher{rdb: rdb, service: service, node: node} +} + +// Run blocks on a ticker until ctx is done. +func (p *Publisher) Run(ctx context.Context, interval time.Duration) { + t := time.NewTicker(interval) + defer t.Stop() + // Prime the CPU sample so the first publish has a real delta. + if cpu, ok := readCPUNanos(); ok { + p.prevCPUNanos = cpu + p.prevAt = time.Now() + } + // Immediate first publish so the dashboard never sees a node without a + // stats hash. CPU is 0 in this first sample (no prev delta yet); memory + // is already accurate. 
+ _ = p.publish(ctx, p.sample()) + for { + select { + case <-ctx.Done(): + return + case <-t.C: + _ = p.publish(ctx, p.sample()) + } + } +} + +func (p *Publisher) sample() Sample { + s := Sample{Timestamp: time.Now()} + if cpu, ok := readCPUNanos(); ok { + if !p.prevAt.IsZero() { + dCPU := cpu - p.prevCPUNanos + dT := s.Timestamp.Sub(p.prevAt).Nanoseconds() + if dT > 0 && dCPU >= 0 { + s.CPUPercent = (float64(dCPU) / float64(dT)) * 100.0 + } + } + p.prevCPUNanos = cpu + p.prevAt = s.Timestamp + } + if mem, limit, ok := readMemory(); ok { + s.MemoryBytes = mem + s.MemoryLimit = limit + } + return s +} + +func (p *Publisher) publish(ctx context.Context, s Sample) error { + pipe := p.rdb.Pipeline() + pipe.HSet(ctx, statsKey(p.service, p.node), map[string]any{ + "cpu_percent": strconv.FormatFloat(s.CPUPercent, 'f', 2, 64), + "memory_bytes": s.MemoryBytes, + "memory_limit": s.MemoryLimit, + "timestamp": s.Timestamp.Unix(), + "node_id": p.node, + "service": p.service, + }) + pipe.Expire(ctx, statsKey(p.service, p.node), 30*time.Second) + _, err := pipe.Exec(ctx) + return err +} + +// --- cgroup readers -------------------------------------------------------- + +// readCPUNanos returns total CPU-nanoseconds consumed by the current cgroup, +// summed across all CPUs. Works for both cgroup v2 (cpu.stat) and v1 +// (cpuacct.usage). 
// readMemory returns the current memory usage and the memory limit of this
// cgroup in bytes, trying the cgroup v2 files first and the v1 files second.
//
// The bool is false (and both numbers 0) when neither usage file can be
// read. The limit is -1 when the cgroup is unlimited, regardless of cgroup
// version, and 0 when the usage was readable but the limit file was not.
func readMemory() (int64, int64, bool) {
	return readMemoryAt("/sys/fs/cgroup")
}

// readMemoryAt implements readMemory for a cgroup filesystem mounted at root.
func readMemoryAt(root string) (int64, int64, bool) {
	// v1 has no "max" keyword: an unlimited cgroup reports a page-rounded
	// value just below math.MaxInt64 (9223372036854771712 with 4 KiB pages).
	// Anything at or above 2^62 bytes is treated as "no limit" so both
	// versions publish the same sentinel.
	const unlimitedFloor = int64(1) << 62

	layouts := []struct{ usage, limit string }{
		{"memory.current", "memory.max"},                                 // v2
		{"memory/memory.usage_in_bytes", "memory/memory.limit_in_bytes"}, // v1
	}
	for _, l := range layouts {
		cur, err := readInt(root + "/" + l.usage)
		if err != nil {
			continue
		}
		// A missing or malformed limit file is tolerated; usage alone is
		// still worth publishing.
		limit, _ := readInt(root + "/" + l.limit)
		if limit >= unlimitedFloor {
			limit = -1
		}
		return cur, limit, true
	}
	return 0, 0, false
}

// readInt reads path and parses its trimmed content as a base-10 int64.
// The literal content "max" (cgroup v2's "no limit") is returned as -1.
func readInt(path string) (int64, error) {
	b, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}
	s := strings.TrimSpace(string(b))
	if s == "max" {
		return -1, nil
	}
	return strconv.ParseInt(s, 10, 64)
}
Prints the +# reply envelope JSON on stdout. +# +# Requires the `redis-cli` binary to be present in the calling container. + +set -e + +op="${1:-}" +svc="${2:-}" +cmd="${3:-}" +args="${4:-{\}}" +tmo="${5:-10}" + +if [ -z "$op" ] || [ -z "$svc" ] || [ -z "$cmd" ]; then + echo "usage: $0 send|call [json-args] [timeout-seconds]" >&2 + exit 2 +fi + +redis_host="${REDIS_SLAVEOF_IP:-redis-mailcow}" +redis_port="${REDIS_SLAVEOF_PORT:-6379}" + +rcli() { + if [ -n "${REDISPASS:-}" ]; then + redis-cli -h "$redis_host" -p "$redis_port" -a "$REDISPASS" --no-auth-warning "$@" + else + redis-cli -h "$redis_host" -p "$redis_port" "$@" + fi +} + +rid="$(date +%s%N)$$" +issued_by="$(hostname 2>/dev/null || echo unknown)" + +case "$op" in + send) + payload="{\"cmd\":\"${cmd}\",\"request_id\":\"${rid}\",\"args\":${args},\"issued_by\":\"${issued_by}\"}" + rcli PUBLISH "mailcow.control.${svc}" "$payload" + ;; + call) + reply="mailcow.reply.${rid}" + payload="{\"cmd\":\"${cmd}\",\"request_id\":\"${rid}\",\"args\":${args},\"reply_to\":\"${reply}\",\"issued_by\":\"${issued_by}\"}" + rcli PUBLISH "mailcow.control.${svc}" "$payload" >/dev/null + # BLPOP returns two lines: the list name then the value. Print only the value. + rcli BLPOP "$reply" "$tmo" 2>/dev/null | tail -n1 + ;; + *) + echo "usage: $0 send|call [json-args] [timeout-seconds]" >&2 + exit 2 + ;; +esac diff --git a/data/Dockerfiles/clamd/Dockerfile b/data/Dockerfiles/clamd/Dockerfile index e60e7eef1..9e5ec9505 100644 --- a/data/Dockerfiles/clamd/Dockerfile +++ b/data/Dockerfiles/clamd/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM alpine:3.21 AS builder WORKDIR /src @@ -41,7 +45,7 @@ RUN wget -P /src https://www.clamav.net/downloads/production/clamav-${CLAMD_VERS -D ENABLE_MILTER=ON \ -D ENABLE_MAN_PAGES=OFF \ -D ENABLE_STATIC_LIB=OFF \ - -D ENABLE_JSON_SHARED=ON \ + -D ENABLE_JSON_SHARED=ON \ && cmake --build . 
\ && make DESTDIR="/clamav" -j$(($(nproc) - 1)) install \ && rm -r "/clamav/usr/lib/pkgconfig/" \ @@ -104,7 +108,15 @@ COPY healthcheck.sh /healthcheck.sh COPY clamdcheck.sh /usr/local/bin RUN chmod +x /healthcheck.sh RUN chmod +x /usr/local/bin/clamdcheck.sh -HEALTHCHECK --start-period=6m CMD "/healthcheck.sh" -ENTRYPOINT [] -CMD ["/sbin/tini", "-g", "--", "/clamd.sh"] \ No newline at end of file +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=clamd \ + MAILCOW_AGENT_MAIN_CMD="/sbin/tini -g -- /clamd.sh" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] \ No newline at end of file diff --git a/data/Dockerfiles/dockerapi/Dockerfile b/data/Dockerfiles/dockerapi/Dockerfile deleted file mode 100644 index c27f6154b..000000000 --- a/data/Dockerfiles/dockerapi/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM alpine:3.23 - -LABEL maintainer = "The Infrastructure Company GmbH " - -ARG PIP_BREAK_SYSTEM_PACKAGES=1 -WORKDIR /app - -RUN apk add --update --no-cache python3 \ - py3-pip \ - openssl \ - tzdata \ - py3-psutil \ - py3-redis \ - py3-async-timeout \ -&& pip3 install --upgrade pip \ - fastapi \ - uvicorn \ - aiodocker \ - docker -RUN mkdir /app/modules - -COPY docker-entrypoint.sh /app/ -COPY main.py /app/main.py -COPY modules/ /app/modules/ - -ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] -CMD ["python", "main.py"] \ No newline at end of file diff --git a/data/Dockerfiles/dockerapi/docker-entrypoint.sh b/data/Dockerfiles/dockerapi/docker-entrypoint.sh deleted file mode 100755 index 64f4b8295..000000000 --- a/data/Dockerfiles/dockerapi/docker-entrypoint.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -`openssl req -x509 -newkey rsa:4096 -sha256 -days 3650 -nodes \ - -keyout 
/app/dockerapi_key.pem \ - -out /app/dockerapi_cert.pem \ - -subj /CN=dockerapi/O=mailcow \ - -addext subjectAltName=DNS:dockerapi` - -exec "$@" diff --git a/data/Dockerfiles/dockerapi/main.py b/data/Dockerfiles/dockerapi/main.py deleted file mode 100644 index bf197bd61..000000000 --- a/data/Dockerfiles/dockerapi/main.py +++ /dev/null @@ -1,261 +0,0 @@ -import os -import sys -import uvicorn -import json -import uuid -import async_timeout -import asyncio -import aiodocker -import docker -import logging -from logging.config import dictConfig -from fastapi import FastAPI, Response, Request -from modules.DockerApi import DockerApi -from redis import asyncio as aioredis -from contextlib import asynccontextmanager - -dockerapi = None - -@asynccontextmanager -async def lifespan(app: FastAPI): - global dockerapi - - # Initialize a custom logger - logger = logging.getLogger("dockerapi") - logger.setLevel(logging.INFO) - # Configure the logger to output logs to the terminal - handler = logging.StreamHandler() - handler.setLevel(logging.INFO) - formatter = logging.Formatter("%(levelname)s: %(message)s") - handler.setFormatter(formatter) - logger.addHandler(handler) - - logger.info("Init APP") - - # Init redis client - if os.environ['REDIS_SLAVEOF_IP'] != "": - redis_client = redis = await aioredis.from_url(f"redis://{os.environ['REDIS_SLAVEOF_IP']}:{os.environ['REDIS_SLAVEOF_PORT']}/0", password=os.environ['REDISPASS']) - else: - redis_client = redis = await aioredis.from_url("redis://redis-mailcow:6379/0", password=os.environ['REDISPASS']) - - # Init docker clients - sync_docker_client = docker.DockerClient(base_url='unix://var/run/docker.sock', version='auto') - async_docker_client = aiodocker.Docker(url='unix:///var/run/docker.sock') - - dockerapi = DockerApi(redis_client, sync_docker_client, async_docker_client, logger) - - logger.info("Subscribe to redis channel") - # Subscribe to redis channel - dockerapi.pubsub = redis.pubsub() - await 
dockerapi.pubsub.subscribe("MC_CHANNEL") - asyncio.create_task(handle_pubsub_messages(dockerapi.pubsub)) - - - yield - - # Close docker connections - dockerapi.sync_docker_client.close() - await dockerapi.async_docker_client.close() - - # Close redis - await dockerapi.pubsub.unsubscribe("MC_CHANNEL") - await dockerapi.redis_client.close() - -app = FastAPI(lifespan=lifespan) - -# Define Routes -@app.get("/host/stats") -async def get_host_update_stats(): - global dockerapi - - if dockerapi.host_stats_isUpdating == False: - asyncio.create_task(dockerapi.get_host_stats()) - dockerapi.host_stats_isUpdating = True - - while True: - if await dockerapi.redis_client.exists('host_stats'): - break - await asyncio.sleep(1.5) - - stats = json.loads(await dockerapi.redis_client.get('host_stats')) - return Response(content=json.dumps(stats, indent=4), media_type="application/json") - -@app.get("/containers/{container_id}/json") -async def get_container(container_id : str): - global dockerapi - - if container_id and container_id.isalnum(): - try: - for container in (await dockerapi.async_docker_client.containers.list()): - if container._id == container_id: - container_info = await container.show() - return Response(content=json.dumps(container_info, indent=4), media_type="application/json") - - res = { - "type": "danger", - "msg": "no container found" - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - except Exception as e: - res = { - "type": "danger", - "msg": str(e) - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - res = { - "type": "danger", - "msg": "no or invalid id defined" - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - -@app.get("/containers/json") -async def get_containers(all: bool = False): - global dockerapi - - containers = {} - try: - for container in (await dockerapi.async_docker_client.containers.list(all=all)): - container_info = await 
container.show() - containers.update({container_info['Id']: container_info}) - return Response(content=json.dumps(containers, indent=4), media_type="application/json") - except Exception as e: - res = { - "type": "danger", - "msg": str(e) - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - -@app.post("/containers/{container_id}/{post_action}") -async def post_containers(container_id : str, post_action : str, request: Request): - global dockerapi - - try: - request_json = await request.json() - except Exception as err: - request_json = {} - - if container_id and container_id.isalnum() and post_action: - try: - """Dispatch container_post api call""" - if post_action == 'exec': - if not request_json or not 'cmd' in request_json: - res = { - "type": "danger", - "msg": "cmd is missing" - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - if not request_json or not 'task' in request_json: - res = { - "type": "danger", - "msg": "task is missing" - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - - api_call_method_name = '__'.join(['container_post', str(post_action), str(request_json['cmd']), str(request_json['task']) ]) - else: - api_call_method_name = '__'.join(['container_post', str(post_action) ]) - - api_call_method = getattr(dockerapi, api_call_method_name, lambda container_id: Response(content=json.dumps({'type': 'danger', 'msg':'container_post - unknown api call' }, indent=4), media_type="application/json")) - - dockerapi.logger.info("api call: %s, container_id: %s" % (api_call_method_name, container_id)) - return api_call_method(request_json, container_id=container_id) - except Exception as e: - dockerapi.logger.error("error - container_post: %s" % str(e)) - res = { - "type": "danger", - "msg": str(e) - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - - else: - res = { - "type": "danger", - "msg": "invalid 
container id or missing action" - } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - -@app.post("/container/{container_id}/stats/update") -async def post_container_update_stats(container_id : str): - global dockerapi - - # start update task for container if no task is running - if container_id not in dockerapi.containerIds_to_update: - asyncio.create_task(dockerapi.get_container_stats(container_id)) - dockerapi.containerIds_to_update.append(container_id) - - while True: - if await dockerapi.redis_client.exists(container_id + '_stats'): - break - await asyncio.sleep(1.5) - - stats = json.loads(await dockerapi.redis_client.get(container_id + '_stats')) - return Response(content=json.dumps(stats, indent=4), media_type="application/json") - - -# PubSub Handler -async def handle_pubsub_messages(channel: aioredis.client.PubSub): - global dockerapi - - while True: - try: - async with async_timeout.timeout(60): - message = await channel.get_message(ignore_subscribe_messages=True, timeout=30) - if message is not None: - # Parse message - data_json = json.loads(message['data'].decode('utf-8')) - dockerapi.logger.info(f"PubSub Received - {json.dumps(data_json)}") - - # Handle api_call - if 'api_call' in data_json: - # api_call: container_post - if data_json['api_call'] == "container_post": - if 'post_action' in data_json and 'container_name' in data_json: - try: - """Dispatch container_post api call""" - request_json = {} - if data_json['post_action'] == 'exec': - if 'request' in data_json: - request_json = data_json['request'] - if 'cmd' in request_json: - if 'task' in request_json: - api_call_method_name = '__'.join(['container_post', str(data_json['post_action']), str(request_json['cmd']), str(request_json['task']) ]) - else: - dockerapi.logger.error("api call: task missing") - else: - dockerapi.logger.error("api call: cmd missing") - else: - dockerapi.logger.error("api call: request missing") - else: - api_call_method_name = 
'__'.join(['container_post', str(data_json['post_action'])]) - - if api_call_method_name: - api_call_method = getattr(dockerapi, api_call_method_name) - if api_call_method: - dockerapi.logger.info("api call: %s, container_name: %s" % (api_call_method_name, data_json['container_name'])) - api_call_method(request_json, container_name=data_json['container_name']) - else: - dockerapi.logger.error("api call not found: %s, container_name: %s" % (api_call_method_name, data_json['container_name'])) - except Exception as e: - dockerapi.logger.error("container_post: %s" % str(e)) - else: - dockerapi.logger.error("api call: missing container_name, post_action or request") - else: - dockerapi.logger.error("Unknown PubSub received - %s" % json.dumps(data_json)) - else: - dockerapi.logger.error("Unknown PubSub received - %s" % json.dumps(data_json)) - - await asyncio.sleep(0.0) - except asyncio.TimeoutError: - pass - -if __name__ == '__main__': - uvicorn.run( - app, - host="0.0.0.0", - port=443, - ssl_certfile="/app/dockerapi_cert.pem", - ssl_keyfile="/app/dockerapi_key.pem", - log_level="info", - loop="none" - ) diff --git a/data/Dockerfiles/dockerapi/modules/DockerApi.py b/data/Dockerfiles/dockerapi/modules/DockerApi.py deleted file mode 100644 index 4701cbf51..000000000 --- a/data/Dockerfiles/dockerapi/modules/DockerApi.py +++ /dev/null @@ -1,626 +0,0 @@ -import psutil -import sys -import os -import re -import time -import json -import asyncio -import platform -from datetime import datetime -from fastapi import FastAPI, Response, Request - -class DockerApi: - def __init__(self, redis_client, sync_docker_client, async_docker_client, logger): - self.redis_client = redis_client - self.sync_docker_client = sync_docker_client - self.async_docker_client = async_docker_client - self.logger = logger - - self.host_stats_isUpdating = False - self.containerIds_to_update = [] - - # api call: container_post - post_action: stop - def container_post__stop(self, request_json, **kwargs): - if 
'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(all=True, filters=filters): - container.stop() - - res = { 'type': 'success', 'msg': 'command completed successfully'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: start - def container_post__start(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(all=True, filters=filters): - container.start() - - res = { 'type': 'success', 'msg': 'command completed successfully'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: restart - def container_post__restart(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(all=True, filters=filters): - container.restart() - - res = { 'type': 'success', 'msg': 'command completed successfully'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: top - def container_post__top(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(all=True, filters=filters): - res = { 'type': 'success', 'msg': container.top()} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - 
post_action: stats - def container_post__stats(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(all=True, filters=filters): - for stat in container.stats(decode=True, stream=True): - res = { 'type': 'success', 'msg': stat} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: exec - cmd: mailq - task: delete - def container_post__exec__mailq__delete(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'items' in request_json: - r = re.compile("^[0-9a-fA-F]+$") - filtered_qids = filter(r.match, request_json['items']) - if filtered_qids: - flagged_qids = ['-d %s' % i for i in filtered_qids] - sanitized_string = str(' '.join(flagged_qids)) - for container in self.sync_docker_client.containers.list(filters=filters): - postsuper_r = container.exec_run(["/bin/bash", "-c", "/usr/sbin/postsuper " + sanitized_string]) - return self.exec_run_handler('generic', postsuper_r) - # api call: container_post - post_action: exec - cmd: mailq - task: hold - def container_post__exec__mailq__hold(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'items' in request_json: - r = re.compile("^[0-9a-fA-F]+$") - filtered_qids = filter(r.match, request_json['items']) - if filtered_qids: - flagged_qids = ['-h %s' % i for i in filtered_qids] - sanitized_string = str(' '.join(flagged_qids)) - for container in self.sync_docker_client.containers.list(filters=filters): - postsuper_r = container.exec_run(["/bin/bash", "-c", "/usr/sbin/postsuper " + 
sanitized_string]) - return self.exec_run_handler('generic', postsuper_r) - # api call: container_post - post_action: exec - cmd: mailq - task: cat - def container_post__exec__mailq__cat(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'items' in request_json: - r = re.compile("^[0-9a-fA-F]+$") - filtered_qids = filter(r.match, request_json['items']) - if filtered_qids: - sanitized_string = str(' '.join(filtered_qids)) - - for container in self.sync_docker_client.containers.list(filters=filters): - postcat_return = container.exec_run(["/bin/bash", "-c", "/usr/sbin/postcat -q " + sanitized_string], user='postfix') - if not postcat_return: - postcat_return = 'err: invalid' - return self.exec_run_handler('utf8_text_only', postcat_return) - # api call: container_post - post_action: exec - cmd: mailq - task: unhold - def container_post__exec__mailq__unhold(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'items' in request_json: - r = re.compile("^[0-9a-fA-F]+$") - filtered_qids = filter(r.match, request_json['items']) - if filtered_qids: - flagged_qids = ['-H %s' % i for i in filtered_qids] - sanitized_string = str(' '.join(flagged_qids)) - for container in self.sync_docker_client.containers.list(filters=filters): - postsuper_r = container.exec_run(["/bin/bash", "-c", "/usr/sbin/postsuper " + sanitized_string]) - return self.exec_run_handler('generic', postsuper_r) - # api call: container_post - post_action: exec - cmd: mailq - task: deliver - def container_post__exec__mailq__deliver(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'items' in 
request_json: - r = re.compile("^[0-9a-fA-F]+$") - filtered_qids = filter(r.match, request_json['items']) - if filtered_qids: - flagged_qids = ['-i %s' % i for i in filtered_qids] - for container in self.sync_docker_client.containers.list(filters=filters): - for i in flagged_qids: - postqueue_r = container.exec_run(["/bin/bash", "-c", "/usr/sbin/postqueue " + i], user='postfix') - # todo: check each exit code - res = { 'type': 'success', 'msg': 'Scheduled immediate delivery'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: exec - cmd: mailq - task: list - def container_post__exec__mailq__list(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - mailq_return = container.exec_run(["/usr/sbin/postqueue", "-j"], user='postfix') - return self.exec_run_handler('utf8_text_only', mailq_return) - # api call: container_post - post_action: exec - cmd: mailq - task: flush - def container_post__exec__mailq__flush(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - postqueue_r = container.exec_run(["/usr/sbin/postqueue", "-f"], user='postfix') - return self.exec_run_handler('generic', postqueue_r) - # api call: container_post - post_action: exec - cmd: mailq - task: super_delete - def container_post__exec__mailq__super_delete(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in 
self.sync_docker_client.containers.list(filters=filters): - postsuper_r = container.exec_run(["/usr/sbin/postsuper", "-d", "ALL"]) - return self.exec_run_handler('generic', postsuper_r) - # api call: container_post - post_action: exec - cmd: system - task: fts_rescan - def container_post__exec__system__fts_rescan(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'username' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - rescan_return = container.exec_run(["/bin/bash", "-c", "/usr/bin/doveadm fts rescan -u '" + request_json['username'].replace("'", "'\\''") + "'"], user='vmail') - if rescan_return.exit_code == 0: - res = { 'type': 'success', 'msg': 'fts_rescan: rescan triggered'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - res = { 'type': 'warning', 'msg': 'fts_rescan error'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - if 'all' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - rescan_return = container.exec_run(["/bin/bash", "-c", "/usr/bin/doveadm fts rescan -A"], user='vmail') - if rescan_return.exit_code == 0: - res = { 'type': 'success', 'msg': 'fts_rescan: rescan triggered'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - res = { 'type': 'warning', 'msg': 'fts_rescan error'} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: exec - cmd: system - task: df - def container_post__exec__system__df(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'dir' in request_json: - for 
container in self.sync_docker_client.containers.list(filters=filters): - df_return = container.exec_run(["/bin/bash", "-c", "/bin/df -H '" + request_json['dir'].replace("'", "'\\''") + "' | /usr/bin/tail -n1 | /usr/bin/tr -s [:blank:] | /usr/bin/tr ' ' ','"], user='nobody') - if df_return.exit_code == 0: - return df_return.output.decode('utf-8').rstrip() - else: - return "0,0,0,0,0,0" - # api call: container_post - post_action: exec - cmd: system - task: mysql_upgrade - def container_post__exec__system__mysql_upgrade(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - sql_return = container.exec_run(["/bin/bash", "-c", "/usr/bin/mysql_upgrade -uroot -p'" + os.environ['DBROOT'].replace("'", "'\\''") + "'\n"], user='mysql') - if sql_return.exit_code == 0: - matched = False - for line in sql_return.output.decode('utf-8').split("\n"): - if 'is already upgraded to' in line: - matched = True - if matched: - res = { 'type': 'success', 'msg':'mysql_upgrade: already upgraded', 'text': sql_return.output.decode('utf-8')} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - container.restart() - res = { 'type': 'warning', 'msg':'mysql_upgrade: upgrade was applied', 'text': sql_return.output.decode('utf-8')} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - res = { 'type': 'error', 'msg': 'mysql_upgrade: error running command', 'text': sql_return.output.decode('utf-8')} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: exec - cmd: system - task: mysql_tzinfo_to_sql - def container_post__exec__system__mysql_tzinfo_to_sql(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": 
kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - sql_return = container.exec_run(["/bin/bash", "-c", "/usr/bin/mysql_tzinfo_to_sql /usr/share/zoneinfo | /bin/sed 's/Local time zone must be set--see zic manual page/FCTY/' | /usr/bin/mysql -uroot -p'" + os.environ['DBROOT'].replace("'", "'\\''") + "' mysql \n"], user='mysql') - if sql_return.exit_code == 0: - res = { 'type': 'info', 'msg': 'mysql_tzinfo_to_sql: command completed successfully', 'text': sql_return.output.decode('utf-8')} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - res = { 'type': 'error', 'msg': 'mysql_tzinfo_to_sql: error running command', 'text': sql_return.output.decode('utf-8')} - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: exec - cmd: reload - task: dovecot - def container_post__exec__reload__dovecot(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - reload_return = container.exec_run(["/bin/bash", "-c", "/usr/sbin/dovecot reload"]) - return self.exec_run_handler('generic', reload_return) - # api call: container_post - post_action: exec - cmd: reload - task: postfix - def container_post__exec__reload__postfix(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - reload_return = container.exec_run(["/bin/bash", "-c", "/usr/sbin/postfix reload"]) - return self.exec_run_handler('generic', reload_return) - # api 
call: container_post - post_action: exec - cmd: reload - task: nginx - def container_post__exec__reload__nginx(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - reload_return = container.exec_run(["/bin/sh", "-c", "/usr/sbin/nginx -s reload"]) - return self.exec_run_handler('generic', reload_return) - # api call: container_post - post_action: exec - cmd: sieve - task: list - def container_post__exec__sieve__list(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'username' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - sieve_return = container.exec_run(["/bin/bash", "-c", "/usr/bin/doveadm sieve list -u '" + request_json['username'].replace("'", "'\\''") + "'"]) - return self.exec_run_handler('utf8_text_only', sieve_return) - # api call: container_post - post_action: exec - cmd: sieve - task: print - def container_post__exec__sieve__print(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'username' in request_json and 'script_name' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - cmd = ["/bin/bash", "-c", "/usr/bin/doveadm sieve get -u '" + request_json['username'].replace("'", "'\\''") + "' '" + request_json['script_name'].replace("'", "'\\''") + "'"] - sieve_return = container.exec_run(cmd) - return self.exec_run_handler('utf8_text_only', sieve_return) - # api call: container_post - post_action: exec - cmd: maildir - task: cleanup - def 
container_post__exec__maildir__cleanup(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'maildir' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - sane_name = re.sub(r'\W+', '', request_json['maildir']) - vmail_name = request_json['maildir'].replace("'", "'\\''") - cmd_vmail = "if [[ -d '/var/vmail/" + vmail_name + "' ]]; then /bin/mv '/var/vmail/" + vmail_name + "' '/var/vmail/_garbage/" + str(int(time.time())) + "_" + sane_name + "'; fi" - index_name = request_json['maildir'].split("/") - if len(index_name) > 1: - index_name = index_name[1].replace("'", "'\\''") + "@" + index_name[0].replace("'", "'\\''") - cmd_vmail_index = "if [[ -d '/var/vmail_index/" + index_name + "' ]]; then /bin/mv '/var/vmail_index/" + index_name + "' '/var/vmail/_garbage/" + str(int(time.time())) + "_" + sane_name + "_index'; fi" - cmd = ["/bin/bash", "-c", cmd_vmail + " && " + cmd_vmail_index] - else: - cmd = ["/bin/bash", "-c", cmd_vmail] - maildir_cleanup = container.exec_run(cmd, user='vmail') - return self.exec_run_handler('generic', maildir_cleanup) - # api call: container_post - post_action: exec - cmd: maildir - task: move - def container_post__exec__maildir__move(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'old_maildir' in request_json and 'new_maildir' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - vmail_name = request_json['old_maildir'].replace("'", "'\\''") - new_vmail_name = request_json['new_maildir'].replace("'", "'\\''") - cmd_vmail = f"if [[ -d '/var/vmail/{vmail_name}' ]]; then /bin/mv '/var/vmail/{vmail_name}' '/var/vmail/{new_vmail_name}'; fi" - - index_name = 
request_json['old_maildir'].split("/") - new_index_name = request_json['new_maildir'].split("/") - if len(index_name) > 1 and len(new_index_name) > 1: - index_name = index_name[1].replace("'", "'\\''") + "@" + index_name[0].replace("'", "'\\''") - new_index_name = new_index_name[1].replace("'", "'\\''") + "@" + new_index_name[0].replace("'", "'\\''") - cmd_vmail_index = f"if [[ -d '/var/vmail_index/{index_name}' ]]; then /bin/mv '/var/vmail_index/{index_name}' '/var/vmail_index/{new_index_name}_index'; fi" - cmd = ["/bin/bash", "-c", cmd_vmail + " && " + cmd_vmail_index] - else: - cmd = ["/bin/bash", "-c", cmd_vmail] - maildir_move = container.exec_run(cmd, user='vmail') - return self.exec_run_handler('generic', maildir_move) - # api call: container_post - post_action: exec - cmd: rspamd - task: worker_password - def container_post__exec__rspamd__worker_password(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'raw' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - cmd = "/usr/bin/rspamadm pw -e -p '" + request_json['raw'].replace("'", "'\\''") + "' 2> /dev/null" - cmd_response = self.exec_cmd_container(container, cmd, user="_rspamd") - - matched = False - for line in cmd_response.split("\n"): - if '$2$' in line: - hash = line.strip() - hash_out = re.search(r'\$2\$.+$', hash).group(0) - rspamd_passphrase_hash = re.sub(r'[^0-9a-zA-Z\$]+', '', hash_out.rstrip()) - rspamd_password_filename = "/etc/rspamd/override.d/worker-controller-password.inc" - cmd = '''/bin/echo 'enable_password = "%s";' > %s && cat %s''' % (rspamd_passphrase_hash, rspamd_password_filename, rspamd_password_filename) - cmd_response = self.exec_cmd_container(container, cmd, user="_rspamd") - if rspamd_passphrase_hash.startswith("$2$") and rspamd_passphrase_hash in cmd_response: - container.restart() - matched = 
True - if matched: - res = { 'type': 'success', 'msg': 'command completed successfully' } - self.logger.info('success changing Rspamd password') - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - self.logger.error('failed changing Rspamd password') - res = { 'type': 'danger', 'msg': 'command did not complete' } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - # api call: container_post - post_action: exec - cmd: sogo - task: rename - def container_post__exec__sogo__rename_user(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - if 'old_username' in request_json and 'new_username' in request_json: - for container in self.sync_docker_client.containers.list(filters=filters): - old_username = request_json['old_username'].replace("'", "'\\''") - new_username = request_json['new_username'].replace("'", "'\\''") - - sogo_return = container.exec_run(["/bin/bash", "-c", f"sogo-tool rename-user '{old_username}' '{new_username}'"], user='sogo') - return self.exec_run_handler('generic', sogo_return) - # api call: container_post - post_action: exec - cmd: doveadm - task: get_acl - def container_post__exec__doveadm__get_acl(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - id = request_json['id'].replace("'", "'\\''") - - shared_folders = container.exec_run(["/bin/bash", "-c", f"doveadm mailbox list -u '{id}'"]) - shared_folders = shared_folders.output.decode('utf-8') - shared_folders = shared_folders.splitlines() - - formatted_acls = [] - mailbox_seen = [] - for shared_folder in shared_folders: - if "Shared" not in shared_folder: - mailbox = 
shared_folder.replace("'", "'\\''") - if mailbox in mailbox_seen: - continue - - acls = container.exec_run(["/bin/bash", "-c", f"doveadm acl get -u '{id}' '{mailbox}'"]) - acls = acls.output.decode('utf-8').strip().splitlines() - if len(acls) >= 2: - for acl in acls[1:]: - user_id, rights = acl.split(maxsplit=1) - user_id = user_id.split('=')[1] - mailbox_seen.append(mailbox) - formatted_acls.append({ 'user': id, 'id': user_id, 'mailbox': mailbox, 'rights': rights.split() }) - elif "Shared" in shared_folder and "/" in shared_folder: - shared_folder = shared_folder.split("/") - if len(shared_folder) < 3: - continue - - user = shared_folder[1].replace("'", "'\\''") - mailbox = '/'.join(shared_folder[2:]).replace("'", "'\\''") - if mailbox in mailbox_seen: - continue - - acls = container.exec_run(["/bin/bash", "-c", f"doveadm acl get -u '{user}' '{mailbox}'"]) - acls = acls.output.decode('utf-8').strip().splitlines() - if len(acls) >= 2: - for acl in acls[1:]: - user_id, rights = acl.split(maxsplit=1) - user_id = user_id.split('=')[1].replace("'", "'\\''") - if user_id == id and mailbox not in mailbox_seen: - mailbox_seen.append(mailbox) - formatted_acls.append({ 'user': user, 'id': id, 'mailbox': mailbox, 'rights': rights.split() }) - - return Response(content=json.dumps(formatted_acls, indent=4), media_type="application/json") - # api call: container_post - post_action: exec - cmd: doveadm - task: delete_acl - def container_post__exec__doveadm__delete_acl(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - user = request_json['user'].replace("'", "'\\''") - mailbox = request_json['mailbox'].replace("'", "'\\''") - id = request_json['id'].replace("'", "'\\''") - - if user and mailbox and id: - acl_delete_return = container.exec_run(["/bin/bash", "-c", 
f"doveadm acl delete -u '{user}' '{mailbox}' 'user={id}'"]) - return self.exec_run_handler('generic', acl_delete_return) - # api call: container_post - post_action: exec - cmd: doveadm - task: set_acl - def container_post__exec__doveadm__set_acl(self, request_json, **kwargs): - if 'container_id' in kwargs: - filters = {"id": kwargs['container_id']} - elif 'container_name' in kwargs: - filters = {"name": kwargs['container_name']} - - for container in self.sync_docker_client.containers.list(filters=filters): - user = request_json['user'].replace("'", "'\\''") - mailbox = request_json['mailbox'].replace("'", "'\\''") - id = request_json['id'].replace("'", "'\\''") - rights = "" - - available_rights = [ - "admin", - "create", - "delete", - "expunge", - "insert", - "lookup", - "post", - "read", - "write", - "write-deleted", - "write-seen" - ] - for right in request_json['rights']: - right = right.replace("'", "'\\''").lower() - if right in available_rights: - rights += right + " " - - if user and mailbox and id and rights: - acl_set_return = container.exec_run(["/bin/bash", "-c", f"doveadm acl set -u '{user}' '{mailbox}' 'user={id}' {rights}"]) - return self.exec_run_handler('generic', acl_set_return) - - - # Collect host stats - async def get_host_stats(self, wait=5): - try: - system_time = datetime.now() - host_stats = { - "cpu": { - "cores": psutil.cpu_count(), - "usage": psutil.cpu_percent() - }, - "memory": { - "total": psutil.virtual_memory().total, - "usage": psutil.virtual_memory().percent, - "swap": psutil.swap_memory() - }, - "uptime": time.time() - psutil.boot_time(), - "system_time": system_time.strftime("%d.%m.%Y %H:%M:%S"), - "architecture": platform.machine() - } - - await self.redis_client.set('host_stats', json.dumps(host_stats), ex=10) - except Exception as e: - res = { - "type": "danger", - "msg": str(e) - } - - await asyncio.sleep(wait) - self.host_stats_isUpdating = False - # Collect container stats - async def get_container_stats(self, 
container_id, wait=5, stop=False): - if container_id and container_id.isalnum(): - try: - for container in (await self.async_docker_client.containers.list()): - if container._id == container_id: - res = await container.stats(stream=False) - - if await self.redis_client.exists(container_id + '_stats'): - stats = json.loads(await self.redis_client.get(container_id + '_stats')) - else: - stats = [] - stats.append(res[0]) - if len(stats) > 3: - del stats[0] - await self.redis_client.set(container_id + '_stats', json.dumps(stats), ex=60) - except Exception as e: - res = { - "type": "danger", - "msg": str(e) - } - else: - res = { - "type": "danger", - "msg": "no or invalid id defined" - } - - await asyncio.sleep(wait) - if stop == True: - # update task was called second time, stop - self.containerIds_to_update.remove(container_id) - else: - # call update task a second time - await self.get_container_stats(container_id, wait=0, stop=True) - - def exec_cmd_container(self, container, cmd, user, timeout=2, shell_cmd="/bin/bash"): - def recv_socket_data(c_socket, timeout): - c_socket.setblocking(0) - total_data=[] - data='' - begin=time.time() - while True: - if total_data and time.time()-begin > timeout: - break - elif time.time()-begin > timeout*2: - break - try: - data = c_socket.recv(8192) - if data: - total_data.append(data.decode('utf-8')) - #change the beginning time for measurement - begin=time.time() - else: - #sleep for sometime to indicate a gap - time.sleep(0.1) - break - except: - pass - return ''.join(total_data) - - try : - socket = container.exec_run([shell_cmd], stdin=True, socket=True, user=user).output._sock - if not cmd.endswith("\n"): - cmd = cmd + "\n" - socket.send(cmd.encode('utf-8')) - data = recv_socket_data(socket, timeout) - socket.close() - return data - except Exception as e: - self.logger.error("error - exec_cmd_container: %s" % str(e)) - traceback.print_exc(file=sys.stdout) - - def exec_run_handler(self, type, output): - if type == 'generic': - 
if output.exit_code == 0: - res = { 'type': 'success', 'msg': 'command completed successfully' } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - else: - res = { 'type': 'danger', 'msg': 'command failed: ' + output.output.decode('utf-8') } - return Response(content=json.dumps(res, indent=4), media_type="application/json") - if type == 'utf8_text_only': - return Response(content=output.output.decode('utf-8'), media_type="text/plain") diff --git a/data/Dockerfiles/dockerapi/modules/__init__.py b/data/Dockerfiles/dockerapi/modules/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/data/Dockerfiles/dovecot/Dockerfile b/data/Dockerfiles/dovecot/Dockerfile index f1152c8a1..bde6c3de0 100644 --- a/data/Dockerfiles/dovecot/Dockerfile +++ b/data/Dockerfiles/dovecot/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM alpine:3.21 LABEL maintainer="The Infrastructure Company GmbH " @@ -135,5 +139,14 @@ COPY quota_notify.py /usr/local/bin/quota_notify.py COPY repl_health.sh /usr/local/bin/repl_health.sh COPY optimize-fts.sh /usr/local/bin/optimize-fts.sh -ENTRYPOINT ["/docker-entrypoint.sh"] -CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=dovecot \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh /usr/bin/supervisord -c /etc/supervisor/supervisord.conf" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/dovecot/sa-rules.sh b/data/Dockerfiles/dovecot/sa-rules.sh index e948d438c..218f22faf 100755 --- a/data/Dockerfiles/dovecot/sa-rules.sh +++ 
b/data/Dockerfiles/dovecot/sa-rules.sh @@ -24,13 +24,12 @@ fi sed -i -e 's/\([^\\]\)\$\([^\/]\)/\1\\$\2/g' /etc/rspamd/custom/sa-rules if [[ "$(cat /etc/rspamd/custom/sa-rules | md5sum | cut -d' ' -f1)" != "${HASH_SA_RULES}" ]]; then - CONTAINER_NAME=rspamd-mailcow - CONTAINER_ID=$(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | \ - jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" | \ - jq -rc "select( .name | tostring | contains(\"${CONTAINER_NAME}\")) | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id") - if [[ ! -z ${CONTAINER_ID} ]]; then - curl --silent --insecure -XPOST --connect-timeout 15 --max-time 120 https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${CONTAINER_ID}/restart - fi + REDIS_HOST="${REDIS_SLAVEOF_IP:-redis-mailcow}" + REDIS_PORT="${REDIS_SLAVEOF_PORT:-6379}" + REQ_ID="$(date +%s%N)" + PAYLOAD="{\"cmd\":\"restart\",\"request_id\":\"${REQ_ID}\",\"issued_by\":\"dovecot-sa-rules\"}" + redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" -a "${REDISPASS}" --no-auth-warning \ + PUBLISH mailcow.control.rspamd "${PAYLOAD}" >/dev/null 2>&1 || true fi # Cleanup diff --git a/data/Dockerfiles/dovecot/syslog-ng-redis_slave.conf b/data/Dockerfiles/dovecot/syslog-ng-redis_slave.conf index c028bcdbf..b553dd5de 100644 --- a/data/Dockerfiles/dovecot/syslog-ng-redis_slave.conf +++ b/data/Dockerfiles/dovecot/syslog-ng-redis_slave.conf @@ -21,7 +21,7 @@ destination d_redis_ui_log { persist-name("redis1") port(`REDIS_SLAVEOF_PORT`) auth("`REDISPASS`") - command("LPUSH" "DOVECOT_MAILLOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "DOVECOT_MAILLOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; destination d_redis_f2b_channel { 
diff --git a/data/Dockerfiles/dovecot/syslog-ng.conf b/data/Dockerfiles/dovecot/syslog-ng.conf index 1918f4a23..0052063a3 100644 --- a/data/Dockerfiles/dovecot/syslog-ng.conf +++ b/data/Dockerfiles/dovecot/syslog-ng.conf @@ -21,7 +21,7 @@ destination d_redis_ui_log { persist-name("redis1") port(6379) auth("`REDISPASS`") - command("LPUSH" "DOVECOT_MAILLOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "DOVECOT_MAILLOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; destination d_redis_f2b_channel { diff --git a/data/Dockerfiles/host-agent/Dockerfile b/data/Dockerfiles/host-agent/Dockerfile new file mode 100644 index 000000000..8aedc592c --- /dev/null +++ b/data/Dockerfiles/host-agent/Dockerfile @@ -0,0 +1,21 @@ +# host-agent: dedicated container that reads /host/proc to publish host-level +# stats and answer exec.df / exec.host-stats commands. Reuses the same agent +# binary; behaviour selected via MAILCOW_AGENT_SERVICE=host. 
+# +# Requires: +# volumes: +# - /proc:/host/proc:ro +# - /:/host/rootfs:ro + +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS agent + +FROM alpine:3.20 +RUN apk add --no-cache ca-certificates tzdata +COPY --from=agent /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=agent /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=host + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] diff --git a/data/Dockerfiles/netfilter/Dockerfile b/data/Dockerfiles/netfilter/Dockerfile index 70cd49c1a..bbe8f5282 100644 --- a/data/Dockerfiles/netfilter/Dockerfile +++ b/data/Dockerfiles/netfilter/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM alpine:3.23 LABEL maintainer = "The Infrastructure Company GmbH " @@ -40,4 +44,14 @@ COPY ./docker-entrypoint.sh /app/ RUN chmod +x /app/docker-entrypoint.sh -CMD ["/bin/sh", "-c", "/app/docker-entrypoint.sh"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=netfilter \ + MAILCOW_AGENT_MAIN_CMD="/app/docker-entrypoint.sh" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/nginx/Dockerfile b/data/Dockerfiles/nginx/Dockerfile index 7d2ce34f3..6d1aac7d9 100644 --- a/data/Dockerfiles/nginx/Dockerfile +++ b/data/Dockerfiles/nginx/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM nginx:alpine LABEL maintainer "The Infrastructure Company GmbH " @@ -14,5 +18,14 @@ RUN mkdir -p /etc/nginx/includes COPY ./bootstrap.py / COPY ./docker-entrypoint.sh / -ENTRYPOINT ["/docker-entrypoint.sh"] -CMD ["nginx", "-g", "daemon off;"] +COPY
--from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=nginx \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh nginx -g 'daemon off;'" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/olefy/Dockerfile b/data/Dockerfiles/olefy/Dockerfile index 845b125f6..6f56806d8 100644 --- a/data/Dockerfiles/olefy/Dockerfile +++ b/data/Dockerfiles/olefy/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM alpine:3.21 LABEL maintainer = "The Infrastructure Company GmbH " @@ -18,6 +22,16 @@ ADD olefy.py /app/ RUN chown -R nobody:nobody /app /tmp +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + USER nobody -CMD ["python3", "-u", "/app/olefy.py"] +ENV MAILCOW_AGENT_SERVICE=olefy \ + MAILCOW_AGENT_MAIN_CMD="python3 -u /app/olefy.py" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/phpfpm/Dockerfile b/data/Dockerfiles/phpfpm/Dockerfile index 7917d7f7d..9106f9fe3 100644 --- a/data/Dockerfiles/phpfpm/Dockerfile +++ b/data/Dockerfiles/phpfpm/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM php:8.2-fpm-alpine3.21 LABEL maintainer = "The Infrastructure Company GmbH " @@ -72,7 +76,7 @@ RUN apk add -U --no-cache autoconf \ && pecl clear-cache \ && docker-php-ext-configure intl \ && docker-php-ext-configure exif \ - && docker-php-ext-configure gd --with-freetype=/usr/include/ \ + 
&& docker-php-ext-configure gd --with-freetype=/usr/include/ \ --with-jpeg=/usr/include/ \ --with-webp \ --with-xpm \ @@ -109,6 +113,14 @@ RUN apk add -U --no-cache autoconf \ COPY ./docker-entrypoint.sh / -ENTRYPOINT ["/docker-entrypoint.sh"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli -CMD ["php-fpm"] +ENV MAILCOW_AGENT_SERVICE=php-fpm \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh php-fpm" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/phpfpm/docker-entrypoint.sh b/data/Dockerfiles/phpfpm/docker-entrypoint.sh index d7fa15556..fc3c534b7 100755 --- a/data/Dockerfiles/phpfpm/docker-entrypoint.sh +++ b/data/Dockerfiles/phpfpm/docker-entrypoint.sh @@ -29,63 +29,35 @@ session.save_handler = redis session.save_path = "tcp://'${REDIS_HOST}':'${REDIS_PORT}'?auth='${REDISPASS}'" ' > /usr/local/etc/php/conf.d/session_store.ini -# Check mysql_upgrade (master and slave) -CONTAINER_ID= -until [[ ! -z "${CONTAINER_ID}" ]] && [[ "${CONTAINER_ID}" =~ ^[[:alnum:]]*$ ]]; do - CONTAINER_ID=$(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" 2> /dev/null | jq -rc "select( .name | tostring | contains(\"mysql-mailcow\")) | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id" 2> /dev/null) - echo "Could not get mysql-mailcow container id... 
trying again" - sleep 2 -done -echo "MySQL @ ${CONTAINER_ID}" -SQL_LOOP_C=0 -SQL_CHANGED=0 -until [[ ${SQL_UPGRADE_STATUS} == 'success' ]]; do - if [ ${SQL_LOOP_C} -gt 4 ]; then - echo "Tried to upgrade MySQL and failed, giving up after ${SQL_LOOP_C} retries and starting container (oops, not good)" +# Wait for MariaDB. The upstream mariadb image already runs mariadb-upgrade +# itself on startup when needed. +echo "Waiting for MariaDB socket at /var/run/mysqld/mysqld.sock..." +WAIT_C=0 +until mariadb --skip-ssl --socket=/var/run/mysqld/mysqld.sock -u${DBUSER} -p${DBPASS} -e "SELECT 1" >/dev/null 2>&1; do + WAIT_C=$((WAIT_C+1)) + if [ ${WAIT_C} -gt 60 ]; then + echo "MariaDB did not respond after 60s — continuing anyway." break fi - SQL_FULL_UPGRADE_RETURN=$(curl --silent --insecure -XPOST https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${CONTAINER_ID}/exec -d '{"cmd":"system", "task":"mysql_upgrade"}' --silent -H 'Content-type: application/json') - SQL_UPGRADE_STATUS=$(echo ${SQL_FULL_UPGRADE_RETURN} | jq -r .type) - SQL_LOOP_C=$((SQL_LOOP_C+1)) - echo "SQL upgrade iteration #${SQL_LOOP_C}" - if [[ ${SQL_UPGRADE_STATUS} == 'warning' ]]; then - SQL_CHANGED=1 - echo "MySQL applied an upgrade, debug output:" - echo ${SQL_FULL_UPGRADE_RETURN} - sleep 3 - while ! mariadb-admin status --ssl=false --socket=/var/run/mysqld/mysqld.sock -u${DBUSER} -p${DBPASS} --silent; do - echo "Waiting for SQL to return, please wait" - sleep 2 - done - continue - elif [[ ${SQL_UPGRADE_STATUS} == 'success' ]]; then - echo "MySQL is up-to-date - debug output:" - echo ${SQL_FULL_UPGRADE_RETURN} - else - echo "No valid reponse for mysql_upgrade was received, debug output:" - echo ${SQL_FULL_UPGRADE_RETURN} - fi + sleep 1 done +if [ ${WAIT_C} -le 60 ]; then echo "MariaDB is ready."; fi # WAIT_C > 60 means the wait above timed out
-# doing post-installation stuff, if SQL was upgraded (master and slave) -if [ ${SQL_CHANGED} -eq 1 ]; then - POSTFIX=$(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" 2> /dev/null | jq -rc "select( .name | tostring | contains(\"postfix-mailcow\")) | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id" 2> /dev/null) - if [[ -z "${POSTFIX}" ]] || ! [[ "${POSTFIX}" =~ ^[[:alnum:]]*$ ]]; then - echo "Could not determine Postfix container ID, skipping Postfix restart." - else - echo "Restarting Postfix" - curl -X POST --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${POSTFIX}/restart | jq -r '.msg' - echo "Sleeping 5 seconds..." - sleep 5 - fi -fi - -# Check mysql tz import (master and slave) +# Timezone tables — check if CONVERT_TZ works, import if it returns NULL. +# Some Alpine builds drop mariadb-tzinfo-to-sql; fall back to the legacy +# mysql_tzinfo_to_sql name and skip the import if neither tool is installed.
TZ_CHECK=$(mariadb --skip-ssl --socket=/var/run/mysqld/mysqld.sock -u ${DBUSER} -p${DBPASS} ${DBNAME} -e "SELECT CONVERT_TZ('2019-11-02 23:33:00','Europe/Berlin','UTC') AS time;" -BN 2> /dev/null) if [[ -z ${TZ_CHECK} ]] || [[ "${TZ_CHECK}" == "NULL" ]]; then - SQL_FULL_TZINFO_IMPORT_RETURN=$(curl --silent --insecure -XPOST https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${CONTAINER_ID}/exec -d '{"cmd":"system", "task":"mysql_tzinfo_to_sql"}' --silent -H 'Content-type: application/json') - echo "MySQL mysql_tzinfo_to_sql - debug output:" - echo ${SQL_FULL_TZINFO_IMPORT_RETURN} + echo "Importing timezone data into mysql.time_zone_* …" + if command -v mariadb-tzinfo-to-sql >/dev/null 2>&1; then + mariadb-tzinfo-to-sql /usr/share/zoneinfo 2>/dev/null \ + | mariadb --skip-ssl --socket=/var/run/mysqld/mysqld.sock -uroot -p${DBROOT} mysql + elif command -v mysql_tzinfo_to_sql >/dev/null 2>&1; then + mysql_tzinfo_to_sql /usr/share/zoneinfo 2>/dev/null \ + | mariadb --skip-ssl --socket=/var/run/mysqld/mysqld.sock -uroot -p${DBROOT} mysql + else + echo "No tzinfo-to-sql tool available — skipping timezone import." 
+ fi fi if [[ "${MASTER}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then diff --git a/data/Dockerfiles/postfix-tlspol/Dockerfile b/data/Dockerfiles/postfix-tlspol/Dockerfile index 68f6ecced..ee8285623 100644 --- a/data/Dockerfiles/postfix-tlspol/Dockerfile +++ b/data/Dockerfiles/postfix-tlspol/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM golang:1.25-bookworm AS builder WORKDIR /src @@ -45,6 +49,14 @@ RUN chmod +x /opt/postfix-tlspol.sh \ /docker-entrypoint.sh RUN rm -rf /tmp/* /var/tmp/* -ENTRYPOINT ["/docker-entrypoint.sh"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli -CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] \ No newline at end of file +ENV MAILCOW_AGENT_SERVICE=postfix-tlspol \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh /usr/bin/supervisord -c /etc/supervisor/supervisord.conf" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] \ No newline at end of file diff --git a/data/Dockerfiles/postfix-tlspol/syslog-ng-redis_slave.conf b/data/Dockerfiles/postfix-tlspol/syslog-ng-redis_slave.conf index 3862a3547..719dbfc1a 100644 --- a/data/Dockerfiles/postfix-tlspol/syslog-ng-redis_slave.conf +++ b/data/Dockerfiles/postfix-tlspol/syslog-ng-redis_slave.conf @@ -21,7 +21,7 @@ destination d_redis_ui_log { persist-name("redis1") port(`REDIS_SLAVEOF_PORT`) auth("`REDISPASS`") - command("LPUSH" "POSTFIX_MAILLOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "POSTFIX_MAILLOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; filter f_mail { facility(mail); }; diff --git 
a/data/Dockerfiles/postfix-tlspol/syslog-ng.conf b/data/Dockerfiles/postfix-tlspol/syslog-ng.conf index 7126c1250..54d3a5ce2 100644 --- a/data/Dockerfiles/postfix-tlspol/syslog-ng.conf +++ b/data/Dockerfiles/postfix-tlspol/syslog-ng.conf @@ -21,7 +21,7 @@ destination d_redis_ui_log { persist-name("redis1") port(6379) auth("`REDISPASS`") - command("LPUSH" "POSTFIX_MAILLOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "POSTFIX_MAILLOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; filter f_mail { facility(mail); }; diff --git a/data/Dockerfiles/postfix/Dockerfile b/data/Dockerfiles/postfix/Dockerfile index 994612ec4..419e88970 100644 --- a/data/Dockerfiles/postfix/Dockerfile +++ b/data/Dockerfiles/postfix/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM debian:bookworm-slim LABEL maintainer="The Infrastructure Company GmbH " @@ -58,6 +62,14 @@ RUN rm -rf /tmp/* /var/tmp/* EXPOSE 588 -ENTRYPOINT ["/docker-entrypoint.sh"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli -CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] +ENV MAILCOW_AGENT_SERVICE=postfix \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh /usr/bin/supervisord -c /etc/supervisor/supervisord.conf" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/postfix/syslog-ng-redis_slave.conf b/data/Dockerfiles/postfix/syslog-ng-redis_slave.conf index 8e15932a2..9901d5c88 100644 --- a/data/Dockerfiles/postfix/syslog-ng-redis_slave.conf +++ 
b/data/Dockerfiles/postfix/syslog-ng-redis_slave.conf @@ -21,7 +21,7 @@ destination d_redis_ui_log { persist-name("redis1") port(`REDIS_SLAVEOF_PORT`) auth("`REDISPASS`") - command("LPUSH" "POSTFIX_MAILLOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "POSTFIX_MAILLOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; destination d_redis_f2b_channel { diff --git a/data/Dockerfiles/postfix/syslog-ng.conf b/data/Dockerfiles/postfix/syslog-ng.conf index fc7d1aa0f..ec2eaa1b4 100644 --- a/data/Dockerfiles/postfix/syslog-ng.conf +++ b/data/Dockerfiles/postfix/syslog-ng.conf @@ -21,7 +21,7 @@ destination d_redis_ui_log { persist-name("redis1") port(6379) auth("`REDISPASS`") - command("LPUSH" "POSTFIX_MAILLOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "POSTFIX_MAILLOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; destination d_redis_f2b_channel { diff --git a/data/Dockerfiles/rspamd/Dockerfile b/data/Dockerfiles/rspamd/Dockerfile index d0c710428..39f0ad0b0 100644 --- a/data/Dockerfiles/rspamd/Dockerfile +++ b/data/Dockerfiles/rspamd/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM debian:trixie-slim LABEL maintainer="The Infrastructure Company GmbH " @@ -33,8 +37,16 @@ COPY settings.conf /etc/rspamd/settings.conf COPY set_worker_password.sh /set_worker_password.sh COPY docker-entrypoint.sh /docker-entrypoint.sh -ENTRYPOINT ["/docker-entrypoint.sh"] - STOPSIGNAL SIGTERM -CMD ["/usr/bin/rspamd", "-f", "-u", "_rspamd", "-g", "_rspamd"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli 
/usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=rspamd \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh /usr/bin/rspamd -f -u _rspamd -g _rspamd" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/sogo/Dockerfile b/data/Dockerfiles/sogo/Dockerfile index 1c7287200..71aa9ffbc 100644 --- a/data/Dockerfiles/sogo/Dockerfile +++ b/data/Dockerfiles/sogo/Dockerfile @@ -7,6 +7,10 @@ # # To add new patches, modify SOGO_SECURITY_PATCHES ARG below with space-separated commit hashes +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM debian:bookworm LABEL maintainer="The Infrastructure Company GmbH " @@ -174,6 +178,14 @@ COPY docker-entrypoint.sh / RUN chmod +x /bootstrap-sogo.sh \ /usr/local/sbin/stop-supervisor.sh -ENTRYPOINT ["/docker-entrypoint.sh"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli -CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] +ENV MAILCOW_AGENT_SERVICE=sogo \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh /usr/bin/supervisord -c /etc/supervisor/supervisord.conf" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/sogo/syslog-ng-redis_slave.conf b/data/Dockerfiles/sogo/syslog-ng-redis_slave.conf index 675e4c67a..b4cad12e6 100644 --- a/data/Dockerfiles/sogo/syslog-ng-redis_slave.conf +++ b/data/Dockerfiles/sogo/syslog-ng-redis_slave.conf @@ -23,7 +23,7 @@ destination d_redis_ui_log { persist-name("redis1") port(`REDIS_SLAVEOF_PORT`) auth("`REDISPASS`") - command("LPUSH" "SOGO_LOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" 
program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "SOGO_LOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; destination d_redis_f2b_channel { diff --git a/data/Dockerfiles/sogo/syslog-ng.conf b/data/Dockerfiles/sogo/syslog-ng.conf index 8460f2f95..bdb90d9ee 100644 --- a/data/Dockerfiles/sogo/syslog-ng.conf +++ b/data/Dockerfiles/sogo/syslog-ng.conf @@ -23,7 +23,7 @@ destination d_redis_ui_log { persist-name("redis1") port(6379) auth("`REDISPASS`") - command("LPUSH" "SOGO_LOG" "$(format-json time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") + command("LPUSH" "SOGO_LOG" "$(format-json node=\"$HOST\" time=\"$S_UNIXTIME\" priority=\"$PRIORITY\" program=\"$PROGRAM\" message=\"$MESSAGE\")\n") ); }; destination d_redis_f2b_channel { diff --git a/data/Dockerfiles/unbound/Dockerfile b/data/Dockerfiles/unbound/Dockerfile index 7c4921384..cf1d18b72 100644 --- a/data/Dockerfiles/unbound/Dockerfile +++ b/data/Dockerfiles/unbound/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM alpine:3.23 LABEL maintainer = "The Infrastructure Company GmbH " @@ -29,8 +33,15 @@ COPY supervisord.conf /etc/supervisor/supervisord.conf COPY stop-supervisor.sh /usr/local/sbin/stop-supervisor.sh RUN chmod +x /healthcheck.sh -HEALTHCHECK --interval=30s --timeout=10s \ - CMD sh -c '[ -f /tmp/healthcheck_status ] && [ "$(cat /tmp/healthcheck_status)" -eq 0 ] || exit 1' -ENTRYPOINT ["/docker-entrypoint.sh"] -CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=unbound \ + MAILCOW_AGENT_MAIN_CMD="/docker-entrypoint.sh /usr/bin/supervisord -c /etc/supervisor/supervisord.conf" + +HEALTHCHECK 
--interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/watchdog/Dockerfile b/data/Dockerfiles/watchdog/Dockerfile index 54854ea1a..9595dadba 100644 --- a/data/Dockerfiles/watchdog/Dockerfile +++ b/data/Dockerfiles/watchdog/Dockerfile @@ -1,3 +1,7 @@ +ARG AGENT_IMAGE=ghcr.io/mailcow/agent:1.00 + +FROM ${AGENT_IMAGE} AS mailcow-agent-src + FROM alpine:3.23 LABEL maintainer = "The Infrastructure Company GmbH " @@ -39,4 +43,14 @@ COPY check_mysql_slavestatus.sh /usr/lib/nagios/plugins/check_mysql_slavestatus. COPY check_dns.sh /usr/lib/mailcow/check_dns.sh COPY client.cnf /etc/my.cnf.d/client.cnf -CMD ["/watchdog.sh"] +COPY --from=mailcow-agent-src /out/mailcow-agent /usr/local/bin/mailcow-agent +COPY --from=mailcow-agent-src /out/mailcow-agent-cli /usr/local/bin/mailcow-agent-cli + +ENV MAILCOW_AGENT_SERVICE=watchdog \ + MAILCOW_AGENT_MAIN_CMD="/watchdog.sh" + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD ["/usr/local/bin/mailcow-agent", "healthcheck"] + +ENTRYPOINT ["/usr/local/bin/mailcow-agent"] +CMD [] diff --git a/data/Dockerfiles/watchdog/watchdog.sh b/data/Dockerfiles/watchdog/watchdog.sh index 020f3f838..a25c173d0 100755 --- a/data/Dockerfiles/watchdog/watchdog.sh +++ b/data/Dockerfiles/watchdog/watchdog.sh @@ -188,44 +188,6 @@ function notify_error() { fi } -get_container_ip() { - # ${1} is container - CONTAINER_ID=() - CONTAINER_IPS=() - CONTAINER_IP= - LOOP_C=1 - until [[ ${CONTAINER_IP} =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]] || [[ ${LOOP_C} -gt 5 ]]; do - if [ ${IP_BY_DOCKER_API} -eq 0 ]; then - CONTAINER_IP=$(dig a "${1}" +short) - else - sleep 0.5 - # get long container id for exact match - CONTAINER_ID=($(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | jq -r ".[] | {name: 
.Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" | jq -rc "select( .name | tostring == \"${1}\") | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id")) - # returned id can have multiple elements (if scaled), shuffle for random test - CONTAINER_ID=($(printf "%s\n" "${CONTAINER_ID[@]}" | shuf)) - if [[ ! -z ${CONTAINER_ID} ]]; then - for matched_container in "${CONTAINER_ID[@]}"; do - CONTAINER_IPS=($(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${matched_container}/json | jq -r '.NetworkSettings.Networks[].IPAddress')) - for ip_match in "${CONTAINER_IPS[@]}"; do - # grep will do nothing if one of these vars is empty - [[ -z ${ip_match} ]] && continue - [[ -z ${IPV4_NETWORK} ]] && continue - # only return ips that are part of our network - if ! grep -q ${IPV4_NETWORK} <(echo ${ip_match}); then - continue - else - CONTAINER_IP=${ip_match} - break - fi - done - [[ ! -z ${CONTAINER_IP} ]] && break - done - fi - fi - LOOP_C=$((LOOP_C + 1)) - done - [[ ${LOOP_C} -gt 5 ]] && echo 240.0.0.0 || echo ${CONTAINER_IP} -} # One-time check if grep -qi "$(echo ${IPV6_NETWORK} | cut -d: -f1-3)" <<< "$(ip a s)"; then @@ -267,295 +229,6 @@ external_checks() { return 1 } -nginx_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${NGINX_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/nginx-mailcow; echo "$(tail -50 /tmp/nginx-mailcow)" > /tmp/nginx-mailcow - host_ip=$(get_container_ip nginx-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_http -4 -H ${host_ip} -u / -p 8081 2>> /tmp/nginx-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! 
$((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Nginx" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -unbound_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${UNBOUND_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/unbound-mailcow; echo "$(tail -50 /tmp/unbound-mailcow)" > /tmp/unbound-mailcow - host_ip=$(get_container_ip unbound-mailcow) - err_c_cur=${err_count} - /usr/lib/mailcow/check_dns.sh -s ${host_ip} -H stackoverflow.com 2>> /tmp/unbound-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - DNSSEC=$(dig com +dnssec | egrep 'flags:.+ad') - if [[ -z ${DNSSEC} ]]; then - echo "DNSSEC failure" 2>> /tmp/unbound-mailcow 1>&2 - err_count=$(( ${err_count} + 1)) - else - echo "DNSSEC check succeeded" 2>> /tmp/unbound-mailcow 1>&2 - fi - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Unbound" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? 
== 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -redis_checks() { - # A check for the local redis container - err_count=0 - diff_c=0 - THRESHOLD=${REDIS_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/redis-mailcow; echo "$(tail -50 /tmp/redis-mailcow)" > /tmp/redis-mailcow - host_ip=$(get_container_ip redis-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_tcp -4 -H redis-mailcow -p 6379 -E -s "AUTH ${REDISPASS}\nPING\n" -q "QUIT" -e "PONG" 2>> /tmp/redis-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Redis" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -mysql_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${MYSQL_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/mysql-mailcow; echo "$(tail -50 /tmp/mysql-mailcow)" > /tmp/mysql-mailcow - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_mysql -f /etc/my.cnf.d/client.cnf -s /var/run/mysqld/mysqld.sock -u ${DBUSER} -p ${DBPASS} -d ${DBNAME} 2>> /tmp/mysql-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_mysql_query -f /etc/my.cnf.d/client.cnf -s /var/run/mysqld/mysqld.sock -u ${DBUSER} -p ${DBPASS} -d ${DBNAME} -q "SELECT COUNT(*) FROM information_schema.tables" 2>> /tmp/mysql-mailcow 1>&2; err_count=$(( ${err_count} + $? 
)) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "MySQL/MariaDB" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -mysql_repl_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${MYSQL_REPLICATION_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/mysql_repl_checks; echo "$(tail -50 /tmp/mysql_repl_checks)" > /tmp/mysql_repl_checks - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_mysql_slavestatus.sh -o /etc/my.cnf.d/client.cnf -S /var/run/mysqld/mysqld.sock -u root -p ${DBROOT} 2>> /tmp/mysql_repl_checks 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "MySQL/MariaDB replication" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 60 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -sogo_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${SOGO_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/sogo-mailcow; echo "$(tail -50 /tmp/sogo-mailcow)" > /tmp/sogo-mailcow - host_ip=$(get_container_ip sogo-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_http -4 -H ${host_ip} -u /SOGo.index/ -p 20000 2>> /tmp/sogo-mailcow 1>&2; err_count=$(( ${err_count} + $? 
)) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "SOGo" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -postfix_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${POSTFIX_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/postfix-mailcow; echo "$(tail -50 /tmp/postfix-mailcow)" > /tmp/postfix-mailcow - host_ip=$(get_container_ip postfix-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 589 -f "watchdog@invalid" -C "RCPT TO:watchdog@localhost" -C DATA -C . -R 250 2>> /tmp/postfix-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 589 -S 2>> /tmp/postfix-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Postfix" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? 
== 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -postfix-tlspol_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${POSTFIX_TLSPOL_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/postfix-tlspol-mailcow; echo "$(tail -50 /tmp/postfix-tlspol-mailcow)" > /tmp/postfix-tlspol-mailcow - host_ip=$(get_container_ip postfix-tlspol-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_tcp -4 -H ${host_ip} -p 8642 2>> /tmp/postfix-tlspol-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Postfix TLS Policy companion" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -clamd_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${CLAMD_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/clamd-mailcow; echo "$(tail -50 /tmp/clamd-mailcow)" > /tmp/clamd-mailcow - host_ip=$(get_container_ip clamd-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_clamd -4 -H ${host_ip} 2>> /tmp/clamd-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Clamd" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? 
== 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 120 ) + 20 )) - fi - done - return 1 -} - -dovecot_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${DOVECOT_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/dovecot-mailcow; echo "$(tail -50 /tmp/dovecot-mailcow)" > /tmp/dovecot-mailcow - host_ip=$(get_container_ip dovecot-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 24 -f "watchdog@invalid" -C "RCPT TO:" -L -R "User doesn't exist" 2>> /tmp/dovecot-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_imap -4 -H ${host_ip} -p 993 -S -e "OK " 2>> /tmp/dovecot-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_imap -4 -H ${host_ip} -p 143 -e "OK " 2>> /tmp/dovecot-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_tcp -4 -H ${host_ip} -p 10001 -e "VERSION" 2>> /tmp/dovecot-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_tcp -4 -H ${host_ip} -p 4190 -e "Dovecot ready" 2>> /tmp/dovecot-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Dovecot" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? 
== 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -dovecot_repl_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${DOVECOT_REPL_THRESHOLD} - D_REPL_STATUS=$(redis-cli -h redis -a ${REDISPASS} --no-auth-warning -r GET DOVECOT_REPL_HEALTH) - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - err_c_cur=${err_count} - D_REPL_STATUS=$(redis-cli --raw -h redis -a ${REDISPASS} --no-auth-warning GET DOVECOT_REPL_HEALTH) - if [[ "${D_REPL_STATUS}" != "1" ]]; then - err_count=$(( ${err_count} + 1 )) - fi - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Dovecot replication" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 60 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - cert_checks() { err_count=0 diff_c=0 @@ -564,11 +237,9 @@ cert_checks() { trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do touch /tmp/certcheck; echo "$(tail -50 /tmp/certcheck)" > /tmp/certcheck - host_ip_postfix=$(get_container_ip postfix) - host_ip_dovecot=$(get_container_ip dovecot) err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_smtp -H ${host_ip_postfix} -p 589 -4 -S -D 7 2>> /tmp/certcheck 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_imap -H ${host_ip_dovecot} -p 993 -4 -S -D 7 2>> /tmp/certcheck 1>&2; err_count=$(( ${err_count} + $? )) + /usr/lib/nagios/plugins/check_smtp -H postfix -p 589 -4 -S -D 7 2>> /tmp/certcheck 1>&2; err_count=$(( ${err_count} + $? 
)) + /usr/lib/nagios/plugins/check_imap -H dovecot -p 993 -4 -S -D 7 2>> /tmp/certcheck 1>&2; err_count=$(( ${err_count} + $? )) [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) progress "Primary certificate expiry check" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} @@ -578,31 +249,6 @@ cert_checks() { return 1 } -phpfpm_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${PHPFPM_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/php-fpm-mailcow; echo "$(tail -50 /tmp/php-fpm-mailcow)" > /tmp/php-fpm-mailcow - host_ip=$(get_container_ip php-fpm-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_tcp -H ${host_ip} -p 9001 2>> /tmp/php-fpm-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_tcp -H ${host_ip} -p 9002 2>> /tmp/php-fpm-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "PHP-FPM" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? 
== 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} ratelimit_checks() { err_count=0 @@ -736,90 +382,63 @@ acme_checks() { return 1 } -rspamd_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${RSPAMD_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/rspamd-mailcow; echo "$(tail -50 /tmp/rspamd-mailcow)" > /tmp/rspamd-mailcow - host_ip=$(get_container_ip rspamd-mailcow) - err_c_cur=${err_count} - SCORE=$(echo 'To: null@localhost -From: watchdog@localhost -Empty -' | usr/bin/curl --max-time 10 -s --data-binary @- --unix-socket /var/lib/rspamd/rspamd.sock http://rspamd.${COMPOSE_PROJECT_NAME}_mailcow-network/scan | jq -rc .default.required_score | sed 's/\..*//' ) - if [[ ${SCORE} -ne 9999 ]]; then - echo "Rspamd settings check failed, score returned: ${SCORE}" 2>> /tmp/rspamd-mailcow 1>&2 - err_count=$(( ${err_count} + 1)) - else - echo "Rspamd settings check succeeded, score returned: ${SCORE}" 2>> /tmp/rspamd-mailcow 1>&2 - fi - # A dirty hack until a PING PONG event is implemented to worker proxy - # We expect an empty response, not a timeout - if [ "$(curl -s --max-time 10 ${host_ip}:9900 2> /dev/null ; echo $?)" == "28" ]; then - echo "Milter check failed" 2>> /tmp/rspamd-mailcow 1>&2; err_count=$(( ${err_count} + 1 )); - else - echo "Milter check succeeded" 2>> /tmp/rspamd-mailcow 1>&2 - fi - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Rspamd" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? 
== 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} - -olefy_checks() { - err_count=0 - diff_c=0 - THRESHOLD=${OLEFY_THRESHOLD} - # Reduce error count by 2 after restarting an unhealthy container - trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 - while [ ${err_count} -lt ${THRESHOLD} ]; do - touch /tmp/olefy-mailcow; echo "$(tail -50 /tmp/olefy-mailcow)" > /tmp/olefy-mailcow - host_ip=$(get_container_ip olefy-mailcow) - err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_tcp -4 -H ${host_ip} -p 10055 -s "PING\n" 2>> /tmp/olefy-mailcow 1>&2; err_count=$(( ${err_count} + $? )) - [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 - [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) - progress "Olefy" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} - if [[ $? == 10 ]]; then - diff_c=0 - sleep 1 - else - diff_c=0 - sleep $(( ( RANDOM % 60 ) + 20 )) - fi - done - return 1 -} # Notify about start if [[ ${WATCHDOG_NOTIFY_START} =~ ^([yY][eE][sS]|[yY])+$ ]]; then notify_error "watchdog-mailcow" "Watchdog started monitoring mailcow." fi -# Create watchdog agents +# Health checks run inside each container (mailcow-agent healthcheck + heartbeat). +# We just read the per-node health field from Redis and restart on N consecutive fails. 
+REDIS_HOST="${REDIS_SLAVEOF_IP:-redis-mailcow}" +REDIS_PORT="${REDIS_SLAVEOF_PORT:-6379}" +REDIS_CMDLINE_FULL="redis-cli -h ${REDIS_HOST} -p ${REDIS_PORT} -a ${REDISPASS} --no-auth-warning" + +HEALTH_WATCHED_SERVICES=( + postfix dovecot sogo rspamd nginx + clamd unbound olefy phpfpm postfix-tlspol +) + +declare -A HEALTH_FAIL_COUNT +HEALTH_FAIL_THRESHOLD=3 + +[[ "${SKIP_SOGO}" =~ ^([yY][eE][sS]|[yY])+$ ]] && HEALTH_WATCHED_SERVICES=("${HEALTH_WATCHED_SERVICES[@]/sogo}") +[[ "${SKIP_CLAMD}" =~ ^([yY][eE][sS]|[yY])+$ ]] && HEALTH_WATCHED_SERVICES=("${HEALTH_WATCHED_SERVICES[@]/clamd}") +[[ "${SKIP_OLEFY}" =~ ^([yY][eE][sS]|[yY])+$ ]] && HEALTH_WATCHED_SERVICES=("${HEALTH_WATCHED_SERVICES[@]/olefy}") ( +# Counters are per-node in an associative array reset on restart, so absorb USR1 +# instead of dying (other tasks trap it to decrement their own err_count). +trap '' USR1 +declare -A HEALTH_FAIL_COUNT while true; do - if ! nginx_checks; then - log_msg "Nginx hit error limit" - echo nginx-mailcow > /tmp/com_pipe - fi + for svc in "${HEALTH_WATCHED_SERVICES[@]}"; do + [[ -z "$svc" ]] && continue + nodes=$(${REDIS_CMDLINE_FULL} ZRANGEBYSCORE "mailcow.nodes.${svc}" "$(( $(date +%s) - 30 ))" "+inf" 2>/dev/null) + [[ -z "${nodes}" ]] && continue + while IFS= read -r node; do + [[ -z "${node}" ]] && continue + health=$(${REDIS_CMDLINE_FULL} HGET "mailcow.node.${svc}.${node}" health 2>/dev/null) + key="${svc}|${node}" + if [[ "${health}" == "fail" ]]; then + HEALTH_FAIL_COUNT[$key]=$(( ${HEALTH_FAIL_COUNT[$key]:-0} + 1 )) + if [[ ${HEALTH_FAIL_COUNT[$key]} -ge ${HEALTH_FAIL_THRESHOLD} ]]; then + detail=$(${REDIS_CMDLINE_FULL} HGET "mailcow.node.${svc}.${node}" health_detail 2>/dev/null) + log_msg "Service ${svc} node ${node} unhealthy (${detail:-no detail}) — sending restart" + echo "${svc}-mailcow|${node}" > /tmp/com_pipe + HEALTH_FAIL_COUNT[$key]=0 + fi + else + HEALTH_FAIL_COUNT[$key]=0 + fi + done <<< "${nodes}" + done + sleep 15 done ) & PID=$! 
-echo "Spawned nginx_checks with PID ${PID}" +echo "Spawned registry-based health monitor with PID ${PID}" BACKGROUND_TASKS+=(${PID}) if [[ ${WATCHDOG_EXTERNAL_CHECKS} =~ ^([yY][eE][sS]|[yY])+$ ]]; then @@ -836,110 +455,6 @@ echo "Spawned external_checks with PID ${PID}" BACKGROUND_TASKS+=(${PID}) fi -if [[ ${WATCHDOG_MYSQL_REPLICATION_CHECKS} =~ ^([yY][eE][sS]|[yY])+$ ]]; then -( -while true; do - if ! mysql_repl_checks; then - log_msg "MySQL replication check hit error limit" - echo mysql_repl_checks > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned mysql_repl_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) -fi - -( -while true; do - if ! mysql_checks; then - log_msg "MySQL hit error limit" - echo mysql-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned mysql_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - -( -while true; do - if ! redis_checks; then - log_msg "Local Redis hit error limit" - echo redis-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned redis_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - -( -while true; do - if ! phpfpm_checks; then - log_msg "PHP-FPM hit error limit" - echo php-fpm-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned phpfpm_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - -if [[ "${SKIP_SOGO}" =~ ^([nN][oO]|[nN])+$ ]]; then -( -while true; do - if ! sogo_checks; then - log_msg "SOGo hit error limit" - echo sogo-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned sogo_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) -fi - -if [ ${CHECK_UNBOUND} -eq 1 ]; then -( -while true; do - if ! unbound_checks; then - log_msg "Unbound hit error limit" - echo unbound-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned unbound_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) -fi - -if [[ "${SKIP_CLAMD}" =~ ^([nN][oO]|[nN])+$ ]]; then -( -while true; do - if ! 
clamd_checks; then - log_msg "Clamd hit error limit" - echo clamd-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned clamd_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) -fi - -( -while true; do - if ! postfix_checks; then - log_msg "Postfix hit error limit" - echo postfix-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned postfix_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - ( while true; do if ! mailq_checks; then @@ -952,54 +467,6 @@ PID=$! echo "Spawned mailq_checks with PID ${PID}" BACKGROUND_TASKS+=(${PID}) -( -while true; do - if ! postfix-tlspol_checks; then - log_msg "Postfix TLS Policy hit error limit" - echo postfix-tlspol-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned postfix-tlspol_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - -( -while true; do - if ! dovecot_checks; then - log_msg "Dovecot hit error limit" - echo dovecot-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned dovecot_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - -( -while true; do - if ! dovecot_repl_checks; then - log_msg "Dovecot hit error limit" - echo dovecot_repl_checks > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned dovecot_repl_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - -( -while true; do - if ! rspamd_checks; then - log_msg "Rspamd hit error limit" - echo rspamd-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned rspamd_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) - ( while true; do if ! ratelimit_checks; then @@ -1036,20 +503,6 @@ PID=$! echo "Spawned cert_checks with PID ${PID}" BACKGROUND_TASKS+=(${PID}) -if [[ "${SKIP_OLEFY}" =~ ^([nN][oO]|[nN])+$ ]]; then -( -while true; do - if ! olefy_checks; then - log_msg "Olefy hit error limit" - echo olefy-mailcow > /tmp/com_pipe - fi -done -) & -PID=$! -echo "Spawned olefy_checks with PID ${PID}" -BACKGROUND_TASKS+=(${PID}) -fi - ( while true; do if ! 
acme_checks; then @@ -1075,15 +528,19 @@ while true; do done ) & -# Monitor dockerapi +# Pause background checks while Redis (the control bus) is unreachable, otherwise +# we'd flag every service as unhealthy at once. ( +REDIS_HOST="${REDIS_SLAVEOF_IP:-redis-mailcow}" +REDIS_PORT="${REDIS_SLAVEOF_PORT:-6379}" +ping_bus() { redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" -a "${REDISPASS}" --no-auth-warning ping > /dev/null 2>&1; } while true; do - while nc -z dockerapi 443; do + while ping_bus; do sleep 3 done - log_msg "Cannot find dockerapi-mailcow, waiting to recover..." + log_msg "Cannot reach redis-mailcow (control bus), waiting to recover..." kill -STOP ${BACKGROUND_TASKS[*]} - until nc -z dockerapi 443; do + until ping_bus; do sleep 3 done kill -CONT ${BACKGROUND_TASKS[*]} @@ -1143,24 +600,33 @@ while true; do elif [[ ${com_pipe_answer} =~ .+-mailcow ]]; then kill -STOP ${BACKGROUND_TASKS[*]} sleep 10 - CONTAINER_ID=$(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/json | jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], project: .Config.Labels[\"com.docker.compose.project\"], id: .Id}" | jq -rc "select( .name | tostring | contains(\"${com_pipe_answer}\")) | select( .project | tostring | contains(\"${COMPOSE_PROJECT_NAME,,}\")) | .id") - if [[ ! -z ${CONTAINER_ID} ]]; then - if [[ "${com_pipe_answer}" == "php-fpm-mailcow" ]]; then - HAS_INITDB=$(curl --silent --insecure -XPOST https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${CONTAINER_ID}/top | jq '.msg.Processes[] | contains(["php -c /usr/local/etc/php -f /web/inc/init_db.inc.php"])' | grep true) - fi - S_RUNNING=$(($(date +%s) - $(curl --silent --insecure https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${CONTAINER_ID}/json | jq .State.StartedAt | xargs -n1 date +%s -d))) - if [ ${S_RUNNING} -lt 360 ]; then - log_msg "Container is running for less than 360 seconds, skipping action..." - elif [[ ! 
-z ${HAS_INITDB} ]]; then - log_msg "Database is being initialized by php-fpm-mailcow, not restarting but delaying checks for a minute..." - sleep 60 + # "SERVICE-mailcow|NODE" restarts a single replica; bare "SERVICE-mailcow" + # broadcasts the restart to every replica of the service. + AGENT_NODE="" + AGENT_SVC="${com_pipe_answer%-mailcow}" + if [[ "${com_pipe_answer}" == *"|"* ]]; then + AGENT_NODE="${com_pipe_answer#*|}" + AGENT_SVC="${com_pipe_answer%|*}" + AGENT_SVC="${AGENT_SVC%-mailcow}" + fi + STARTED_AT_RAW=$(redis-cli -h "${REDIS_SLAVEOF_IP:-redis-mailcow}" -p "${REDIS_SLAVEOF_PORT:-6379}" -a "${REDISPASS}" --no-auth-warning HGET "mailcow.node.${AGENT_SVC}.${AGENT_NODE:-$(hostname)}" started_at 2>/dev/null) + S_RUNNING=999 + if [[ -n "${STARTED_AT_RAW}" ]]; then + S_RUNNING=$(( $(date +%s) - $(date -d "${STARTED_AT_RAW}" +%s 2>/dev/null || echo 0) )) + fi + if [ ${S_RUNNING} -lt 360 ]; then + log_msg "Container is running for less than 360 seconds, skipping action..." + else + if [[ -n "${AGENT_NODE}" ]]; then + log_msg "Sending restart to ${AGENT_SVC} node ${AGENT_NODE} via control bus..." + mailcow-agent-cli send "${AGENT_SVC}" restart "{\"target_node\":\"${AGENT_NODE}\"}" >/dev/null || true else - log_msg "Sending restart command to ${CONTAINER_ID}..." - curl --silent --insecure -XPOST https://dockerapi.${COMPOSE_PROJECT_NAME}_mailcow-network/containers/${CONTAINER_ID}/restart - notify_error "${com_pipe_answer}" - log_msg "Wait for restarted container to settle and continue watching..." - sleep 35 + log_msg "Sending restart broadcast to ${AGENT_SVC} via control bus..." + mailcow-agent-cli send "${AGENT_SVC}" restart >/dev/null || true fi + notify_error "${com_pipe_answer}" + log_msg "Wait for restarted container to settle and continue watching..."
+ sleep 35 fi kill -CONT ${BACKGROUND_TASKS[*]} sleep 1 diff --git a/data/web/admin/dashboard.php b/data/web/admin/dashboard.php index 2867770bc..b0eebb414 100644 --- a/data/web/admin/dashboard.php +++ b/data/web/admin/dashboard.php @@ -16,46 +16,92 @@ if (!isset($_SESSION['gal']) && $license_cache = $redis->Get('LICENSE_STATUS_CAC $js_minifier->add('/web/js/site/dashboard.js'); -// vmail df -$exec_fields = array('cmd' => 'system', 'task' => 'df', 'dir' => '/var/vmail'); -$vmail_df = explode(',', (string)json_decode(docker('post', 'dovecot-mailcow', 'exec', $exec_fields), true)); +$vmail_df_resp = agent('request', 'dovecot', 'exec.df', array('dir' => '/var/vmail'), 5); +$vmail_df = (!empty($vmail_df_resp['ok']) && is_string($vmail_df_resp['result'])) + ? explode(',', $vmail_df_resp['result']) + : array('', '', '', '', '', '/var/vmail'); -// containers -$containers_info = (array) docker('info'); -if ($clamd_status === false) unset($containers_info['clamd-mailcow']); -if ($olefy_status === false) unset($containers_info['olefy-mailcow']); -ksort($containers_info); -$containers = array(); -foreach ($containers_info as $container => $container_info) { - if (!isset($container_info['State']) || !is_array($container_info['State']) || !isset($container_info['State']['StartedAt'])){ - continue; - } - date_default_timezone_set('UTC'); - $StartedAt = date_parse($container_info['State']['StartedAt']); - if ($StartedAt['hour'] !== false) { - $date = new \DateTime(); - $date->setTimestamp(mktime( - $StartedAt['hour'], - $StartedAt['minute'], - $StartedAt['second'], - $StartedAt['month'], - $StartedAt['day'], - $StartedAt['year'])); - try { - $user_tz = new DateTimeZone(getenv('TZ')); - $date->setTimezone($user_tz); - $container_info['State']['StartedAtHR'] = $date->format('r'); - } catch(Exception $e) { - $container_info['State']['StartedAtHR'] = '?'; - } - } - else { - $container_info['State']['StartedAtHR'] = '?'; - } - $containers[$container] = $container_info; 
+$known_services = agent('services'); + +try { + $tz_obj = new DateTimeZone(getenv('TZ') ?: 'UTC'); +} +catch (Exception $e) { + $tz_obj = new DateTimeZone('UTC'); } -// get mailcow data +$containers = array(); +foreach ($known_services as $svc) { + $live_nodes = agent('live_nodes', $svc); + $running = !empty($live_nodes); + $first_node = $running ? $live_nodes[0] : ''; + $first_meta = $running ? (agent('node_meta', $svc, $first_node) ?: array()) : array(); + + $started_at_hr = '—'; + $started_at_iso = isset($first_meta['started_at']) ? $first_meta['started_at'] : ''; + if ($started_at_iso !== '') { + try { + $d = new DateTime($started_at_iso); + $d->setTimezone($tz_obj); + $started_at_hr = $d->format('r'); + } + catch (Exception $e) {} + } + + $nodes = array(); + $unhealthy_nodes = 0; + $first_unhealthy_detail = ''; + foreach ($live_nodes as $n) { + $m = agent('node_meta', $svc, $n) ?: array(); + $s = agent('node_stats', $svc, $n) ?: array(); + $node_health = isset($m['health']) ? $m['health'] : ''; + $node_health_detail = isset($m['health_detail']) ? $m['health_detail'] : ''; + if ($node_health === 'fail') { + $unhealthy_nodes++; + if ($first_unhealthy_detail === '') { + $first_unhealthy_detail = $node_health_detail; + } + } + $nodes[] = array( + 'NodeId' => $n, + 'Image' => isset($m['image']) ? $m['image'] : '', + 'StartedAt' => isset($m['started_at']) ? $m['started_at'] : '', + 'Version' => isset($m['version']) ? $m['version'] : '', + 'CPUPercent' => isset($s['cpu_percent']) ? $s['cpu_percent'] : '', + 'MemoryBytes' => isset($s['memory_bytes']) ? $s['memory_bytes'] : '', + 'Health' => $node_health, + 'HealthDetail' => $node_health_detail + ); + } + + $service_health = 'unknown'; + if ($running) { + $service_health = ($unhealthy_nodes === 0) ? 'ok' : (($unhealthy_nodes === count($live_nodes)) ? 'fail' : 'degraded'); + } + + $containers[$svc . '-mailcow'] = array( + 'Service' => $svc, + 'State' => array( + 'Running' => $running ? 
1 : 0, + 'NodeCount' => count($live_nodes), + 'UnhealthyCount' => $unhealthy_nodes, + 'Health' => $service_health, + 'HealthDetail' => $first_unhealthy_detail, + 'StartedAt' => $started_at_iso, + 'StartedAtHR' => $started_at_hr + ), + 'Config' => array( + 'Image' => isset($first_meta['image']) ? $first_meta['image'] : '' + ), + 'Id' => $first_node, + 'Nodes' => $nodes, + 'External' => false + ); +} + +$infra_containers = infra('status'); +ksort($containers); + $hostname = getenv('MAILCOW_HOSTNAME'); $timezone = getenv('TZ'); @@ -70,6 +116,7 @@ $template_data = [ 'clamd_status' => $clamd_status, 'olefy_status' => $olefy_status, 'containers' => $containers, + 'infra_containers' => $infra_containers, 'ip_check' => customize('get', 'ip_check'), 'lang_admin' => json_encode($lang['admin']), 'lang_debug' => json_encode($lang['debug']), @@ -77,5 +124,3 @@ $template_data = [ ]; require_once $_SERVER['DOCUMENT_ROOT'] . '/inc/footer.inc.php'; - - diff --git a/data/web/inc/ajax/container_ctrl.php b/data/web/inc/ajax/container_ctrl.php index 48c21cb13..6bfa41e30 100644 --- a/data/web/inc/ajax/container_ctrl.php +++ b/data/web/inc/ajax/container_ctrl.php @@ -1,59 +1,35 @@ -OK' : 'Error: ' . $response['msg'] . ''; - if ($response['type'] == "success") { - break; - } - usleep(1500000); - $retry++; - } - echo (!isset($last_response)) ? 'Already running' : $last_response; - } - if ($_GET['action'] == "stop") { - header('Content-Type: text/html; charset=utf-8'); - $retry = 0; - while (docker('info', $_GET['service'])['State']['Running'] == 1 && $retry <= 3) { - $response = docker('post', $_GET['service'], 'stop'); - $response = json_decode($response, true); - $last_response = ($response['type'] == "success") ? 'OK' : 'Error: ' . $response['msg'] . ''; - if ($response['type'] == "success") { - break; - } - usleep(1500000); - $retry++; - } - echo (!isset($last_response)) ? 
'Not running' : $last_response; - } - if ($_GET['action'] == "restart") { - header('Content-Type: text/html; charset=utf-8'); - $response = docker('post', $_GET['service'], 'restart'); - $response = json_decode($response, true); - $last_response = ($response['type'] == "success") ? 'OK' : 'Error: ' . $response['msg'] . ''; - echo (!isset($last_response)) ? 'Cannot restart container' : $last_response; - } - if ($_GET['action'] == "logs") { - $lines = (empty($_GET['lines']) || !is_numeric($_GET['lines'])) ? 1000 : $_GET['lines']; - header('Content-Type: text/plain; charset=utf-8'); - print_r(preg_split('/\n/', docker('logs', $_GET['service'], $lines))); - } -} - -?> + $node) : array(); +$resp = agent('request', $service, 'restart', $args, 60); +header('Content-Type: text/html; charset=utf-8'); +if (agent('ok', $resp)) { + echo '' . htmlspecialchars($lang['success']['service_restart_ok']) . ''; +} +else { + $err_key = agent('error_lang', $resp); + $err_msg = isset($lang['danger'][$err_key]) + ? sprintf($lang['danger'][$err_key], $service) + : $lang['danger']['agent_unknown_error']; + echo '' . htmlspecialchars($err_msg) . ''; +} diff --git a/data/web/inc/functions.agent.inc.php b/data/web/inc/functions.agent.inc.php new file mode 100644 index 000000000..2bf460106 --- /dev/null +++ b/data/web/inc/functions.agent.inc.php @@ -0,0 +1,281 @@ +zRangeByScore('mailcow.nodes.' . $_service, (string)(time() - 30), '+inf'); + } + catch (RedisException $e) { + return array(); + } + return is_array($members) ? $members : array(); + break; + case 'node_meta': + try { + $h = $redis->hGetAll('mailcow.node.' . $_service . '.' . $_data); + } + catch (RedisException $e) { + return null; + } + return $h ?: null; + break; + case 'node_stats': + try { + $h = $redis->hGetAll('mailcow.stats.' . $_service . '.' . 
$_data); + } + catch (RedisException $e) { + return null; + } + return $h ?: null; + break; + case 'stats': + $out = array(); + foreach (agent('live_nodes', $_service) as $node_id) { + $stats = agent('node_stats', $_service, $node_id); + if ($stats) { + $out[$node_id] = $stats; + } + } + return $out; + break; + case 'publish': + $env = array( + 'cmd' => $_data, + 'request_id' => agent('request_id'), + 'args' => (object)(is_array($_args) ? $_args : array()), + 'issued_by' => 'mailcow-php' + ); + try { + $redis->publish('mailcow.control.' . $_service, json_encode($env)); + } + catch (RedisException $e) { + return false; + } + return true; + break; + case 'request': + $rid = agent('request_id'); + $reply_to = 'mailcow.reply.' . $rid; + $env = array( + 'cmd' => $_data, + 'request_id' => $rid, + 'args' => (object)(is_array($_args) ? $_args : array()), + 'reply_to' => $reply_to, + 'deadline' => gmdate('Y-m-d\TH:i:s\Z', time() + $_timeout), + 'issued_by' => 'mailcow-php' + ); + try { + $subs = $redis->publish('mailcow.control.' . $_service, json_encode($env)); + if ($subs === 0) { + return array('ok' => false, 'result' => null, 'error' => $_service, 'error_code' => AGENT_ERR_NOT_FOUND, 'node' => '', 'duration_ms' => 0); + } + $popped = $redis->blPop(array($reply_to), $_timeout); + } + catch (RedisException $e) { + return array('ok' => false, 'result' => null, 'error' => $e->getMessage(), 'error_code' => AGENT_ERR_INTERNAL, 'node' => '', 'duration_ms' => 0); + } + if (!$popped || count($popped) < 2) { + return array('ok' => false, 'result' => null, 'error' => '', 'error_code' => AGENT_ERR_TIMEOUT, 'node' => '', 'duration_ms' => 0); + } + $resp = json_decode($popped[1], true); + if (!is_array($resp)) { + return array('ok' => false, 'result' => null, 'error' => 'malformed reply', 'error_code' => AGENT_ERR_INTERNAL, 'node' => '', 'duration_ms' => 0); + } + return array( + 'ok' => !empty($resp['ok']), + 'result' => isset($resp['result']) ? 
$resp['result'] : null, + 'error' => isset($resp['error']) ? $resp['error'] : '', + 'error_code' => isset($resp['error_code']) ? $resp['error_code'] : '', + 'node' => isset($resp['node']) ? $resp['node'] : '', + 'duration_ms' => isset($resp['duration_ms']) ? $resp['duration_ms'] : 0 + ); + break; + case 'request_all': + $rid = agent('request_id'); + $reply_to = 'mailcow.reply.' . $rid; + $env = array( + 'cmd' => $_data, + 'request_id' => $rid, + 'args' => (object)(is_array($_args) ? $_args : array()), + 'reply_to' => $reply_to, + 'deadline' => gmdate('Y-m-d\TH:i:s\Z', time() + $_timeout), + 'issued_by' => 'mailcow-php' + ); + $expected = max(1, count(agent('live_nodes', $_service))); + try { + $subs = (int)$redis->publish('mailcow.control.' . $_service, json_encode($env)); + } + catch (RedisException $e) { + return array('responses' => array(), 'expected_nodes' => $expected, 'received_nodes' => array(), 'missing_nodes' => array(), 'error' => $e->getMessage()); + } + if ($subs === 0) { + return array('responses' => array(), 'expected_nodes' => 0, 'received_nodes' => array(), 'missing_nodes' => array()); + } + $responses = array(); + $deadline = microtime(true) + $_timeout; + for ($i = 0; $i < $subs; $i++) { + $remaining = (int)ceil($deadline - microtime(true)); + if ($remaining <= 0) break; + try { + $popped = $redis->blPop(array($reply_to), $remaining); + } + catch (RedisException $e) { + break; + } + if (!$popped || count($popped) < 2) break; + $resp = json_decode($popped[1], true); + if (is_array($resp)) { + $responses[] = array( + 'ok' => !empty($resp['ok']), + 'result' => isset($resp['result']) ? $resp['result'] : null, + 'error' => isset($resp['error']) ? $resp['error'] : '', + 'error_code' => isset($resp['error_code']) ? $resp['error_code'] : '', + 'node' => isset($resp['node']) ? $resp['node'] : '', + 'duration_ms' => isset($resp['duration_ms']) ? 
$resp['duration_ms'] : 0 + ); + } + } + $received_nodes = array(); + foreach ($responses as $r) { + if (!empty($r['node'])) { + $received_nodes[] = $r['node']; + } + } + $live = agent('live_nodes', $_service); + return array( + 'responses' => $responses, + 'expected_nodes' => $expected, + 'received_nodes' => array_values(array_unique($received_nodes)), + 'missing_nodes' => array_values(array_diff($live, $received_nodes)) + ); + break; + case 'ok': + if (isset($_service['responses'])) { + foreach ($_service['responses'] as $r) { + if (!empty($r['ok'])) return true; + } + return false; + } + return !empty($_service['ok']); + break; + case 'first_error': + foreach (isset($_service['responses']) ? $_service['responses'] : array() as $r) { + if (empty($r['ok']) && !empty($r['error'])) return $r['error']; + } + return ''; + break; + case 'error_lang': + $code = is_array($_service) && isset($_service['error_code']) ? $_service['error_code'] : ''; + switch ($code) { + case AGENT_ERR_NOT_FOUND: + return 'no_live_agent'; + case AGENT_ERR_TIMEOUT: + return 'agent_timeout'; + default: + return 'agent_unknown_error'; + } + break; + } +} + +function infra($_action, $_service = null) { + global $redis; + global $pdo; + switch ($_action) { + case 'health': + switch ($_service) { + case 'redis': + try { + if ($redis instanceof Redis && $redis->ping()) { + $info = $redis->info('server'); + $ver = is_array($info) && isset($info['redis_version']) ? $info['redis_version'] : ''; + return array('ok' => true, 'image' => 'redis ' . $ver, 'error' => ''); + } + } + catch (RedisException $e) { + return array('ok' => false, 'image' => 'redis', 'error' => $e->getMessage()); + } + return array('ok' => false, 'image' => 'redis', 'error' => 'PING returned false'); + break; + case 'mysql': + try { + if ($pdo instanceof PDO) { + $row = $pdo->query('SELECT VERSION() AS v')->fetch(PDO::FETCH_ASSOC); + $ver = $row && isset($row['v']) ? 
$row['v'] : ''; + return array('ok' => true, 'image' => 'mariadb/mysql ' . $ver, 'error' => ''); + } + } + catch (Exception $e) { + return array('ok' => false, 'image' => 'mariadb/mysql', 'error' => $e->getMessage()); + } + return array('ok' => false, 'image' => 'mariadb/mysql', 'error' => 'no PDO handle'); + break; + case 'memcached': + $sock = @fsockopen('memcached', 11211, $errno, $errstr, 2); + if (!$sock) { + return array('ok' => false, 'image' => 'memcached', 'error' => $errstr ?: 'connection refused'); + } + stream_set_timeout($sock, 2); + fwrite($sock, "version\r\n"); + $line = fgets($sock, 64); + fclose($sock); + if (is_string($line) && strpos($line, 'VERSION') === 0) { + return array('ok' => true, 'image' => 'memcached ' . trim(substr($line, strlen('VERSION '))), 'error' => ''); + } + return array('ok' => false, 'image' => 'memcached', 'error' => 'no VERSION reply'); + break; + } + break; + case 'status': + $out = array(); + $defs = array( + 'redis-mailcow' => 'redis', + 'mysql-mailcow' => 'mysql', + 'memcached-mailcow' => 'memcached' + ); + foreach ($defs as $key => $svc) { + $h = infra('health', $svc); + $out[$key] = array( + 'Service' => $svc, + 'State' => array( + 'Running' => $h['ok'] ? 1 : 0, + 'NodeCount' => $h['ok'] ? 
1 : 0, + 'StartedAt' => '', + 'StartedAtHR' => '—', + 'Error' => $h['error'] + ), + 'Config' => array('Image' => $h['image']), + 'Id' => $svc, + 'Nodes' => array(), + 'External' => true + ); + } + return $out; + break; + } +} diff --git a/data/web/inc/functions.docker.inc.php b/data/web/inc/functions.docker.inc.php deleted file mode 100644 index daed17c63..000000000 --- a/data/web/inc/functions.docker.inc.php +++ /dev/null @@ -1,207 +0,0 @@ - "container_post", - "container_name" => $service_name, - "post_action" => $attr1, - "request" => $attr2 - ); - - $redis->publish("MC_CHANNEL", json_encode($request)); - return true; - break; - } -} diff --git a/data/web/inc/functions.fail2ban.inc.php b/data/web/inc/functions.fail2ban.inc.php index 5962237fc..9a26f004f 100644 --- a/data/web/inc/functions.fail2ban.inc.php +++ b/data/web/inc/functions.fail2ban.inc.php @@ -109,7 +109,7 @@ function fail2ban($_action, $_data = null, $_extra = null) { return false; } // Rules will also be recreated on log events, but rules may seem empty for a second in the UI - docker('post', 'netfilter-mailcow', 'restart'); + agent('request', 'netfilter', 'restart', array(), 30); $fail_count = 0; $regex_result = json_decode($redis->Get('F2B_REGEX'), true); while (empty($regex_result) && $fail_count < 10) { @@ -206,7 +206,7 @@ function fail2ban($_action, $_data = null, $_extra = null) { try { $redis->hSet('F2B_BLACKLIST', $network, 1); $redis->hDel('F2B_WHITELIST', $network, 1); - //$response = docker('post', 'netfilter-mailcow', 'restart'); + // netfilter picks up the redis changes } catch (RedisException $e) { $_SESSION['return'][] = array( diff --git a/data/web/inc/functions.inc.php b/data/web/inc/functions.inc.php index 89f14b574..58898088f 100644 --- a/data/web/inc/functions.inc.php +++ b/data/web/inc/functions.inc.php @@ -2239,30 +2239,19 @@ function rspamd_ui($action, $data = null) { ); return false; } - $docker_return = docker('post', 'rspamd-mailcow', 'exec', array('cmd' => 'rspamd', 'task' 
=> 'worker_password', 'raw' => $rspamd_ui_pass), array('Content-Type: application/json')); - if ($docker_return_array = json_decode($docker_return, true)) { - if ($docker_return_array['type'] == 'success') { - $_SESSION['return'][] = array( - 'type' => 'success', - 'log' => array(__FUNCTION__, '*', '*'), - 'msg' => 'rspamd_ui_pw_set' - ); - return true; - } - else { - $_SESSION['return'][] = array( - 'type' => $docker_return_array['type'], - 'log' => array(__FUNCTION__, '*', '*'), - 'msg' => $docker_return_array['msg'] - ); - return false; - } - } - else { + $resp = agent('request_all', 'rspamd', 'exec.set-worker-password', array('password' => $rspamd_ui_pass), 30); + if (agent('ok', $resp)) { + $_SESSION['return'][] = array( + 'type' => 'success', + 'log' => array(__FUNCTION__, '*', '*'), + 'msg' => 'rspamd_ui_pw_set' + ); + return true; + } else { $_SESSION['return'][] = array( 'type' => 'danger', 'log' => array(__FUNCTION__, '*', '*'), - 'msg' => 'unknown' + 'msg' => agent('first_error', $resp) ?: 'rspamd: no live agent responded' ); return false; } diff --git a/data/web/inc/functions.mailbox.inc.php b/data/web/inc/functions.mailbox.inc.php index adb330ea8..c2811cb74 100644 --- a/data/web/inc/functions.mailbox.inc.php +++ b/data/web/inc/functions.mailbox.inc.php @@ -125,8 +125,8 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { fwrite($filter_handle, $script_data); fclose($filter_handle); } - $restart_response = json_decode(docker('post', 'dovecot-mailcow', 'restart'), true); - if ($restart_response['type'] == "success") { + $restart_response = agent('request', 'dovecot', 'restart', array(), 30); + if (agent('ok', $restart_response)) { $_SESSION['return'][] = array( 'type' => 'success', 'log' => array(__FUNCTION__, $_action, $_type, $_data_log, $_attr), @@ -160,8 +160,8 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { fwrite($filter_handle, $script_data); fclose($filter_handle); } - $restart_response = 
json_decode(docker('post', 'dovecot-mailcow', 'restart'), true); - if ($restart_response['type'] == "success") { + $restart_response = agent('request', 'dovecot', 'restart', array(), 30); + if (agent('ok', $restart_response)) { $_SESSION['return'][] = array( 'type' => 'success', 'log' => array(__FUNCTION__, $_action, $_type, $_data_log, $_attr), @@ -669,8 +669,8 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { } } if (!empty($restart_sogo)) { - $restart_response = json_decode(docker('post', 'sogo-mailcow', 'restart'), true); - if ($restart_response['type'] == "success") { + $restart_response = agent('request', 'sogo', 'restart', array(), 30); + if (agent('ok', $restart_response)) { $_SESSION['return'][] = array( 'type' => 'success', 'log' => array(__FUNCTION__, $_action, $_type, $_data_log, $_attr), @@ -3553,22 +3553,30 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { // get imap acls try { - $exec_fields = array( - 'cmd' => 'doveadm', - 'task' => 'get_acl', - 'id' => $old_username - ); - $imap_acls = json_decode(docker('post', 'dovecot-mailcow', 'exec', $exec_fields), true); + $acl_agg = agent('request_all', 'dovecot', 'exec.acl-get', array('user' => $old_username), 10); + $imap_acls = array(); + $seen = array(); + foreach ($acl_agg['responses'] as $r) { + if (empty($r['ok'])) continue; + foreach ((isset($r['result']['acls']) ? $r['result']['acls'] : array()) as $a) { + $key = (isset($a['mailbox']) ? $a['mailbox'] : '') . '|' . (isset($a['identifier']) ? $a['identifier'] : ''); + if (isset($seen[$key])) continue; + $seen[$key] = true; + $imap_acls[] = array( + 'user' => $old_username, + 'mailbox' => isset($a['mailbox']) ? $a['mailbox'] : '', + 'id' => isset($a['identifier']) ? $a['identifier'] : '', + 'rights' => isset($a['rights']) ? 
$a['rights'] : '', + ); + } + } // delete imap acls foreach ($imap_acls as $imap_acl) { - $exec_fields = array( - 'cmd' => 'doveadm', - 'task' => 'delete_acl', - 'user' => $imap_acl['user'], - 'mailbox' => $imap_acl['mailbox'], - 'id' => $imap_acl['id'] - ); - docker('post', 'dovecot-mailcow', 'exec', $exec_fields); + agent('request_all', 'dovecot', 'exec.acl-delete', array( + 'user' => $imap_acl['user'], + 'mailbox' => $imap_acl['mailbox'], + 'identifier' => $imap_acl['id'], + ), 10); } } catch (Exception $e) { $_SESSION['return'][] = array( @@ -3649,41 +3657,27 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { } // move maildir - $exec_fields = array( - 'cmd' => 'maildir', - 'task' => 'move', - 'old_maildir' => $domain . '/' . $old_local_part, - 'new_maildir' => $domain . '/' . $new_local_part - ); - if (getenv("CLUSTERMODE") == "replication") { - // broadcast to each dovecot container - docker('broadcast', 'dovecot-mailcow', 'exec', $exec_fields); - } else { - docker('post', 'dovecot-mailcow', 'exec', $exec_fields); - } + agent('request_all', 'dovecot', 'exec.maildir-move', array( + 'from' => $domain . '/' . $old_local_part, + 'to' => $domain . '/' . $new_local_part, + ), 30); // rename username in sogo - $exec_fields = array( - 'cmd' => 'sogo', - 'task' => 'rename_user', - 'old_username' => $old_username, - 'new_username' => $new_username - ); - docker('post', 'sogo-mailcow', 'exec', $exec_fields); + agent('request', 'sogo', 'exec.rename-user', array( + 'old' => $old_username, + 'new' => $new_username, + ), 30); // set imap acls foreach ($imap_acls as $imap_acl) { $user_id = ($imap_acl['id'] == $old_username) ? $new_username : $imap_acl['id']; $user = ($imap_acl['user'] == $old_username) ? 
$new_username : $imap_acl['user']; - $exec_fields = array( - 'cmd' => 'doveadm', - 'task' => 'set_acl', - 'user' => $user, - 'mailbox' => $imap_acl['mailbox'], - 'id' => $user_id, - 'rights' => $imap_acl['rights'] - ); - docker('post', 'dovecot-mailcow', 'exec', $exec_fields); + agent('request_all', 'dovecot', 'exec.acl-set', array( + 'user' => $user, + 'mailbox' => $imap_acl['mailbox'], + 'identifier' => $user_id, + 'rights' => $imap_acl['rights'], + ), 15); } // create alias @@ -4553,24 +4547,19 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { else { $_data = $_SESSION['mailcow_cc_username']; } - $exec_fields = array( - 'cmd' => 'sieve', - 'task' => 'list', - 'username' => $_data - ); - $filters = docker('post', 'dovecot-mailcow', 'exec', $exec_fields); - $filters = array_filter(preg_split("/(\r\n|\n|\r)/",$filters)); - foreach ($filters as $filter) { + $list_resp = agent('request', 'dovecot', 'exec.sieve-list', array('user' => $_data), 10); + if (empty($list_resp['ok'])) { + return false; + } + $scripts = isset($list_resp['result']['scripts']) ? $list_resp['result']['scripts'] : array(); + foreach ($scripts as $filter) { if (preg_match('/.+ ACTIVE/i', $filter)) { - $exec_fields = array( - 'cmd' => 'sieve', - 'task' => 'print', - 'script_name' => substr($filter, 0, -7), - 'username' => $_data - ); - $script = docker('post', 'dovecot-mailcow', 'exec', $exec_fields); - // Remove first line - return preg_replace('/^.+\n/', '', $script); + $print_resp = agent('request', 'dovecot', 'exec.sieve-print', array( + 'user' => $_data, + 'script' => substr($filter, 0, -7), + ), 10); + if (empty($print_resp['ok'])) return false; + return isset($print_resp['result']['body']) ? 
$print_resp['result']['body'] : ''; } } return false; @@ -5712,13 +5701,12 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { ); continue; } - $exec_fields = array('cmd' => 'maildir', 'task' => 'cleanup', 'maildir' => $domain); - $maildir_gc = json_decode(docker('post', 'dovecot-mailcow', 'exec', $exec_fields), true); - if ($maildir_gc['type'] != 'success') { + $maildir_gc = agent('request_all', 'dovecot', 'exec.maildir-cleanup', array('maildir' => $domain), 30); + if (!agent('ok', $maildir_gc)) { $_SESSION['return'][] = array( 'type' => 'warning', 'log' => array(__FUNCTION__, $_action, $_type, $_data_log, $_attr), - 'msg' => 'Could not move mail storage to garbage collector: ' . $maildir_gc['msg'] + 'msg' => 'Could not move mail storage to garbage collector: ' . agent('first_error', $maildir_gc) ); } $stmt = $pdo->prepare("DELETE FROM `domain` WHERE `domain` = :domain"); @@ -5967,20 +5955,13 @@ function mailbox($_action, $_type, $_data = null, $_extra = null) { $mailbox_details = mailbox('get', 'mailbox_details', $username); if (!empty($mailbox_details['domain']) && !empty($mailbox_details['local_part'])) { $maildir = $mailbox_details['domain'] . '/' . $mailbox_details['local_part']; - $exec_fields = array('cmd' => 'maildir', 'task' => 'cleanup', 'maildir' => $maildir); - - if (getenv("CLUSTERMODE") == "replication") { - // broadcast to each dovecot container - docker('broadcast', 'dovecot-mailcow', 'exec', $exec_fields); - } else { - $maildir_gc = json_decode(docker('post', 'dovecot-mailcow', 'exec', $exec_fields), true); - if ($maildir_gc['type'] != 'success') { - $_SESSION['return'][] = array( - 'type' => 'warning', - 'log' => array(__FUNCTION__, $_action, $_type, $_data_log, $_attr), - 'msg' => 'Could not move maildir to garbage collector: ' . 
$maildir_gc['msg'] - ); - } + $maildir_gc = agent('request_all', 'dovecot', 'exec.maildir-cleanup', array('maildir' => $maildir), 30); + if (!agent('ok', $maildir_gc)) { + $_SESSION['return'][] = array( + 'type' => 'warning', + 'log' => array(__FUNCTION__, $_action, $_type, $_data_log, $_attr), + 'msg' => 'Could not move maildir to garbage collector: ' . agent('first_error', $maildir_gc) + ); } } else { diff --git a/data/web/inc/functions.mailq.inc.php b/data/web/inc/functions.mailq.inc.php index f42ab7cc1..3586d3777 100644 --- a/data/web/inc/functions.mailq.inc.php +++ b/data/web/inc/functions.mailq.inc.php @@ -1,121 +1,138 @@ - 'danger', - 'log' => array(__FUNCTION__, $_action, $_data), - 'msg' => 'access_denied' - ); - return false; - } - function process_mailq_output($returned_output, $_action, $_data) { - if ($returned_output !== NULL) { - if ($_action == 'cat') { - logger(array('return' => array( - array( - 'type' => 'success', - 'log' => array(__FUNCTION__, $_action, $_data), - 'msg' => 'queue_cat_success' - ) - ))); - return $returned_output; - } - else { - if (isset($returned_output['type']) && $returned_output['type'] == 'danger') { - $_SESSION['return'][] = array( - 'type' => 'danger', - 'log' => array(__FUNCTION__, $_action, $_data), - 'msg' => 'Error: ' . 
$returned_output['msg'] - ); - } - if (isset($returned_output['type']) && $returned_output['type'] == 'success') { - $_SESSION['return'][] = array( - 'type' => 'success', - 'log' => array(__FUNCTION__, $_action, $_data), - 'msg' => 'queue_command_success' - ); - } - } - } - else { - $_SESSION['return'][] = array( - 'type' => 'danger', - 'log' => array(__FUNCTION__, $_action, $_data), - 'msg' => 'unknown' - ); - } - } - if ($_action == 'get') { - $mailq_lines = docker('post', 'postfix-mailcow', 'exec', array('cmd' => 'mailq', 'task' => 'list')); - $lines = 0; - // Hard limit to 10000 items - foreach (preg_split("/((\r?\n)|(\r\n?))/", $mailq_lines) as $mailq_item) if ($lines++ < 10000) { - if (empty($mailq_item) || $mailq_item == '1') { - continue; - } - $mq_line = json_decode($mailq_item, true); - if ($mq_line !== NULL) { - $rcpts = array(); - foreach ($mq_line['recipients'] as $rcpt) { - if (isset($rcpt['delay_reason'])) { - $rcpts[] = $rcpt['address'] . ' (' . $rcpt['delay_reason'] . ')'; - } - else { - $rcpts[] = $rcpt['address']; - } - } - if (!empty($rcpts)) { - $mq_line['recipients'] = $rcpts; - } - $line[] = $mq_line; - } - } - if (!isset($line) || empty($line)) { - return '[]'; - } - else { - return json_encode($line); - } - } - elseif ($_action == 'delete') { - if (!is_array($_data['qid'])) { - $qids = array(); - $qids[] = $_data['qid']; - } - else { - $qids = $_data['qid']; - } - $docker_return = docker('post', 'postfix-mailcow', 'exec', array('cmd' => 'mailq', 'task' => 'delete', 'items' => $qids)); - process_mailq_output(json_decode($docker_return, true), $_action, $_data); - } - elseif ($_action == 'cat') { - if (!is_array($_data['qid'])) { - $qids = array(); - $qids[] = $_data['qid']; - } - else { - $qids = $_data['qid']; - } - $docker_return = docker('post', 'postfix-mailcow', 'exec', array('cmd' => 'mailq', 'task' => 'cat', 'items' => $qids)); - return process_mailq_output($docker_return, $_action, $_data); - } - elseif ($_action == 'edit') { - if 
(in_array($_data['action'], array('hold', 'unhold', 'deliver'))) { - if (!is_array($_data['qid'])) { - $qids = array(); - $qids[] = $_data['qid']; - } - else { - $qids = $_data['qid']; - } - if (!empty($qids)) { - $docker_return = docker('post', 'postfix-mailcow', 'exec', array('cmd' => 'mailq', 'task' => $_data['action'], 'items' => $qids)); - process_mailq_output(json_decode($docker_return, true), $_action, $_data); - } - } - if (in_array($_data['action'], array('flush', 'super_delete'))) { - $docker_return = docker('post', 'postfix-mailcow', 'exec', array('cmd' => 'mailq', 'task' => $_data['action'])); - process_mailq_output(json_decode($docker_return, true), $_action, $_data); - } - } -} + 'danger', + 'log' => array(__FUNCTION__, $_action, $_data), + 'msg' => 'access_denied' + ); + return false; + } + switch ($_action) { + case 'get': + $agg = agent('request_all', 'postfix', 'exec.mailq', array(), 15); + $lines = array(); + foreach ($agg['responses'] as $r) { + if (empty($r['ok'])) continue; + $queue = isset($r['result']['queue']) ? $r['result']['queue'] : array(); + foreach ($queue as $entry) { + if (is_array($entry)) { + $entry['node'] = $r['node']; + if (!empty($entry['recipients']) && is_array($entry['recipients'])) { + $rcpts = array(); + foreach ($entry['recipients'] as $rcpt) { + $addr = isset($rcpt['address']) ? $rcpt['address'] : ''; + if (isset($rcpt['delay_reason'])) { + $rcpts[] = $addr . ' (' . $rcpt['delay_reason'] . ')'; + } + else { + $rcpts[] = $addr; + } + } + $entry['recipients'] = $rcpts; + } + $lines[] = $entry; + } + if (count($lines) >= 10000) break 2; + } + } + return empty($lines) ? '[]' : json_encode($lines); + break; + case 'delete': + $qids = isset($_data['qid']) && is_array($_data['qid']) ? 
$_data['qid'] : array($_data['qid']); + $ok_count = 0; + $failed = 0; + foreach ($qids as $qid) { + $agg = agent('request_all', 'postfix', 'exec.delete-from-queue', array('queue_id' => $qid), 10); + if (agent('ok', $agg)) { + $ok_count++; + } + else { + $failed++; + } + } + $ok = ($ok_count > 0 && $failed === 0); + $_SESSION['return'][] = array( + 'type' => $ok ? 'success' : 'danger', + 'log' => array(__FUNCTION__, $_action, $_data), + 'msg' => $ok ? 'queue_command_success' : 'queue_command_failed' + ); + return $ok; + break; + case 'cat': + $qids = isset($_data['qid']) && is_array($_data['qid']) ? $_data['qid'] : array($_data['qid']); + $body = ''; + foreach ($qids as $qid) { + $agg = agent('request_all', 'postfix', 'exec.cat-queue', array('queue_id' => $qid), 15); + foreach ($agg['responses'] as $r) { + if (!empty($r['ok']) && !empty($r['result']['body'])) { + $body .= $r['result']['body']; + } + } + } + if ($body === '') { + $_SESSION['return'][] = array( + 'type' => 'danger', + 'log' => array(__FUNCTION__, $_action, $_data), + 'msg' => 'queue_cat_empty' + ); + return null; + } + $_SESSION['return'][] = array( + 'type' => 'success', + 'log' => array(__FUNCTION__, $_action, $_data), + 'msg' => 'queue_cat_success' + ); + return $body; + break; + case 'edit': + $cmd_map = array( + 'hold' => 'exec.hold-queue', + 'unhold' => 'exec.unhold-queue', + 'deliver' => 'exec.deliver-now' + ); + if (isset($cmd_map[$_data['action']])) { + $qids = isset($_data['qid']) && is_array($_data['qid']) ? $_data['qid'] : array($_data['qid']); + $ok_count = 0; + $failed = 0; + foreach ($qids as $qid) { + $agg = agent('request_all', 'postfix', $cmd_map[$_data['action']], array('queue_id' => $qid), 10); + if (agent('ok', $agg)) { + $ok_count++; + } + else { + $failed++; + } + } + $ok = ($ok_count > 0 && $failed === 0); + $_SESSION['return'][] = array( + 'type' => $ok ? 'success' : 'danger', + 'log' => array(__FUNCTION__, $_action, $_data), + 'msg' => $ok ? 
'queue_command_success' : 'queue_command_failed' + ); + return $ok; + } + if ($_data['action'] == 'flush') { + $agg = agent('request_all', 'postfix', 'exec.flush-queue', array(), 30); + $ok = agent('ok', $agg); + $_SESSION['return'][] = array( + 'type' => $ok ? 'success' : 'danger', + 'log' => array(__FUNCTION__, $_action, $_data), + 'msg' => $ok ? 'queue_command_success' : 'queue_command_failed' + ); + return $ok; + } + if ($_data['action'] == 'super_delete') { + $agg = agent('request_all', 'postfix', 'exec.super-delete', array(), 30); + $ok = agent('ok', $agg); + $_SESSION['return'][] = array( + 'type' => $ok ? 'success' : 'danger', + 'log' => array(__FUNCTION__, $_action, $_data), + 'msg' => $ok ? 'queue_command_success' : 'queue_command_failed' + ); + return $ok; + } + break; + } +} diff --git a/data/web/inc/prerequisites.inc.php b/data/web/inc/prerequisites.inc.php index 5e57a4d41..5e20c3d16 100644 --- a/data/web/inc/prerequisites.inc.php +++ b/data/web/inc/prerequisites.inc.php @@ -105,14 +105,6 @@ http_response_code(500); -
Connection to dockerapi container failed.

The following error was reported:
-
-Loading...'); $('#statusTriggerRestartContainer').html(lang_footer.restarting_container); + var payload = { 'service': container, 'action': 'restart' }; + if (node) payload.node = node; $.ajax({ method: 'get', url: '/inc/ajax/container_ctrl.php', timeout: docker_timeout, - data: { - 'service': container, - 'action': 'restart' - } + data: payload }) .always( function (data, status) { $('#statusTriggerRestartContainer').append(data); diff --git a/data/web/js/site/dashboard.js b/data/web/js/site/dashboard.js index aee361710..ff0f28045 100644 --- a/data/web/js/site/dashboard.js +++ b/data/web/js/site/dashboard.js @@ -23,8 +23,6 @@ $(document).ready(function() { } }); - // set update loop container list - containersToUpdate = {}; // set default ChartJs Font Color Chart.defaults.color = '#999'; // create host cpu and mem charts @@ -72,7 +70,6 @@ $(document).ready(function() { $("#host_show_ip").find(".spinner-border").addClass("d-none"); }); }); - update_container_stats(); }); jQuery(function($){ if (localStorage.getItem("current_page") === null) { @@ -210,6 +207,11 @@ jQuery(function($){ data: 'priority', defaultContent: '' }, + { + title: lang_debug.node, + data: 'node', + defaultContent: '-' + }, { title: lang.message, data: 'message', @@ -692,6 +694,11 @@ jQuery(function($){ data: 'priority', defaultContent: '' }, + { + title: lang_debug.node, + data: 'node', + defaultContent: '-' + }, { title: lang.message, data: 'message', @@ -747,6 +754,11 @@ jQuery(function($){ data: 'priority', defaultContent: '' }, + { + title: lang_debug.node, + data: 'node', + defaultContent: '-' + }, { title: lang.message, data: 'message', @@ -802,6 +814,11 @@ jQuery(function($){ data: 'priority', defaultContent: '' }, + { + title: lang_debug.node, + data: 'node', + defaultContent: '-' + }, { title: lang.message, data: 'message', @@ -862,6 +879,11 @@ jQuery(function($){ data: 'priority', defaultContent: '' }, + { + title: lang_debug.node, + data: 'node', + defaultContent: '-' + }, { title: 
lang.message, data: 'message', @@ -1292,52 +1314,6 @@ jQuery(function($){ // start polling host stats if tab is active onVisible("[id^=tab-containers]", () => update_stats()); - // start polling container stats if collapse is active - var containerElements = document.querySelectorAll(".container-details-collapse"); - for (let i = 0; i < containerElements.length; i++){ - new IntersectionObserver((entries, observer) => { - entries.forEach(entry => { - if(entry.intersectionRatio > 0) { - - if (!containerElements[i].classList.contains("show")){ - var container = containerElements[i].id.replace("Collapse", ""); - var container_id = containerElements[i].getAttribute("data-id"); - - // check if chart exists or needs to be created - if (!Chart.getChart(container + "_DiskIOChart")) - createReadWriteChart(container + "_DiskIOChart", "Read", "Write"); - if (!Chart.getChart(container + "_NetIOChart")) - createReadWriteChart(container + "_NetIOChart", "Recv", "Sent"); - - // add container to polling list - containersToUpdate[container] = { - id: container_id, - state: "idle" - } - - // stop polling if collapse is closed - containerElements[i].addEventListener('hidden.bs.collapse', function () { - var diskIOCtx = Chart.getChart(container + "_DiskIOChart"); - var netIOCtx = Chart.getChart(container + "_NetIOChart"); - - diskIOCtx.data.datasets[0].data = []; - diskIOCtx.data.datasets[1].data = []; - diskIOCtx.data.labels = []; - netIOCtx.data.datasets[0].data = []; - netIOCtx.data.datasets[1].data = []; - netIOCtx.data.labels = []; - - diskIOCtx.update(); - netIOCtx.update(); - - delete containersToUpdate[container]; - }); - } - - } - }); - }).observe(containerElements[i]); - } }); @@ -1351,127 +1327,49 @@ function update_stats(timeout=5){ window.fetch("/api/v1/get/status/host", {method:'GET',cache:'no-cache'}).then(function(response) { return response.json(); }).then(function(data) { - if (data){ - // display table data - $("#host_date").text(data.system_time); - 
$("#host_uptime").text(formatUptime(data.uptime)); - $("#host_cpu_cores").text(data.cpu.cores); - $("#host_cpu_usage").text(parseInt(data.cpu.usage).toString() + "%"); - $("#host_memory_total").text((data.memory.total / (1024 ** 3)).toFixed(2).toString() + "GB"); - $("#host_memory_usage").text(parseInt(data.memory.usage).toString() + "%"); - $("#host_architecture").html(data.architecture); - // update cpu and mem chart - var cpu_chart = Chart.getChart("host_cpu_chart"); - var mem_chart = Chart.getChart("host_mem_chart"); + // Wrapped in try/catch so a malformed payload doesn't break the + // polling loop forever. We always reschedule from .finally. + try { + if (data && data.cpu && data.memory){ + $("#host_date").text(data.system_time || ""); + $("#host_uptime").text(formatUptime(data.uptime)); + $("#host_cpu_cores").text(data.cpu.cores); + $("#host_cpu_usage").text(parseInt(data.cpu.usage).toString() + "%"); + $("#host_memory_total").text((data.memory.total / (1024 ** 3)).toFixed(2).toString() + "GB"); + $("#host_memory_usage").text(parseInt(data.memory.usage).toString() + "%"); + $("#host_architecture").html(data.architecture); - cpu_chart.data.labels.push(data.system_time.split(" ")[1]); - if (cpu_chart.data.labels.length > 30) cpu_chart.data.labels.shift(); - mem_chart.data.labels.push(data.system_time.split(" ")[1]); - if (mem_chart.data.labels.length > 30) mem_chart.data.labels.shift(); + var cpu_chart = Chart.getChart("host_cpu_chart"); + var mem_chart = Chart.getChart("host_mem_chart"); - cpu_chart.data.datasets[0].data.push(data.cpu.usage); - if (cpu_chart.data.datasets[0].data.length > 30) cpu_chart.data.datasets[0].data.shift(); - mem_chart.data.datasets[0].data.push(data.memory.usage); - if (mem_chart.data.datasets[0].data.length > 30) mem_chart.data.datasets[0].data.shift(); + if (cpu_chart && mem_chart && typeof data.system_time === "string") { + var label = data.system_time.split(" ")[1] || ""; + cpu_chart.data.labels.push(label); + if 
(cpu_chart.data.labels.length > 30) cpu_chart.data.labels.shift(); + mem_chart.data.labels.push(label); + if (mem_chart.data.labels.length > 30) mem_chart.data.labels.shift(); - cpu_chart.update(); - mem_chart.update(); + cpu_chart.data.datasets[0].data.push(data.cpu.usage); + if (cpu_chart.data.datasets[0].data.length > 30) cpu_chart.data.datasets[0].data.shift(); + mem_chart.data.datasets[0].data.push(data.memory.usage); + if (mem_chart.data.datasets[0].data.length > 30) mem_chart.data.datasets[0].data.shift(); + + cpu_chart.update(); + mem_chart.update(); + } + } else { + console.warn("update_stats: unexpected host payload", data); + } + } catch (e) { + console.warn("update_stats: render error", e); } - - // run again in n seconds + }).catch(function(e) { + console.warn("update_stats: fetch failed", e); + }).finally(function() { + // Always reschedule so a transient backend hiccup can't kill the poll loop. setTimeout(update_stats, timeout * 1000); }); } -// update specific container stats - every n (default 5s) seconds -function update_container_stats(timeout=5){ - - if ($('#tab-containers').hasClass('active')) { - for (let container in containersToUpdate){ - container_id = containersToUpdate[container].id; - // check if container update stats is already running - if (containersToUpdate[container].state == "running") - continue; - containersToUpdate[container].state = "running"; - - - window.fetch("/api/v1/get/status/container/" + container_id, {method:'GET',cache:'no-cache'}).then(function(response) { - return response.json(); - }).then(function(data) { - var diskIOCtx = Chart.getChart(container + "_DiskIOChart"); - var netIOCtx = Chart.getChart(container + "_NetIOChart"); - - prev_stats = null; - if (data.length >= 2){ - prev_stats = data[data.length -2]; - - // hide spinners if we collected enough data - $('#' + container + "_DiskIOChart").removeClass('d-none'); - $('#' + container + "_DiskIOChart").prev().addClass('d-none'); - $('#' + container + 
"_NetIOChart").removeClass('d-none'); - $('#' + container + "_NetIOChart").prev().addClass('d-none'); - } - - data = data[data.length -1]; - - if (prev_stats != null){ - // calc time diff - var time_diff = (new Date(data.read) - new Date(prev_stats.read)) / 1000; - - // calc disk io b/s - if ('io_service_bytes_recursive' in prev_stats.blkio_stats && prev_stats.blkio_stats.io_service_bytes_recursive !== null){ - var prev_read_bytes = 0; - var prev_write_bytes = 0; - for (var i = 0; i < prev_stats.blkio_stats.io_service_bytes_recursive.length; i++){ - if (prev_stats.blkio_stats.io_service_bytes_recursive[i].op == "read") - prev_read_bytes = prev_stats.blkio_stats.io_service_bytes_recursive[i].value; - else if (prev_stats.blkio_stats.io_service_bytes_recursive[i].op == "write") - prev_write_bytes = prev_stats.blkio_stats.io_service_bytes_recursive[i].value; - } - var read_bytes = 0; - var write_bytes = 0; - for (var i = 0; i < data.blkio_stats.io_service_bytes_recursive.length; i++){ - if (data.blkio_stats.io_service_bytes_recursive[i].op == "read") - read_bytes = data.blkio_stats.io_service_bytes_recursive[i].value; - else if (data.blkio_stats.io_service_bytes_recursive[i].op == "write") - write_bytes = data.blkio_stats.io_service_bytes_recursive[i].value; - } - var diff_bytes_read = (read_bytes - prev_read_bytes) / time_diff; - var diff_bytes_write = (write_bytes - prev_write_bytes) / time_diff; - } - - // calc net io b/s - if ('networks' in prev_stats){ - var prev_recv_bytes = 0; - var prev_sent_bytes = 0; - for (var key in prev_stats.networks){ - prev_recv_bytes += prev_stats.networks[key].rx_bytes; - prev_sent_bytes += prev_stats.networks[key].tx_bytes; - } - var recv_bytes = 0; - var sent_bytes = 0; - for (var key in data.networks){ - recv_bytes += data.networks[key].rx_bytes; - sent_bytes += data.networks[key].tx_bytes; - } - var diff_bytes_recv = (recv_bytes - prev_recv_bytes) / time_diff; - var diff_bytes_sent = (sent_bytes - prev_sent_bytes) / time_diff; - } 
- - addReadWriteChart(diskIOCtx, diff_bytes_read, diff_bytes_write, ""); - addReadWriteChart(netIOCtx, diff_bytes_recv, diff_bytes_sent, ""); - } - - // run again in n seconds - containersToUpdate[container].state = "idle"; - }).catch(err => { - console.log(err); - }); - } - } - - // run again in n seconds - setTimeout(update_container_stats, timeout * 1000); -} // format hosts uptime seconds to readable string function formatUptime(seconds){ seconds = Number(seconds); diff --git a/data/web/json_api.php b/data/web/json_api.php index 2d315a0b1..a6efb14e5 100644 --- a/data/web/json_api.php +++ b/data/web/json_api.php @@ -1461,42 +1461,66 @@ if (isset($_GET['query'])) { if ($_SESSION['mailcow_cc_role'] == "admin") { switch ($object) { case "containers": - $containers = (docker('info')); - foreach ($containers as $container => $container_info) { - $container . ' (' . $container_info['Config']['Image'] . ')'; - $containerstarttime = ($container_info['State']['StartedAt']); - $containerstate = ($container_info['State']['Status']); - $containerimage = ($container_info['Config']['Image']); - $temp[$container] = array( + $temp = array(); + foreach (agent('services') as $svc) { + $nodes = agent('live_nodes', $svc); + $first = $nodes ? $nodes[0] : ''; + $meta = $first ? (agent('node_meta', $svc, $first) ?: array()) : array(); + $key = $svc . '-mailcow'; + $temp[$key] = array( 'type' => 'info', - 'container' => $container, - 'state' => $containerstate, - 'started_at' => $containerstarttime, - 'image' => $containerimage + 'container' => $key, + 'state' => $nodes ? 'running' : 'exited', + 'node_count' => count($nodes), + 'started_at' => isset($meta['started_at']) ? $meta['started_at'] : '', + 'image' => isset($meta['image']) ? $meta['image'] : '', + 'external' => false ); } + foreach (infra('status') as $key => $entry) { + $temp[$key] = array( + 'type' => 'info', + 'container' => $key, + 'state' => $entry['State']['Running'] ? 
'running' : 'exited', + 'node_count' => $entry['State']['NodeCount'], + 'started_at' => '', + 'image' => $entry['Config']['Image'], + 'error' => $entry['State']['Error'], + 'external' => true + ); + } + ksort($temp); echo json_encode($temp, JSON_UNESCAPED_SLASHES); break; case "container": - $container_stats = docker('container_stats', $extra); - echo json_encode($container_stats); + $stats = null; + foreach (agent('services') as $svc) { + $s = agent('node_stats', $svc, $extra); + if ($s) { + $stats = $s; + break; + } + } + echo json_encode($stats); break; case "vmail": - $exec_fields_vmail = array('cmd' => 'system', 'task' => 'df', 'dir' => '/var/vmail'); - $vmail_df = explode(',', json_decode(docker('post', 'dovecot-mailcow', 'exec', $exec_fields_vmail), true)); + $vmail_resp = agent('request', 'dovecot', 'exec.df', array('dir' => '/var/vmail'), 5); + $vmail_df = (!empty($vmail_resp['ok']) && is_string($vmail_resp['result'])) + ? explode(',', $vmail_resp['result']) + : array('', '', '', '', '', '/var/vmail'); $temp = array( 'type' => 'info', 'disk' => $vmail_df[0], 'used' => $vmail_df[2], - 'total'=> $vmail_df[1], + 'total' => $vmail_df[1], 'used_percent' => $vmail_df[4] ); echo json_encode($temp, JSON_UNESCAPED_SLASHES); break; case "host": - if (!$extra){ - $stats = docker("host_stats"); - echo json_encode($stats); + if (!$extra) { + $host_resp = agent('request', 'host', 'exec.host-stats', array(), 5); + echo json_encode(!empty($host_resp['ok']) ? 
$host_resp['result'] : null); } else if ($extra == "ip") { // get public ips diff --git a/data/web/lang/lang.de-de.json b/data/web/lang/lang.de-de.json index 3672762df..6d1073261 100644 --- a/data/web/lang/lang.de-de.json +++ b/data/web/lang/lang.de-de.json @@ -559,7 +559,11 @@ "template_exists": "Vorlage %s existiert bereits", "template_id_invalid": "Vorlagen-ID %s ungültig", "template_name_invalid": "Name der Vorlage ungültig", - "required_data_missing": "Die benötigten Daten: %s fehlen" + "required_data_missing": "Die benötigten Daten: %s fehlen", + "no_live_agent": "Kein aktiver Agent für Service %s", + "agent_timeout": "Agent-Timeout", + "agent_unknown_error": "Unbekannter Fehler vom Agent", + "queue_command_failed": "Queue-Befehl fehlgeschlagen" }, "datatables": { "collapse_all": "Alle Einklappen", @@ -623,7 +627,25 @@ "no_update_available": "Das System ist auf aktuellem Stand", "update_failed": "Es konnte nicht nach einem Update gesucht werden", "username": "Benutzername", - "wip": "Aktuell noch in Arbeit" + "wip": "Aktuell noch in Arbeit", + "data_stores": "Datenspeicher", + "nodes": "Knoten", + "disk_io": "Disk-I/O", + "net_io": "Netz-I/O", + "replicas_badge": "%d× Replicas", + "replicas_title": "Lebende Agent-Replicas", + "external_dep_info": "Externe Infrastruktur — Health-Check per Protokoll-Ping", + "status_ok": "OK", + "status_down": "down", + "status_healthy": "verbunden", + "status_unreachable": "nicht erreichbar", + "unknown": "unbekannt", + "restart_all_nodes": "Alle Knoten neu starten", + "restart_node": "Diesen Knoten neu starten", + "nodes_count": "%d Knoten", + "node": "Knoten", + "container_unhealthy": "Service nicht gesund", + "container_degraded": "Service teilweise gesund" }, "diagnostics": { "cname_from_a": "Wert abgeleitet von A/AAAA-Eintrag. 
Wird unterstützt, sofern der Eintrag auf die korrekte Ressource zeigt.", @@ -1207,7 +1229,8 @@ "verified_fido2_login": "FIDO2-Anmeldung verifiziert", "verified_totp_login": "TOTP-Anmeldung verifiziert", "verified_webauthn_login": "WebAuthn-Anmeldung verifiziert", - "verified_yotp_login": "Yubico-OTP-Anmeldung verifiziert" + "verified_yotp_login": "Yubico-OTP-Anmeldung verifiziert", + "service_restart_ok": "Service erfolgreich neu gestartet" }, "tfa": { "authenticators": "Authentikatoren", diff --git a/data/web/lang/lang.en-gb.json b/data/web/lang/lang.en-gb.json index e786bcbe5..d8e01c8ed 100644 --- a/data/web/lang/lang.en-gb.json +++ b/data/web/lang/lang.en-gb.json @@ -559,7 +559,11 @@ "validity_missing": "Please assign a period of validity", "value_missing": "Please provide all values", "version_invalid": "Version %s is invalid", - "yotp_verification_failed": "Yubico OTP verification failed: %s" + "yotp_verification_failed": "Yubico OTP verification failed: %s", + "no_live_agent": "No live agent for service %s", + "agent_timeout": "Agent timed out", + "agent_unknown_error": "Unknown error returned by agent", + "queue_command_failed": "Queue command failed" }, "datatables": { "collapse_all": "Collapse All", @@ -623,7 +627,25 @@ "no_update_available": "The System is on the latest version", "update_failed": "Could not check for an Update", "username": "Username", - "wip": "Currently Work in Progress" + "wip": "Currently Work in Progress", + "data_stores": "Data stores", + "nodes": "Nodes", + "disk_io": "Disk I/O", + "net_io": "Net I/O", + "replicas_badge": "%d× replicas", + "replicas_title": "Live agent replicas", + "external_dep_info": "External infrastructure dependency — health checked via protocol ping", + "status_ok": "ok", + "status_down": "down", + "status_healthy": "healthy", + "status_unreachable": "unreachable", + "unknown": "unknown", + "restart_all_nodes": "Restart all nodes", + "restart_node": "Restart this node", + "nodes_count": "%d node(s)", + 
"node": "Node", + "container_unhealthy": "Service unhealthy", + "container_degraded": "Service degraded" }, "diagnostics": { "cname_from_a": "Value derived from A/AAAA record. This is supported as long as the record points to the correct resource.", @@ -1214,7 +1236,8 @@ "verified_fido2_login": "Verified FIDO2 login", "verified_totp_login": "Verified TOTP login", "verified_webauthn_login": "Verified WebAuthn login", - "verified_yotp_login": "Verified Yubico OTP login" + "verified_yotp_login": "Verified Yubico OTP login", + "service_restart_ok": "Service restarted successfully" }, "tfa": { "authenticators": "Authenticators", diff --git a/data/web/templates/dashboard.twig b/data/web/templates/dashboard.twig index 3c57afa5c..7c49a8620 100644 --- a/data/web/templates/dashboard.twig +++ b/data/web/templates/dashboard.twig @@ -162,23 +162,64 @@ - + + {% if infra_containers %} +
+
+
+ {{ lang.debug.data_stores }} + {% for container, info in infra_containers %} + {% set svc = info.Service %} + {% set icon = svc == 'mysql' ? 'bi-database' : (svc == 'redis' ? 'bi-lightning-charge' : 'bi-cpu') %} +
+ +
+ {{ svc }} + + {{ info.Config.Image|default(lang.debug.unknown) }} + +
+ {% if info.State.Running == 1 %} + {{ lang.debug.status_ok }} + {% else %} + {{ lang.debug.status_down }} + {% endif %} +
+ {% endfor %} +
+
+
+ {% endif %} + +
{{ lang.debug.containers_info }}
- {% for container, container_info in containers %}
{{ container }} - ({{ container_info.Config.Image }}) - ({{ lang.debug.started_on }} {{ container_info.State.StartedAtHR }}) - {% if container_info.State.Running == 1 %} + {{ lang.debug.nodes_count|format(container_info.State.NodeCount) }} + {% if container_info.Config.Image %} + {{ container_info.Config.Image }} + {% endif %} + {% if container_info.State.StartedAtHR and container_info.State.StartedAtHR != '—' %} + {{ lang.debug.started_on }} {{ container_info.State.StartedAtHR }} + {% endif %} + {% if container_info.State.Running == 1 and container_info.State.Health == 'fail' %} + + {{ lang.debug.container_unhealthy }} + + {% elseif container_info.State.Running == 1 and container_info.State.Health == 'degraded' %} + + {{ lang.debug.container_degraded }} ({{ container_info.State.UnhealthyCount }}/{{ container_info.State.NodeCount }}) + + {% elseif container_info.State.Running == 1 %} {{ lang.debug.container_running }} . @@ -198,38 +239,49 @@
-
-
-
-
Disk I/O
-
- Loading... -
- -
-
-
Net I/O
-
- Loading... -
- -
-
+
+
+
{{ lang.debug.nodes }}
+ {% if container_info.Nodes|length > 0 %} +
    + {% for n in container_info.Nodes %} +
  • + + {% if n.Health == 'fail' %} + + {% elseif n.Health == 'ok' %} + + {% else %} + + {% endif %} + {{ n.NodeId matches '/^[0-9a-f]{12}$/' ? n.NodeId[:8] : n.NodeId }} + + cpu {{ n.CPUPercent ?: '0.00' }}% + · mem {{ ((n.MemoryBytes ?: 0) / 1024 / 1024) | round }} MiB + + {% if n.Health == 'fail' and n.HealthDetail %} + {{ n.HealthDetail }} + {% endif %} + + + {{ lang.debug.restart_node }} + +
  • + {% endfor %} +
+ {% else %} +
+ {% endif %} +
diff --git a/docker-compose.yml b/docker-compose.yml index 90e5518d0..d06f58d8e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,10 +1,15 @@ services: unbound-mailcow: - image: ghcr.io/mailcow/unbound:1.25 + image: ghcr.io/mailcow/unbound:nightly-14052026 + depends_on: + - redis-mailcow environment: - TZ=${TZ} - SKIP_UNBOUND_HEALTHCHECK=${SKIP_UNBOUND_HEALTHCHECK:-n} + - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} + - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} + - REDISPASS=${REDISPASS} volumes: - ./data/hooks/unbound:/hooks:Z - ./data/conf/unbound/unbound.conf:/etc/unbound/unbound.conf:ro,Z @@ -21,6 +26,12 @@ services: depends_on: - unbound-mailcow - netfilter-mailcow + healthcheck: + test: ["CMD", "mariadb-admin", "ping", "-h", "localhost", "-u", "root", "-p${DBROOT}"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s stop_grace_period: 45s volumes: - mysql-vol-1:/var/lib/mysql/ @@ -47,6 +58,12 @@ services: volumes: - redis-vol-1:/data/ - ./data/conf/redis/redis-conf.sh:/redis-conf.sh:z + healthcheck: + test: ["CMD", "sh", "-c", "redis-cli -a \"$$REDISPASS\" --no-auth-warning ping | grep PONG"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 30s restart: always depends_on: - netfilter-mailcow @@ -65,16 +82,21 @@ services: - redis clamd-mailcow: - image: ghcr.io/mailcow/clamd:1.71 + image: ghcr.io/mailcow/clamd:nightly-14052026 restart: always depends_on: unbound-mailcow: condition: service_healthy + redis-mailcow: + condition: service_started dns: - ${IPV4_NETWORK:-172.22.1}.254 environment: - TZ=${TZ} - SKIP_CLAMD=${SKIP_CLAMD:-n} + - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} + - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} + - REDISPASS=${REDISPASS} volumes: - ./data/conf/clamav/:/etc/clamav/:Z - clamd-db-vol-1:/var/lib/clamav @@ -84,7 +106,7 @@ services: - clamd rspamd-mailcow: - image: ghcr.io/mailcow/rspamd:3.14.3-1 + image: ghcr.io/mailcow/rspamd:nightly-14052026 stop_grace_period: 30s depends_on: - dovecot-mailcow @@ -117,7 
+139,7 @@ services: - rspamd php-fpm-mailcow: - image: ghcr.io/mailcow/phpfpm:8.2.29-2 + image: ghcr.io/mailcow/phpfpm:nightly-14052026 command: "php-fpm -d date.timezone=${TZ} -d expose_php=0" depends_on: - redis-mailcow @@ -200,7 +222,7 @@ services: - phpfpm sogo-mailcow: - image: ghcr.io/mailcow/sogo:5.12.8-1 + image: ghcr.io/mailcow/sogo:nightly-14052026 environment: - DBNAME=${DBNAME} - DBUSER=${DBUSER} @@ -252,7 +274,7 @@ services: - sogo dovecot-mailcow: - image: ghcr.io/mailcow/dovecot:2.3.21.1-2 + image: ghcr.io/mailcow/dovecot:nightly-14052026 depends_on: - mysql-mailcow - netfilter-mailcow @@ -339,7 +361,7 @@ services: - dovecot postfix-mailcow: - image: ghcr.io/mailcow/postfix:3.7.11-2 + image: ghcr.io/mailcow/postfix:nightly-14052026 depends_on: mysql-mailcow: condition: service_started @@ -382,7 +404,7 @@ services: - postfix postfix-tlspol-mailcow: - image: ghcr.io/mailcow/postfix-tlspol:1.8.23 + image: ghcr.io/mailcow/postfix-tlspol:nightly-14052026 depends_on: unbound-mailcow: condition: service_healthy @@ -408,6 +430,12 @@ services: restart: always environment: - TZ=${TZ} + healthcheck: + test: ["CMD-SHELL", "nc -z localhost 11211"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 15s networks: mailcow-network: aliases: @@ -419,7 +447,7 @@ services: - php-fpm-mailcow - sogo-mailcow - rspamd-mailcow - image: ghcr.io/mailcow/nginx:1.06 + image: ghcr.io/mailcow/nginx:nightly-14052026 dns: - ${IPV4_NETWORK:-172.22.1}.254 environment: @@ -438,6 +466,9 @@ services: - REDISHOST=${REDISHOST:-} - IPV4_NETWORK=${IPV4_NETWORK:-172.22.1} - NGINX_USE_PROXY_PROTOCOL=${NGINX_USE_PROXY_PROTOCOL:-n} + - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} + - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} + - REDISPASS=${REDISPASS} - TRUSTED_PROXIES=${TRUSTED_PROXIES:-} volumes: - ./data/web:/web:ro,z @@ -465,7 +496,7 @@ services: condition: service_started unbound-mailcow: condition: service_healthy - image: ghcr.io/mailcow/acme:1.97 + image: 
ghcr.io/mailcow/acme:nightly-14052026 dns: - ${IPV4_NETWORK:-172.22.1}.254 environment: @@ -506,7 +537,7 @@ services: - acme netfilter-mailcow: - image: ghcr.io/mailcow/netfilter:1.64 + image: ghcr.io/mailcow/netfilter:nightly-14052026 stop_grace_period: 30s restart: always privileged: true @@ -516,8 +547,11 @@ services: - IPV6_NETWORK=${IPV6_NETWORK:-fd4d:6169:6c63:6f77::/64} - SNAT_TO_SOURCE=${SNAT_TO_SOURCE:-n} - SNAT6_TO_SOURCE=${SNAT6_TO_SOURCE:-n} - - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} - - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} + # network_mode: host means we get the host's resolver, which can't + # see the `redis-mailcow` compose alias. Point the agent at redis + # via the bridge IP (overridable through REDIS_SLAVEOF_IP). + - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-${IPV4_NETWORK:-172.22.1}.249} + - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-6379} - REDISPASS=${REDISPASS} - MAILCOW_REPLICA_IP=${MAILCOW_REPLICA_IP:-} - DISABLE_NETFILTER_ISOLATION_RULE=${DISABLE_NETFILTER_ISOLATION_RULE:-n} @@ -526,7 +560,7 @@ services: - /lib/modules:/lib/modules:ro watchdog-mailcow: - image: ghcr.io/mailcow/watchdog:2.11 + image: ghcr.io/mailcow/watchdog:nightly-14052026 dns: - ${IPV4_NETWORK:-172.22.1}.254 tmpfs: @@ -600,28 +634,25 @@ services: aliases: - watchdog - dockerapi-mailcow: - image: ghcr.io/mailcow/dockerapi:2.12 - security_opt: - - label=disable + host-agent-mailcow: + image: ghcr.io/mailcow/host-agent:nightly-14052026 restart: always - dns: - - ${IPV4_NETWORK:-172.22.1}.254 environment: - - DBROOT=${DBROOT} - TZ=${TZ} - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} - REDISPASS=${REDISPASS} + - MAILCOW_AGENT_SERVICE=host volumes: - - /var/run/docker.sock:/var/run/docker.sock:ro + - /proc:/host/proc:ro + - /:/host/rootfs:ro networks: mailcow-network: aliases: - - dockerapi + - host-agent olefy-mailcow: - image: ghcr.io/mailcow/olefy:1.15 + image: ghcr.io/mailcow/olefy:nightly-14052026 restart: always environment: - 
TZ=${TZ} @@ -634,6 +665,9 @@ services: - OLEFY_MINLENGTH=500 - OLEFY_DEL_TMP=1 - SKIP_OLEFY=${SKIP_OLEFY:-n} + - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} + - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} + - REDISPASS=${REDISPASS} networks: mailcow-network: aliases: