diff --git a/services/rabbit/.gitignore b/services/rabbit/.gitignore
new file mode 100644
index 00000000..e67ff838
--- /dev/null
+++ b/services/rabbit/.gitignore
@@ -0,0 +1,6 @@
+*.yml
+*.env
+!template*.env
+!erlang.cookie.secret.template
+rabbitmq.conf
+haproxy.cfg
diff --git a/services/rabbit/.operations.Makefile b/services/rabbit/.operations.Makefile
new file mode 100644
index 00000000..a8933549
--- /dev/null
+++ b/services/rabbit/.operations.Makefile
@@ -0,0 +1,105 @@
+#
+# Variables
+#
+
+LOAD_BALANCER_STACK_NAME := rabbit-loadbalancer
+
+MAKEFLAGS += --no-print-directory
+
+#
+# Helpers
+#
+
+define create_node_stack_name
+rabbit-node0$(1)
+endef
+
+validate-NODE_COUNT: guard-NODE_COUNT
+	@if ! echo "$(NODE_COUNT)" | grep --quiet --extended-regexp '^[1-9]$$'; then \
+		echo "NODE_COUNT must be a positive single digit integer"; \
+		exit 1; \
+	fi
+
+validate-node-ix0%: .env
+	@if ! echo "$*" | grep --quiet --extended-regexp '^[1-9]$$'; then \
+		echo "Node index $* must be a positive single digit integer"; \
+		exit 1; \
+	fi
+
+	@set -o allexport; . $<; set +o allexport; \
+	if [ "$*" -lt 1 ] || [ "$*" -gt "$$RABBIT_CLUSTER_NODE_COUNT" ]; then \
+		echo "Node index $* is out of range 1..$$RABBIT_CLUSTER_NODE_COUNT"; \
+		exit 1; \
+	fi
+
+#
+# Cluster level
+#
+
+### Note: the up operation is called by CI automatically;
+### it must NOT deploy stacks if they are already running,
+### to avoid breaking an existing cluster (stopping all nodes at once)
+up: start-cluster
+
+down: stop-cluster
+
+start-cluster: start-all-nodes start-loadbalancer
+
+update-cluster stop-cluster:
+	@$(error This operation may break the cluster. Check the README for details.)
+
+#
+# Load Balancer
+#
+
+start-loadbalancer: .stack.loadbalancer.yml
+	@docker stack deploy --with-registry-auth --prune --compose-file $< $(LOAD_BALANCER_STACK_NAME)
+
+update-loadbalancer: start-loadbalancer
+
+stop-loadbalancer:
+	@docker stack rm $(LOAD_BALANCER_STACK_NAME)
+
+#
+# Rabbit all Nodes together
+#
+
+.start-all-nodes: validate-NODE_COUNT
+	@i=1; \
+	while [ $$i -le $(NODE_COUNT) ]; do \
+		$(MAKE) start-node0$$i; \
+		i=$$((i + 1)); \
+	done
+
+start-all-nodes: .env
+	@source $<; \
+	$(MAKE) .start-all-nodes NODE_COUNT=$$RABBIT_CLUSTER_NODE_COUNT
+
+update-all-nodes:
+	@$(error Updating all nodes at the same time may break the cluster \
+		as it may restart (i.e. stop) all nodes at the same time. \
+		Update one node at a time)
+
+stop-all-nodes:
+	@$(error Stopping all nodes at the same time breaks the cluster. \
+		Stop one node at a time. \
+		Read more at https://groups.google.com/g/rabbitmq-users/c/owvanX2iSqA/m/ZAyRDhRfCQAJ)
+
+#
+# Rabbit Node level
+#
+
+start-node0%: validate-node-ix0% .stack.node0%.yml
+	@STACK_NAME=$(call create_node_stack_name,$*); \
+	if docker stack ls --format '{{.Name}}' | grep --silent "$$STACK_NAME"; then \
+		echo "Rabbit Node $* is already running, skipping"; \
+	else \
+		echo "Starting Rabbit Node $* ..."; \
+		docker stack deploy --with-registry-auth --prune --compose-file $(word 2,$^) $(call create_node_stack_name,$*); \
+	fi
+
+update-node0%: validate-node-ix0% .stack.node0%.yml
+	@docker stack deploy --detach=false --with-registry-auth --prune --compose-file $(word 2,$^) $(call create_node_stack_name,$*)
+
+stop-node0%: validate-node-ix0%
+	@docker stack rm --detach=false $(call create_node_stack_name,$*)
diff --git a/services/rabbit/Makefile b/services/rabbit/Makefile
new file mode 100644
index 00000000..349a36dc
--- /dev/null
+++ b/services/rabbit/Makefile
@@ -0,0 +1,66 @@
+REPO_BASE_DIR := $(shell git rev-parse --show-toplevel)
+
+include ${REPO_BASE_DIR}/scripts/common-services.Makefile
+# common-services.Makefile must be included first, as common.Makefile
+# relies on the STACK_NAME var which is defined in common-services.Makefile
+include ${REPO_BASE_DIR}/scripts/common.Makefile
+
+#
+# Operations
+#
+
+include ${REPO_BASE_DIR}/services/rabbit/.operations.Makefile
+
+#
+# Docker compose files
+#
+
+### Load Balancer
+docker-compose.loadbalancer.yml: docker-compose.loadbalancer.yml.j2 \
+		.env \
+		configs/rabbitmq.conf \
+		configs/erlang.cookie.secret \
+		configs/haproxy.cfg \
+		venv \
+		$(VENV_BIN)/j2
+	@$(call jinja, $<, .env, $@)
+
+.stack.loadbalancer.yml: docker-compose.loadbalancer.yml .env
+	@${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< > $@
+
+### Node
+
+.PRECIOUS: node0%.env
+node0%.env: .env
+	envsubst < $< > $@; \
+	echo NODE_INDEX=$* >> $@
+
+.PRECIOUS: docker-compose.node0%.yml
+docker-compose.node0%.yml: docker-compose.node0x.yml.j2 \
+		node0%.env \
+		configs/rabbitmq.conf \
+		configs/erlang.cookie.secret \
+		configs/haproxy.cfg \
+		venv \
+		$(VENV_BIN)/j2
+	@$(call jinja, $<, node0$*.env, $@)
+
+.PRECIOUS: .stack.node0%.yml
+.stack.node0%.yml: docker-compose.node0%.yml node0%.env
+	@${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e node0$*.env $< > $@
+
+#
+# Config / Secret files
+#
+
+configs/erlang.cookie.secret: configs/erlang.cookie.secret.template .env
+	@set -a; source .env; set +a; \
+	envsubst < $< > $@
+
+configs/rabbitmq.conf: configs/rabbitmq.conf.j2 .env venv
+	# generate $@
+	@$(call jinja, $<, .env, $@)
+
+configs/haproxy.cfg: configs/haproxy.cfg.j2 .env venv
+	# generate $@
+	@$(call jinja, $<, .env, $@)
diff --git a/services/rabbit/README.md b/services/rabbit/README.md
new file mode 100644
index 00000000..7358c88a
--- /dev/null
+++ b/services/rabbit/README.md
@@ -0,0 +1,49 @@
+## Starting a cluster
+
+Make sure all nodes have joined the cluster before using it. Otherwise, the number of replicas in quorum queues might be affected. Say you have a cluster of 3 nodes and you connect to the cluster before the 3rd node has joined it: your quorum queue would end up with only 2 replicas and would break once 1 node (of the 2 nodes holding the replicas of the queue) goes down.
+
+## Updating a cluster
+
+Update one node at a time. Never update all nodes at the same time (this may break the cluster)! Follow the instructions in the official documentation: https://www.rabbitmq.com/docs/upgrade#rolling-upgrade.
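+
+For example, a rolling update of a 3-node cluster (`RABBIT_CLUSTER_NODE_COUNT=3`, an assumption for illustration) could look like the sketch below, using the `update-node0*` targets defined in `.operations.Makefile`. Move on to the next node only after the previous one is back up and has rejoined the cluster (check the management UI or `rabbitmqctl cluster_status`).
+
+```bash
+# update nodes one at a time, waiting for each node to rejoin the cluster before continuing
+make update-node01
+make update-node02
+make update-node03
+```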
+
+## Graceful shutdown
+
+Shut down nodes one by one, gracefully. Wait until the node is stopped and has left the cluster, then stop the next node. When starting the cluster, start the nodes **in the reverse order**! For example, if you shut down node01, then node02 and lastly node03, first start node03, then node02 and finally node01.
+
+If all nodes were shut down simultaneously, you will see mnesia table errors in the nodes' logs. Restarting the node solves the issue. The documentation also mentions the force_boot CLI command for this case (see https://www.rabbitmq.com/docs/man/rabbitmqctl.8#force_boot).
+
+## How to add / remove nodes
+
+The only supported way is to completely shut down the cluster (the docker stacks and most likely the rabbit node volumes) and start from scratch.
+
+With some manual effort this can be done on a running cluster, by adding one more rabbit node manually (as a separate docker stack or a new service) and executing rabbitmqctl commands by hand (some hints can be found at https://www.rabbitmq.com/docs/clustering#creating).
+
+## Updating rabbitmq.conf / advanced.config (zero-downtime)
+
+We do not support this in an automated way (except starting from scratch with empty volumes), but it can be done manually if needed. `rabbitmq.conf` and `advanced.config` changes take effect after a node restart. With a clustered RabbitMQ (multiple nodes) this can be performed with zero downtime by stopping and starting the rabbitmq nodes one by one:
+* `docker exec -it <container> bash`
+* (inside the container) `rabbitmqctl stop_app`, then wait until the node is stopped (can be seen in the management UI)
+* (inside the container) `rabbitmqctl start_app`
+
+Source: https://www.rabbitmq.com/docs/next/configure#config-changes-effects
+
+## Enable node Maintenance mode
+
+1. Get a shell inside the container (`docker exec -it <container> bash`)
+2. (Inside the container) execute `rabbitmq-upgrade drain`
+
+Source: https://www.rabbitmq.com/docs/upgrade#maintenance-mode
+
+## Troubleshooting
+mnesia errors after all rabbit nodes (docker services) restart:
+* https://stackoverflow.com/questions/60407082/rabbit-mq-error-while-waiting-for-mnesia-tables
+
+official documentation mentioning restart scenarios:
+* https://www.rabbitmq.com/docs/clustering#restarting-schema-sync
+
+all (3) cluster nodes go down simultaneously, cluster is broken:
+* https://groups.google.com/g/rabbitmq-users/c/owvanX2iSqA
+
+## Autoscaling
+
+Not supported at the moment.
diff --git a/services/rabbit/configs/erlang.cookie.secret.template b/services/rabbit/configs/erlang.cookie.secret.template
new file mode 100644
index 00000000..850d0947
--- /dev/null
+++ b/services/rabbit/configs/erlang.cookie.secret.template
@@ -0,0 +1 @@
+${RABBIT_ERLANG_COOKIE}
diff --git a/services/rabbit/configs/haproxy.cfg.j2 b/services/rabbit/configs/haproxy.cfg.j2
new file mode 100644
index 00000000..1619e7ce
--- /dev/null
+++ b/services/rabbit/configs/haproxy.cfg.j2
@@ -0,0 +1,64 @@
+{% set NODE_IXS = range(1, (RABBIT_CLUSTER_NODE_COUNT | int) + 1) -%}
+
+global
+    log stdout format raw local0
+
+# haproxy by default resolves a server hostname only once;
+# this breaks if the container restarts. By using resolvers
+# we tell haproxy to re-resolve the hostname (so container
+# restarts are handled properly)
+resolvers dockerdns
+    nameserver dns1 127.0.0.11:53
+    resolve_retries 3
+    timeout resolve 1s
+    timeout retry 1s
+    hold other 10s
+    hold refused 10s
+    hold nx 10s
+    hold timeout 10s
+    hold valid 10s
+    hold obsolete 10s
+
+defaults
+    log global
+    mode tcp
+    option tcplog
+
+    timeout connect 5s
+    timeout client 30s
+    timeout server 30s
+
+frontend rabbit
+    bind *:{{ RABBIT_PORT }}
+    default_backend rabbit_backends
+
+frontend rabbit_dashboard
+    bind *:{{ RABBIT_MANAGEMENT_PORT }}
+    default_backend rabbit_dashboard_backends
+
+frontend health
+    mode http
+    bind 127.0.0.1:32087
+    http-request return status 200 if { src 127.0.0.0/8 }
+
+backend rabbit_backends
+    # a side effect of roundrobin is that connections should be evenly distributed,
+    # thus rabbit queue leader replicas shall also be evenly distributed
+    # (https://www.rabbitmq.com/docs/4.0/clustering#replica-placement)
+    # if the algorithm below is changed, consider adjusting the rabbit configuration
+    # as stated in the documentation link above
+    balance roundrobin
+
+    # init-addr libc,none - start even if there aren't any backend servers running
+{% for ix in NODE_IXS %}
+    server rabbit0{{ ix }} rabbit-node0{{ ix }}_rabbit0{{ ix }}:{{ RABBIT_PORT }} check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3 send-proxy
+{%- endfor %}
+
+backend rabbit_dashboard_backends
+    mode http
+    balance roundrobin
+
+{% for ix in NODE_IXS %}
+    server rabbit0{{ ix }} rabbit-node0{{ ix }}_rabbit0{{ ix }}:{{ RABBIT_MANAGEMENT_PORT }} check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3
+{%- endfor %}
+# keep a new line at the end to avoid the "Missing LF on last line" error
diff --git a/services/rabbit/configs/rabbitmq.conf.j2 b/services/rabbit/configs/rabbitmq.conf.j2
new file mode 100644
index 00000000..28be26d5
--- /dev/null
+++ b/services/rabbit/configs/rabbitmq.conf.j2
@@ -0,0 +1,19 @@
+{% set NODE_IXS = range(1, (RABBIT_CLUSTER_NODE_COUNT | int) + 1) -%}
+
+# https://www.rabbitmq.com/docs/cluster-formation#peer-discovery-configuring-mechanism
+cluster_formation.peer_discovery_backend = classic_config
+
+{% for ix in NODE_IXS %}
+cluster_formation.classic_config.nodes.{{ ix }} = rabbit@rabbit-node0{{ ix }}_rabbit0{{ ix }}
+{%- endfor %}
+
+## Sets the initial quorum queue replica count for newly declared quorum queues.
+## This value can be overridden using the 'x-quorum-initial-group-size' queue argument
+## at declaration time.
+# https://www.rabbitmq.com/docs/quorum-queues#quorum-requirements
+quorum_queue.initial_cluster_size = {{ RABBIT_QUORUM_QUEUE_DEFAULT_REPLICA_COUNT }}
+
+# Extract the proper client IP when behind a proxy (e.g. haproxy)
+# https://www.rabbitmq.com/docs/networking#proxy-protocol
+# WARNING: this forces clients to use a proxy (direct access to nodes does not work)
+proxy_protocol = true
diff --git a/services/rabbit/docker-compose.loadbalancer.yml.j2 b/services/rabbit/docker-compose.loadbalancer.yml.j2
new file mode 100644
index 00000000..242a2585
--- /dev/null
+++ b/services/rabbit/docker-compose.loadbalancer.yml.j2
@@ -0,0 +1,46 @@
+services:
+  loadbalancer:
+    image: haproxy:3.2
+    deploy:
+      update_config:
+        order: start-first
+        parallelism: 1
+        delay: 30s
+        failure_action: rollback
+      # https://discourse.haproxy.org/t/haproxy-high-availability-configuration/11983
+      replicas: ${RABBIT_LB_REPLICAS}
+      # necessary to preserve the client ip,
+      # otherwise we see the overlay rabbit network lb ip
+      # (rabbitmq management dashboard, connections section)
+      endpoint_mode: dnsrr
+      resources:
+        limits:
+          # https://help.hcl-software.com/digital-experience/dx-95-doc-archive/CF203/platform/kubernetes/haproxy-migration/haproxy-configuration.html
+          cpus: "1"
+          memory: "2G"
+        # according to local observations and the link below
+        # https://github.com/haproxytech/helm-charts/blob/haproxy-1.24.0/haproxy/values.yaml#L403
+        reservations:
+          cpus: "0.1"
+          memory: "128M"
+    healthcheck: # https://stackoverflow.com/a/76513320/12124525
+      test: bash -c 'echo "" > /dev/tcp/127.0.0.1/32087 || exit 1'
+      start_period: 5s
+      timeout: 2s
+      retries: 2
+      interval: 10s
+    networks:
+      - rabbit
+    configs:
+      - source: haproxy.cfg
+        target: /usr/local/etc/haproxy/haproxy.cfg
+
+networks:
+  rabbit:
+    name: ${RABBIT_NETWORK}
+    external: true
+
+configs:
+  haproxy.cfg:
+    file: ./configs/haproxy.cfg
+    name: rabbit_haproxy_conf_{{ "./configs/haproxy.cfg" | sha256file | substring(0,10) }}
diff --git a/services/rabbit/docker-compose.node0x.yml.j2 b/services/rabbit/docker-compose.node0x.yml.j2
new file mode 100644
index 00000000..83ac10f6
--- /dev/null
+++ b/services/rabbit/docker-compose.node0x.yml.j2
@@ -0,0 +1,78 @@
+services:
+  rabbit0{{ NODE_INDEX }}:
+    image: itisfoundation/rabbitmq:4.1.2-management
+    init: true
+    # https://docs.docker.com/reference/cli/docker/service/create/#create-services-using-templates
+    hostname: {% raw %}"{{.Service.Name}}"{% endraw %}
+    deploy:
+      placement:
+        constraints:
+          - node.labels.rabbit0{{ NODE_INDEX }} == true
+      resources:
+        # https://www.rabbitmq.com/docs/production-checklist#minimum-hardware
+        limits:
+          cpus: "4.0"
+          memory: "4G"
+        reservations:
+          cpus: "1.0"
+          memory: "1G"
+      update_config:
+        order: "stop-first"
+    environment:
+      # https://www.rabbitmq.com/docs/configure#supported-environment-variables
+      RABBITMQ_DEFAULT_USER: ${RABBIT_USER}
+      RABBITMQ_DEFAULT_PASS: ${RABBIT_PASSWORD}
+      RABBITMQ_NODENAME: {% raw %}"rabbit@{{.Service.Name}}"{% endraw %}
+      RABBITMQ_NODE_PORT: ${RABBIT_PORT}
+    # https://docs.docker.com/reference/compose-file/services/#long-syntax-5
+    # https://hub.docker.com/_/rabbitmq#erlang-cookie
+    secrets:
+      # https://github.com/docker-library/rabbitmq/issues/279
+      - source: rabbit_erlang_cookie
+        target: /var/lib/rabbitmq/.erlang.cookie
+        mode: 0600
+        # as long as the "default" user is used (no user explicitly specified)
+        uid: "999"
+        gid: "999"
+    configs:
+      - source: rabbitmq.conf
+        target: /etc/rabbitmq/rabbitmq.conf
+        mode: 0600
+        uid: "999"
+        gid: "999"
+    volumes:
+      - rabbit0{{ NODE_INDEX }}_data:/var/lib/rabbitmq
+    networks:
+      - rabbit
+    healthcheck:
+      # see https://hub.docker.com/_/rabbitmq#healthlivenessreadiness-checking
+      # https://www.rabbitmq.com/docs/clustering#restarting-readiness-probes
+      # we must have a healthcheck that does not require the node to be fully booted (i.e. to have joined a cluster);
+      # otherwise it creates a deadlock: docker swarm will not route to the node until it is healthy,
+      # and the node is not healthy until it is part of a cluster (other nodes can talk to it)
+      test: rabbitmq-diagnostics ping
+      interval: 60s
+      timeout: 10s
+      retries: 2
+      start_period: 30s
+      start_interval: 10s
+
+volumes:
+  rabbit0{{ NODE_INDEX }}_data:
+    name: rabbit0{{ NODE_INDEX }}_data
+
+networks:
+  rabbit:
+    name: ${RABBIT_NETWORK}
+    external: true
+
+configs:
+  rabbitmq.conf:
+    # no rolling update since it requires a full cluster restart
+    file: ./configs/rabbitmq.conf
+
+secrets:
+  rabbit_erlang_cookie:
+    # no rolling update since it requires a full cluster restart
+    # see https://github.com/rabbitmq/rabbitmq-server/discussions/14391
+    file: ./configs/erlang.cookie.secret
diff --git a/services/rabbit/template.env b/services/rabbit/template.env
new file mode 100644
index 00000000..370ae73f
--- /dev/null
+++ b/services/rabbit/template.env
@@ -0,0 +1,13 @@
+RABBIT_CLUSTER_NODE_COUNT=${RABBIT_CLUSTER_NODE_COUNT}
+RABBIT_QUORUM_QUEUE_DEFAULT_REPLICA_COUNT=${RABBIT_QUORUM_QUEUE_DEFAULT_REPLICA_COUNT}
+
+RABBIT_USER=${RABBIT_USER}
+RABBIT_PASSWORD=${RABBIT_PASSWORD}
+RABBIT_PORT=${RABBIT_PORT}
+RABBIT_MANAGEMENT_PORT=${RABBIT_MANAGEMENT_PORT}
+
+RABBIT_ERLANG_COOKIE=${RABBIT_ERLANG_COOKIE}
+RABBIT_LB_REPLICAS=${RABBIT_LB_REPLICAS}
+RABBIT_NETWORK=${RABBIT_NETWORK}
+
+PUBLIC_NETWORK=${PUBLIC_NETWORK}
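
Note (not part of the diff): before using the cluster, the README above recommends checking that all nodes have joined it. A minimal verification sketch, assuming the stack/service naming from `.operations.Makefile` and `docker-compose.node0x.yml.j2` (stack `rabbit-node01`, service `rabbit01`), run on the swarm host that carries node 01:

```bash
# find the local container of rabbit node 01
# (swarm names containers <stack>_<service>.<slot>.<task-id>)
CONTAINER_ID=$(docker ps --quiet --filter name=rabbit-node01_rabbit01)

# list the cluster members; all RABBIT_CLUSTER_NODE_COUNT nodes
# should be reported as running before the cluster is used
docker exec -it "$CONTAINER_ID" rabbitmqctl cluster_status
```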