diff --git a/README.md b/README.md index c22a1a2..a90fe50 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ get the config that `Cluster.Supervisor` expects. ```elixir config :libcluster, topologies: [ - example: [ + epmd_example: [ # The selected clustering strategy. Required. strategy: Cluster.Strategy.Epmd, # Configuration for the provided strategy. Optional. @@ -93,6 +93,10 @@ config :libcluster, # The function to use for listing nodes. # This function must return a list of node names. Optional list_nodes: {:erlang, :nodes, [:connected]}, + ], + # more topologies can be added ... + gossip_example: [ + # ... ] ] ``` diff --git a/lib/strategy/kubernetes.ex b/lib/strategy/kubernetes.ex index e70bdd7..27c45a5 100644 --- a/lib/strategy/kubernetes.ex +++ b/lib/strategy/kubernetes.ex @@ -1,51 +1,150 @@ defmodule Cluster.Strategy.Kubernetes do - @moduledoc ~S""" - This clustering strategy works by loading all endpoints in the current Kubernetes - namespace with the configured label. It will fetch the addresses of all endpoints with - that label and attempt to connect. It will continually monitor and update its - connections every 5s. Alternatively the IP can be looked up from the pods directly - by setting `kubernetes_ip_lookup_mode` to `:pods`. + @default_polling_interval 5_000 + @kubernetes_master "kubernetes.default.svc" + @service_account_path "/var/run/secrets/kubernetes.io/serviceaccount" + + @moduledoc """ + This clustering strategy works by fetching information of endpoints or pods, which are filtered by + given Kubernetes namespace and label. + + > This strategy requires a service account with the ability to list endpoints or pods. If you want + > to avoid that, you could use one of the DNS-based strategies instead. + > + > See `Cluster.Strategy.Kubernetes.DNS` and `Cluster.Strategy.Kubernetes.DNSSRV`. 
+ + It assumes that all Erlang nodes are using longnames - `@`: + + + all nodes are using the same `` + + all nodes are using unique `` + + In `@`: + + + `` would be the value configured by `:kubernetes_node_basename` option. + + `` would be the value which is controlled by following options: + - `:kubernetes_namespace` + - `:kubernetes_selector` + - `:kubernetes_service_name` + - `:kubernetes_ip_lookup_mode` + - `:mode` + + ## Getting `` + + As said above, the basename is configured by `:kubernetes_node_basename` option. + + Just one thing to keep in mind - when building an OTP release, make sure that the name of the OTP + release matches the name configured by `:kubernetes_node_basename`. + + ## Getting `` + + ### `:kubernetes_namespace` and `:kubernetes_selector` option + + These two options configure how to filter required endpoints or pods. + + ### `:kubernetes_ip_lookup_mode` option + + This option configures where to look up the required IP. + + Available values: + + + `:endpoints` (default) + + `:pods` + + #### :endpoints + + When setting this value, this strategy will look up IP from endpoints. In order for your endpoints to be found they should be returned when you run: kubectl get endpoints -l app=myapp + Then, this strategy will fetch the addresses of all endpoints with that label and attempt to + connect. + + #### :pods + + When setting this value, this strategy will look up IP from pods directly. + In order for your pods to be found they should be returned when you run: kubectl get pods -l app=myapp - It assumes that all nodes share a base name, are using longnames, and are unique - based on their FQDN, rather than the base hostname. In other words, in the following - longname, `@`, `basename` would be the value configured in - `kubernetes_node_basename`. + Then, this strategy will fetch the IP of all pods with that label and attempt to connect. + + + ### `:mode` option + + This option configures how to build the longname. 
+ + Available values: + + + `:ip` (default) + + `:dns` + + `:hostname` + + #### :ip - `domain` would be the value configured in `mode` and can be either of type `:ip` - (the pod's ip, can be obtained by setting an env variable to status.podIP), `:hostname` - or `:dns`, which is the pod's internal A Record. This A Record has the format - `..pod.cluster.local`, e.g. - `1-2-3-4.default.pod.cluster.local`. + In this mode, the IP address is used directly. The longname will be something like: - Getting `:dns` to work requires setting the `POD_A_RECORD` environment variable before - the application starts. If you use Distillery you can set it in your `pre_configure` hook: + myapp@ + + Getting this mode to work requires: + + 1. exposing pod IP from Kubernetes to the Erlang node. + 2. setting the name of Erlang node according to the exposed information + + First, expose required information from Kubernetes as environment variables of Erlang node: # deployment.yaml - command: ["sh", "-c"] - args: ["POD_A_RECORD"] - args: ["export POD_A_RECORD=$(echo $POD_IP | sed 's/\./-/g') && /app/bin/app foreground"] + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP - # vm.args - -name app@<%= "${POD_A_RECORD}.${NAMESPACE}.pod.cluster.local" %> + Then, set the name of Erlang node by using the exposed environment variables. If you use mix releases, you + can configure the required options in `rel/env.sh.eex`: - If you use mix releases instead, you can configure the required options in `rel/env.sh.eex`. - Doing so will append a `-name` option to the `start` command directly and requires no further - changes to the `vm.args`: + # rel/env.sh.eex + export RELEASE_DISTRIBUTION=name + export RELEASE_NODE=<%= @release.name %>@${POD_IP} + + > `export RELEASE_DISTRIBUTION=name` will append a `-name` option to the `start` command directly + > and requires no further changes to the `vm.args`. + + #### :hostname + + In this mode, the hostname is used directly. 
The longname will be something like: + + myapp@...svc. + + Getting `:hostname` mode to work requires: + + 1. deploying pods as a StatefulSet (otherwise, hostname is not set for pods) + 2. setting `:kubernetes_service_name` to the name of the Kubernetes service that is being looked up + 3. setting the name of Erlang node according to hostname of pods + + Then, set the name of Erlang node by using the hostname of pod. If you use mix releases, you can + configure the required options in `rel/env.sh.eex`: # rel/env.sh.eex - export POD_A_RECORD=$(echo $POD_IP | sed 's/\./-/g') export RELEASE_DISTRIBUTION=name - export RELEASE_NODE=<%= @release.name %>@${POD_A_RECORD}.${NAMESPACE}.pod.cluster.local + export RELEASE_NODE=<%= @release.name %>@$(hostname -f) + + > `hostname -f` returns the whole FQDN, which is something like: + > `$(hostname).${SERVICE_NAME}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}`. - To set the `NAMESPACE` and `POD_IP` environment variables you can configure your pod as follows: + #### :dns + + In this mode, an IP-based pod A record is used. The longname will be something like: + + myapp@..pod. + + Getting `:dns` mode to work requires: + + 1. exposing pod IP from Kubernetes to the Erlang node + 2. setting the name of Erlang node according to the exposed information + + First, expose required information from Kubernetes as environment variables of Erlang node: + + # deployment.yaml + env: @@ -58,33 +157,74 @@ defmodule Cluster.Strategy.Kubernetes do fieldRef: fieldPath: status.podIP - The benefit of using `:dns` over `:ip` is that you can establish a remote shell (as well as - run observer) by using `kubectl port-forward` in combination with some entries in `/etc/hosts`. + Then, set the name of Erlang node by using the exposed environment variables. 
If you use mix + releases, you can configure the required options in `rel/env.sh.eex`: + + # rel/env.sh.eex + export POD_A_RECORD=$(echo $POD_IP | sed 's/\./-/g') + export CLUSTER_DOMAIN=cluster.local # modify this value according to your actual situation + export RELEASE_DISTRIBUTION=name + export RELEASE_NODE=<%= @release.name %>@${POD_A_RECORD}.${NAMESPACE}.pod.${CLUSTER_DOMAIN} + + ### Which mode is the best one? + + There is no best, only the best for you: + + + If you're not using a StatefulSet, use `:ip` or `:dns`. + + If you're using a StatefulSet, use `:hostname`. + + And, there is one thing that can be taken into consideration. When using `:ip` or `:dns`, you + can establish a remote shell (as well as run observer) by using `kubectl port-forward` in combination + with some entries in `/etc/hosts`. + + ## Polling Interval + + The default interval to sync topologies is `#{@default_polling_interval}` + (#{div(@default_polling_interval, 1000)} seconds). You can configure it with `:polling_interval` option. + + ## Getting cluster information + + > In general, you don't need to read this, the default values will work. + + This strategy fetches information of endpoints or pods by accessing the REST API provided by + Kubernetes. + + The base URL of the REST API has two parts: - Using `:hostname` is useful when deploying your app to K8S as a stateful set. In this case you can - set your erlang name as the fully qualified domain name of the pod which would be something similar to - `my-app-0.my-service-name.my-namespace.svc.cluster.local`. - e.g. + . - # vm.args - -name app@<%=`(hostname -f)`%> + `` is configured by following options: - In this case you must also set `kubernetes_service_name` to the name of the K8S service that is being queried. + + `:kubernetes_master` - the default value is `#{@kubernetes_master}` - `mode` defaults to `:ip`. 
+ `` is configured by following options and environment variables: - An example configuration is below: + + `:kubernetes_cluster_name` - the default value is `cluster`, and the final cluster domain will be `.local` + + `CLUSTER_DOMAIN` - when this environment variable is provided, `:kubernetes_cluster_name` will be ignored + + > `` and `` also affect each other, checkout the source code for more + > details. + + Besides the base URL of the REST API, a service account must be provided. The service account is + configured by following options: + + + `:kubernetes_service_account_path` - the default value is `#{@service_account_path}` + + ## An example configuration config :libcluster, topologies: [ - k8s_example: [ + erlang_nodes_in_k8s: [ strategy: #{__MODULE__}, config: [ mode: :ip, kubernetes_node_basename: "myapp", kubernetes_selector: "app=myapp", kubernetes_namespace: "my_namespace", - polling_interval: 10_000]]] + polling_interval: 10_000 + ] + ] + ] """ use GenServer @@ -93,10 +233,6 @@ defmodule Cluster.Strategy.Kubernetes do alias Cluster.Strategy.State - @default_polling_interval 5_000 - @kubernetes_master "kubernetes.default.svc" - @service_account_path "/var/run/secrets/kubernetes.io/serviceaccount" - def start_link(args), do: GenServer.start_link(__MODULE__, args) @impl true diff --git a/lib/strategy/kubernetes_dns.ex b/lib/strategy/kubernetes_dns.ex index c4cc51e..aa1e2dd 100644 --- a/lib/strategy/kubernetes_dns.ex +++ b/lib/strategy/kubernetes_dns.ex @@ -1,26 +1,91 @@ defmodule Cluster.Strategy.Kubernetes.DNS do + @default_polling_interval 5_000 + @moduledoc """ - This clustering strategy works by loading all your Erlang nodes (within Pods) in the current [Kubernetes - namespace](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/). - It will fetch the addresses of all pods under a shared headless service and attempt to connect. - It will continually monitor and update its connections every 5s. 
+ This clustering strategy works by fetching IP addresses with the help of a headless service in + current Kubernetes namespace. + + > This strategy requires exposing pods by a headless service. + > If you want to avoid that, you could use `Cluster.Strategy.Kubernetes`. + + It assumes that all Erlang nodes are using longnames - `@`: + + + all nodes are using the same `` + + all nodes are using unique `` + + In `@`: + + + `` would be the value configured by `:application_name` option. + + `` would be the value which is controlled by following options: + - `:service` + - `:resolver` + + ## Getting `` + + As said above, the basename is configured by `:application_name` option. + + Just one thing to keep in mind - when building an OTP release, make sure that the name of the OTP + release matches the name configured by `:application_name`. + + ## Getting `` + + It will fetch IP addresses of all pods under a headless service and attempt to connect. - It assumes that all Erlang nodes were launched under a base name, are using longnames, and are unique - based on their FQDN, rather than the base hostname. In other words, in the following - longname, `@`, `basename` would be the value configured through - `application_name`. + ## Setup - An example configuration is below: + Getting this strategy to work requires: + 1. exposing pod IP from Kubernetes to the Erlang node. + 2. setting a headless service for the pods + 3. setting the name of Erlang node according to the exposed information + + First, expose required information from Kubernetes as environment variables of Erlang node: + + # deployment.yaml + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + + Second, set a headless service for the pods: + + # deployment.yaml + apiVersion: v1 + kind: Service + metadata: + name: myapp-headless + spec: + selector: + app: myapp + type: ClusterIP + clusterIP: None + + Then, set the name of Erlang node by using the exposed environment variables. 
If you use mix releases, you + can configure the required options in `rel/env.sh.eex`: + + # rel/env.sh.eex + export RELEASE_DISTRIBUTION=name + export RELEASE_NODE=<%= @release.name %>@${POD_IP} + + ## Polling Interval + + The default interval to sync topologies is `#{@default_polling_interval}` + (#{div(@default_polling_interval, 1000)} seconds). You can configure it with `:polling_interval` option. + + ## An example configuration config :libcluster, topologies: [ - k8s_example: [ + erlang_nodes_in_k8s: [ strategy: #{__MODULE__}, config: [ service: "myapp-headless", application_name: "myapp", - polling_interval: 10_000]]] + polling_interval: 10_000 + ] + ] + ] """ use GenServer @@ -29,8 +94,6 @@ defmodule Cluster.Strategy.Kubernetes.DNS do alias Cluster.Strategy.State - @default_polling_interval 5_000 - @impl true def start_link(args), do: GenServer.start_link(__MODULE__, args) diff --git a/lib/strategy/kubernetes_dns_srv.ex b/lib/strategy/kubernetes_dns_srv.ex index 9b780a5..7f49729 100644 --- a/lib/strategy/kubernetes_dns_srv.ex +++ b/lib/strategy/kubernetes_dns_srv.ex @@ -1,106 +1,149 @@ defmodule Cluster.Strategy.Kubernetes.DNSSRV do + @default_polling_interval 5_000 + @moduledoc """ - This clustering strategy works by issuing a SRV query for the kubernetes headless service - under which the stateful set containing your nodes is running. + This clustering strategy works by issuing a SRV query for the headless service where the StatefulSet + containing your nodes is running. - For more information, see the kubernetes stateful-application [documentation](https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#using-stable-network-identities) + > This strategy requires deploying pods as a StatefulSet which is exposed by a headless service. + > If you want to avoid that, you could use `Cluster.Strategy.Kubernetes.DNS`. - * It will fetch the FQDN of all pods under the headless service and attempt to connect. 
- * It will continually monitor and update its connections according to the polling_interval (default 5s) + It assumes that all Erlang nodes are using longnames - `@`: - The `application_name` is configurable (you may have launched erlang with a different configured name), - but will in most cases be the name of your application + + all nodes are using the same `` + + all nodes are using unique `` - An example configuration is below: + In `@`: - config :libcluster, - topologies: [ - k8s_example: [ - strategy: #{__MODULE__}, - config: [ - service: "elixir-plug-poc", - application_name: "elixir_plug_poc", - namespace: "default", - polling_interval: 10_000]]] + + `` would be the value configured by `:application_name` option. + + `` would be the value which is controlled by following options: + - `:service` + - `:namespace` + - `:resolver` - An example of how this strategy extracts topology information from DNS follows: + ## Getting `` - ``` - bash-5.0# hostname -f - elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local - bash-5.0# dig SRV elixir-plug-poc.default.svc.cluster.local + As said above, the basename is configured by `:application_name` option. - ; <<>> DiG 9.14.3 <<>> SRV elixir-plug-poc.default.svc.cluster.local - ;; global options: +cmd - ;; Got answer: - ;; WARNING: .local is reserved for Multicast DNS - ;; You are currently testing what happens when an mDNS query is leaked to DNS - ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 7169 - ;; flags: qr aa rd ra; QUERY: 1, ANSWER: 2, AUTHORITY: 0, ADDITIONAL: 2 + Just one thing to keep in mind - when building an OTP release, make sure that the name of the OTP + release matches the name configured by `:application_name`. - ;; QUESTION SECTION: - ;elixir-plug-poc.default.svc.cluster.local. IN SRV + ## Getting `` - ;; ANSWER SECTION: - elixir-plug-poc.default.svc.cluster.local. 30 IN SRV 10 50 0 elixir-plug-poc-0.elixir-plug-poc.default.svc.cluster.local. - elixir-plug-poc.default.svc.cluster.local. 
30 IN SRV 10 50 0 elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local. + > For more information, see the kubernetes stateful-application [documentation](https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#using-stable-network-identities) - ;; ADDITIONAL SECTION: - elixir-plug-poc-0.elixir-plug-poc.default.svc.cluster.local. 30 IN A 10.1.0.95 - elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local. 30 IN A 10.1.0.96 + ## Setup - ;; Query time: 0 msec - ;; SERVER: 10.96.0.10#53(10.96.0.10) - ;; WHEN: Wed Jul 03 11:55:27 UTC 2019 - ;; MSG SIZE rcvd: 167 - ``` + Getting this strategy to work requires: + + 1. deploying pods as a StatefulSet (otherwise, hostname won't be set for pods) + 2. exposing above StatefulSet by a headless service (otherwise, the SRV query won't work as expected) + 3. setting the name of Erlang node according to hostname of pods - And here is an example of a corresponding kubernetes statefulset/service definition: + First, deploying pods as a StatefulSet which is exposed by a headless service. 
And here is an + example of a corresponding Kubernetes definition: ```yaml apiVersion: v1 kind: Service metadata: - name: elixir-plug-poc + name: "myapp-headless" labels: - app: elixir-plug-poc + app: myapp spec: ports: - port: 4000 name: web clusterIP: None selector: - app: elixir-plug-poc + app: myapp --- apiVersion: apps/v1 kind: StatefulSet metadata: - name: elixir-plug-poc + name: myapp spec: - serviceName: "elixir-plug-poc" + serviceName: "myapp-headless" replicas: 2 selector: matchLabels: - app: elixir-plug-poc + app: myapp template: metadata: labels: - app: elixir-plug-poc + app: myapp spec: containers: - - name: elixir-plug-poc - image: binarytemple/elixir_plug_poc - args: - - foreground - env: - - name: ERLANG_COOKIE - value: "cookie" + - name: myapp + image: myapp:v1.0.0 imagePullPolicy: Always ports: - containerPort: 4000 name: http protocol: TCP ``` + + Then, set the name of Erlang node by using the hostname of pod. If you use mix releases, you + can configure the required options in `rel/env.sh.eex`: + + # rel/env.sh.eex + export RELEASE_DISTRIBUTION=name + export RELEASE_NODE=<%= @release.name %>@$(hostname -f) + + ## Polling Interval + + The default interval to sync topologies is `#{@default_polling_interval}` + (#{div(@default_polling_interval, 1000)} seconds). You can configure it with `:polling_interval` option. 
+ + ## An example configuration + + config :libcluster, + topologies: [ + erlang_nodes_in_k8s: [ + strategy: #{__MODULE__}, + config: [ + service: "myapp-headless", + application_name: "myapp", + namespace: "default", + polling_interval: 10_000 + ] + ] + ] + + ## An example of how this strategy extracts topology information from DNS + + ```sh + $ hostname -f + myapp-1.myapp-headless.default.svc.cluster.local + + # An SRV query for a headless service returns multiple entries + $ dig SRV myapp-headless.default.svc.cluster.local + + ; <<>> DiG 9.14.3 <<>> SRV myapp-headless.default.svc.cluster.local + ;; global options: +cmd + ;; Got answer: + ;; WARNING: .local is reserved for Multicast DNS + ;; You are currently testing what happens when an mDNS query is leaked to DNS + ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 7169 + ;; flags: qr aa rd ra; QUERY: 1, ANSWER: 2, AUTHORITY: 0, ADDITIONAL: 2 + + ;; QUESTION SECTION: + ;myapp-headless.default.svc.cluster.local. IN SRV + + ;; ANSWER SECTION: + myapp-headless.default.svc.cluster.local. 30 IN SRV 10 50 0 myapp-0.myapp-headless.default.svc.cluster.local. + myapp-headless.default.svc.cluster.local. 30 IN SRV 10 50 0 myapp-1.myapp-headless.default.svc.cluster.local. + + ;; ADDITIONAL SECTION: + myapp-0.myapp-headless.default.svc.cluster.local. 30 IN A 10.1.0.95 + myapp-1.myapp-headless.default.svc.cluster.local. 
30 IN A 10.1.0.96 + + ;; Query time: 0 msec + ;; SERVER: 10.96.0.10#53(10.96.0.10) + ;; WHEN: Wed Jul 03 11:55:27 UTC 2019 + ;; MSG SIZE rcvd: 167 + ``` + """ use GenServer use Cluster.Strategy @@ -108,8 +151,6 @@ defmodule Cluster.Strategy.Kubernetes.DNSSRV do alias Cluster.Strategy.State - @default_polling_interval 5_000 - @impl true def start_link(args), do: GenServer.start_link(__MODULE__, args) diff --git a/lib/strategy/state.ex b/lib/strategy/state.ex index a72e038..4173160 100644 --- a/lib/strategy/state.ex +++ b/lib/strategy/state.ex @@ -1,5 +1,7 @@ defmodule Cluster.Strategy.State do - @moduledoc false + @moduledoc """ + The state of one strategy. + """ @type t :: %__MODULE__{ topology: atom,