From 2798c4bb3f9962d186a4b66ccbe7656a1cd6aab9 Mon Sep 17 00:00:00 2001
From: marcel-dempers
Date: Tue, 25 Aug 2020 21:41:56 +1000
Subject: [PATCH] hpa + ca into one guide

---
 .../traffic-generator.yaml                    |   0
 .../components/autoscaler-cluster/readme.md   | 136 ------------------
 .../autoscaler-cluster/traffic-generator.yaml |  18 ---
 .../components/autoscaler-hpa/readme.md       |  32 -----
 .../components/autoscaler-vpa/readme.md       | 105 --------------
 .../autoscaler-vpa/traffic-generator.yaml     |  11 --
 .../metric-server/metricserver-0.3.7.yaml     |   2 +
 kubernetes/autoscaling/readme.md              |  44 +++++-
 8 files changed, 40 insertions(+), 308 deletions(-)
 rename kubernetes/autoscaling/components/{autoscaler-hpa => application}/traffic-generator.yaml (100%)
 delete mode 100644 kubernetes/autoscaling/components/autoscaler-cluster/readme.md
 delete mode 100644 kubernetes/autoscaling/components/autoscaler-cluster/traffic-generator.yaml
 delete mode 100644 kubernetes/autoscaling/components/autoscaler-hpa/readme.md
 delete mode 100644 kubernetes/autoscaling/components/autoscaler-vpa/readme.md
 delete mode 100644 kubernetes/autoscaling/components/autoscaler-vpa/traffic-generator.yaml

diff --git a/kubernetes/autoscaling/components/autoscaler-hpa/traffic-generator.yaml b/kubernetes/autoscaling/components/application/traffic-generator.yaml
similarity index 100%
rename from kubernetes/autoscaling/components/autoscaler-hpa/traffic-generator.yaml
rename to kubernetes/autoscaling/components/application/traffic-generator.yaml
diff --git a/kubernetes/autoscaling/components/autoscaler-cluster/readme.md b/kubernetes/autoscaling/components/autoscaler-cluster/readme.md
deleted file mode 100644
index 746e54e..0000000
--- a/kubernetes/autoscaling/components/autoscaler-cluster/readme.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# Cluster Autoscaling
-
-Scales the number of nodes in our cluster based off usage metrics
-[Documentation](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)
-
-## Understanding Resources
-
-In this example, I'll be focusing on CPU for scaling.
-We need a clear understanding of the compute resources we have:
-1) How many cores do we have?
-2) How many cores does our application use?
-
-I go into more details about pod resource utilisation in the Horizontal Pod Autoscaler guide.
-
-# We need a Kubernetes cluster with Cluster Autoscaler
-
-```
-# azure example
-
-NAME=aks-getting-started
-RESOURCEGROUP=aks-getting-started
-SERVICE_PRINCIPAL=
-SERVICE_PRINCIPAL_SECRET=
-
-az aks create -n $NAME \
---resource-group $RESOURCEGROUP \
---location australiaeast \
---kubernetes-version 1.16.10 \
---nodepool-name default \
---node-count 1 \
---node-vm-size Standard_F4s_v2 \
---node-osdisk-size 250 \
---service-principal $SERVICE_PRINCIPAL \
---client-secret $SERVICE_PRINCIPAL_SECRET \
---output none \
---enable-cluster-autoscaler \
---min-count 1 \
---max-count 5
-```
-
-# Deploy Metric Server
-
-[Metric Server](https://github.com/kubernetes-sigs/metrics-server) provides container resource metrics for use in autoscaling pipelines
-
-We will need to deploy Metric Server [0.3.7](https://github.com/kubernetes-sigs/metrics-server/releases/tag/v0.3.7)
-I used `components.yaml` from the release page linked above.
-
-Note: for demo clusters (like `kind`), you will need to disable TLS.
-You can disable TLS by adding the following to the metrics-server container args
-
-```
-- --kubelet-insecure-tls
-- --kubelet-preferred-address-types="InternalIP"
-
-```
-
-Deploy it:
-
-```
-cd kubernetes\autoscaling
-kubectl -n kube-system apply -f .\metric-server\metricserver-0.3.7.yaml
-
-#test
-kubectl -n kube-system get pods
-
-#wait for metrics to populate
-kubectl top nodes
-
-```
-
-## Example App
-
-We have an app that simulates CPU usage
-
-```
-# build
-
-cd kubernetes\autoscaling\application-cpu
-docker build . -t aimvector/application-cpu:v1.0.0
-
-# push
-docker push aimvector/application-cpu:v1.0.0
-
-# resource requirements
-resources:
-  requests:
-    memory: "50Mi"
-    cpu: "500m"
-  limits:
-    memory: "500Mi"
-    cpu: "2000m"
-
-# deploy
-kubectl apply -f deployment.yaml
-
-# metrics
-kubectl top pods
-```
-
-## Generate some CPU load
-
-```
-# Deploy a tester to run traffic from
-
-cd kubernetes/autoscaling
-kubectl apply -f ./autoscaler-cluster/tester.yaml
-
-# get a terminal
-kubectl exec -it tester sh
-# install wrk
-apk add --no-cache wrk curl
-
-# simulate some load
-wrk -c 5 -t 5 -d 99999 -H "Connection: Close" http://application-cpu
-
-# scale and keep checking `kubectl top`
-# every time we add a pod, CPU load per pod should drop dramatically.
-# roughly 8 pods will have each pod use +- 400m
-
-kubectl scale deploy/application-cpu --replicas 2
-```
-
-## Deploy an autoscaler
-
-```
-# scale the deployment back down to 2
-kubectl scale deploy/application-cpu --replicas 2
-
-# deploy the autoscaler
-kubectl autoscale deploy/application-cpu --cpu-percent=95 --min=1 --max=10
-
-# pods should scale to roughly 7-8 to match criteria
-
-kubectl describe hpa/application-cpu
-kubectl get hpa/application-cpu -owide
-```
diff --git a/kubernetes/autoscaling/components/autoscaler-cluster/traffic-generator.yaml b/kubernetes/autoscaling/components/autoscaler-cluster/traffic-generator.yaml
deleted file mode 100644
index cc0d3c0..0000000
--- a/kubernetes/autoscaling/components/autoscaler-cluster/traffic-generator.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: traffic-generator
-spec:
-  containers:
-  - name: alpine
-    resources:
-      requests:
-        memory: "50Mi"
-        cpu: "500m"
-      limits:
-        memory: "500Mi"
-        cpu: "2000m"
-    image: alpine
-    args:
-    - sleep
-    - "100000000"
\ No newline at end of file
diff --git a/kubernetes/autoscaling/components/autoscaler-hpa/readme.md b/kubernetes/autoscaling/components/autoscaler-hpa/readme.md
deleted file mode 100644
index 5b72a63..0000000
--- a/kubernetes/autoscaling/components/autoscaler-hpa/readme.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Horizontal Pod Autoscaling
-
-Scales the number of pods in a deployment based off metrics.
-Kubernetes [documentation](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/)
-
-## Understanding Resources
-
-We need to ensure we have an understanding of the compute resources we have.
-1) How many cores do we have
-2) How many cores do our application use
-We need to ensure we have an understanding of the compute resources we have.
-1) How many cores do we have
-2) How many cores do our application use
-3) Observe our applications usage -4) Use the VPA to recommend resource request values for our application - -## Create a cluster - -My Node has 6 CPU cores for this demo
- -``` -kind create cluster --name vpa --image kindest/node:v1.18.4 -``` - - -# Deploy Metric Server - -[Metric Server](https://github.com/kubernetes-sigs/metrics-server) provides container resource metrics for use in autoscaling pipelines - -We will need to deploy Metric Server [0.3.7](https://github.com/kubernetes-sigs/metrics-server/releases/tag/v0.3.7)
-I used `components.yaml` from the release page linked above.
-
-Note: for demo clusters (like `kind`), you will need to disable TLS.
-You can disable TLS by adding the following flags to the metrics-server container args.
-
-For production, make sure you remove the following:
-
-```
-- --kubelet-insecure-tls
-- --kubelet-preferred-address-types="InternalIP"
-
-```
-
-Deploy it:
-
-```
-cd kubernetes\autoscaling
-kubectl -n kube-system apply -f .\metric-server\metricserver-0.3.7.yaml
-
-#test
-kubectl -n kube-system get pods
-
-#wait for metrics to populate
-kubectl top nodes
-
-```
-
-## Example App
-
-We have an app that simulates CPU usage
-
-```
-# build
-
-cd kubernetes\autoscaling\application-cpu
-docker build . -t aimvector/application-cpu:v1.0.0
-
-# push
-docker push aimvector/application-cpu:v1.0.0
-
-# resource requirements
-resources:
-  requests:
-    memory: "50Mi"
-    cpu: "500m"
-  limits:
-    memory: "500Mi"
-    cpu: "2000m"
-
-# deploy
-kubectl apply -f deployment.yaml
-
-# metrics
-kubectl top pods
-```
-
-## Generate some CPU load
-
-```
-# Deploy a tester to run traffic from
-
-cd kubernetes\autoscaling
-kubectl apply -f .\autoscaler-vpa\tester.yaml
-
-# get a terminal
-kubectl exec -it tester sh
-# install wrk
-apk add --no-cache wrk curl
-
-# simulate some load
-wrk -c 5 -t 5 -d 99999 -H "Connection: Close" http://application-cpu
-
-# scale and keep checking `kubectl top`
-# every time we add a pod, CPU load per pod should drop dramatically.
-# roughly 8 pods will have each pod use +- 400m
-
-kubectl scale deploy/application-cpu --replicas 2
-```
\ No newline at end of file
diff --git a/kubernetes/autoscaling/components/autoscaler-vpa/traffic-generator.yaml b/kubernetes/autoscaling/components/autoscaler-vpa/traffic-generator.yaml
deleted file mode 100644
index 50bffa5..0000000
--- a/kubernetes/autoscaling/components/autoscaler-vpa/traffic-generator.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: traffic-generator
-spec:
-  containers:
-  - name: alpine
-    image: alpine
-    args:
-    - sleep
-    - "100000000"
\ No newline at end of file
diff --git a/kubernetes/autoscaling/components/metric-server/metricserver-0.3.7.yaml b/kubernetes/autoscaling/components/metric-server/metricserver-0.3.7.yaml
index 972364f..52c12fa 100644
--- a/kubernetes/autoscaling/components/metric-server/metricserver-0.3.7.yaml
+++ b/kubernetes/autoscaling/components/metric-server/metricserver-0.3.7.yaml
@@ -88,6 +88,8 @@ spec:
         args:
           - --cert-dir=/tmp
           - --secure-port=4443
+          - --kubelet-insecure-tls
+          - --kubelet-preferred-address-types="InternalIP"
         ports:
         - name: main-port
           containerPort: 4443
diff --git a/kubernetes/autoscaling/readme.md b/kubernetes/autoscaling/readme.md
index 020f7b7..0d840b3 100644
--- a/kubernetes/autoscaling/readme.md
+++ b/kubernetes/autoscaling/readme.md
@@ -3,6 +3,8 @@
 
 ## Cluster Autoscaling
 
 Cluster autoscaler allows us to scale cluster nodes when they become full
+I would recommend learning how to scale your cluster nodes before scaling pods (see the example below).
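+
+For example, on AKS you can create a cluster with the autoscaler enabled from the start (a minimal sketch; resource names and node counts are illustrative):
+
+```
+az aks create -n aks-getting-started \
+--resource-group aks-getting-started \
+--enable-cluster-autoscaler \
+--min-count 1 \
+--max-count 5
+```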
+Video [here](https://youtu.be/jM36M39MA3I)
 
 ## Horizontal Pod Autoscaling
@@ -50,7 +52,6 @@ My Node has 6 CPU cores for this demo
 
 ```
 kind create cluster --name hpa --image kindest/node:v1.18.4
 ```
 
-
 ### Metric Server
 
 * For `Cluster Autoscaler` - On cloud-based clusters, Metric server may already be installed.
@@ -78,12 +79,12 @@ Deployment:
 ```
 cd kubernetes\autoscaling
-kubectl -n kube-system apply -f .\metric-server\metricserver-0.3.7.yaml
+kubectl -n kube-system apply -f .\components\metric-server\metricserver-0.3.7.yaml
 
 #test
 kubectl -n kube-system get pods
 
-#wait for metrics to populate
+#note: wait for metrics to populate!
 kubectl top nodes
 
 ```
 
@@ -101,7 +102,7 @@ For all autoscaling guides, we'll need a simple app, that generates some CPU loa
 ```
 # build
 
-cd kubernetes\autoscaling\application-cpu
+cd kubernetes\autoscaling\components\application
 docker build . -t aimvector/application-cpu:v1.0.0
 
 # push
@@ -124,17 +125,48 @@
 kubectl top pods
 ```
 
+## Cluster Autoscaler
+
+For cluster autoscaling, you should be able to scale the pods manually and watch the cluster scale.
+Cluster autoscaling stops here.
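+
+A minimal sketch of that manual test (it assumes the application-cpu deployment from this guide is running; the replica count is illustrative):
+
+```
+# request more replicas than the current nodes can fit
+# (20 is arbitrary, pick something your nodes cannot hold)
+kubectl scale deploy/application-cpu --replicas 20
+
+# watch pods go Pending, then get scheduled as nodes are added
+kubectl get pods -owide -w
+kubectl get nodes -w
+```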
+For Pod Autoscaling (HPA), continue below.
+
 ## Generate some traffic
+Let's deploy a simple traffic generator pod
 
 ```
+cd kubernetes\autoscaling\components\application
+kubectl apply -f .\traffic-generator.yaml
+
 # get a terminal to the traffic-generator
 kubectl exec -it traffic-generator sh
+
 # install wrk
-apk add --no-cache wrk curl
+apk add --no-cache wrk
 
 # simulate some load
 wrk -c 5 -t 5 -d 99999 -H "Connection: Close" http://application-cpu
 
+#you can scale the pods manually and see that roughly 6-7 pods will satisfy resource requests.
 kubectl scale deploy/application-cpu --replicas 2
-```
\ No newline at end of file
+```
+
+## Deploy an autoscaler
+
+```
+# scale the deployment back down to 2
+kubectl scale deploy/application-cpu --replicas 2
+
+# deploy the autoscaler
+kubectl autoscale deploy/application-cpu --cpu-percent=95 --min=1 --max=10
+
+# pods should scale to roughly 6-7 to match criteria of 95% of resource requests
+
+kubectl get pods
+kubectl top pods
+kubectl get hpa/application-cpu -owide
+
+kubectl describe hpa/application-cpu
+
+```
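+
+While the load test above is running, you can watch the autoscaler react from a second terminal (a quick optional check, using the same flags as elsewhere in this guide):
+
+```
+# watch the HPA targets converge on the 95% threshold
+kubectl get hpa/application-cpu -owide -w
+
+# confirm per-pod CPU drops as replicas are added
+kubectl top pods
+```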