Merge pull request #33 from marcel-dempers/k8s-autoscaling

autoscaling
Marcel Dempers 2020-08-24 20:40:16 +00:00 committed by GitHub
commit 641b16ab12
11 changed files with 691 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
package main

import (
	"fmt"
	"net/http"
	"strconv"
)

func main() {
	http.HandleFunc("/", useCPU)
	http.ListenAndServe(":80", nil)
}

// useCPU burns a little CPU per request by counting to a million,
// then returns the final count in the response body.
func useCPU(w http.ResponseWriter, r *http.Request) {
	count := 1
	for i := 1; i <= 1000000; i++ {
		count = i
	}
	fmt.Printf("count: %d\n", count)
	// strconv.Itoa gives the decimal string; the previous string(count)
	// produced the rune at code point 1000000 instead of "1000000".
	w.Write([]byte(strconv.Itoa(count)))
}

View File

@@ -0,0 +1,50 @@
apiVersion: v1
kind: Service
metadata:
  name: application-cpu
  labels:
    app: application-cpu
spec:
  type: ClusterIP
  selector:
    app: application-cpu
  ports:
  - protocol: TCP
    name: http
    port: 80
    targetPort: 80
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: application-cpu
  labels:
    app: application-cpu
spec:
  selector:
    matchLabels:
      app: application-cpu
  replicas: 1
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: application-cpu
    spec:
      containers:
      - name: application-cpu
        image: aimvector/application-cpu:v1.0.2
        imagePullPolicy: Always
        ports:
        - containerPort: 80
        resources:
          requests:
            memory: "50Mi"
            cpu: "500m"
          limits:
            memory: "500Mi"
            cpu: "2000m"

View File

@@ -0,0 +1,15 @@
# build stage: compile the Go app
FROM golang:1.14-alpine AS build
RUN apk add --no-cache git curl
WORKDIR /src
COPY app.go /src
RUN go build app.go

# runtime stage: ship only the compiled binary
FROM alpine AS runtime
COPY --from=build /src/app /app/app
CMD [ "/app/app" ]

View File

@@ -0,0 +1,136 @@
# Cluster Autoscaling
Scales the number of nodes in our cluster based on usage metrics.
[Documentation](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)
## Understanding Resources
In this example, I'll be focusing on CPU for scaling. <br/>
We need a clear picture of the compute resources we have. <br/>
1) How many cores do we have? <br/>
2) How many cores does our application use? <br/>
I go into more detail about pod resource utilisation in the Horizontal Pod Autoscaler guide.
## We need a Kubernetes cluster with Cluster Autoscaler
```
# azure example
NAME=aks-getting-started
RESOURCEGROUP=aks-getting-started
SERVICE_PRINCIPAL=
SERVICE_PRINCIPAL_SECRET=
az aks create -n $NAME \
--resource-group $RESOURCEGROUP \
--location australiaeast \
--kubernetes-version 1.16.10 \
--nodepool-name default \
--node-count 1 \
--node-vm-size Standard_F4s_v2 \
--node-osdisk-size 250 \
--service-principal $SERVICE_PRINCIPAL \
--client-secret $SERVICE_PRINCIPAL_SECRET \
--output none \
--enable-cluster-autoscaler \
--min-count 1 \
--max-count 5
```
## Deploy Metric Server
[Metric Server](https://github.com/kubernetes-sigs/metrics-server) provides container resource metrics for use in autoscaling pipelines. <br/>
We will need to deploy Metric Server [0.3.7](https://github.com/kubernetes-sigs/metrics-server/releases/tag/v0.3.7) <br/>
I used `components.yaml` from the release page link above. <br/>
Note: for demo clusters (like `kind`), you will need to disable TLS. <br/>
You can disable TLS by adding the following to the metrics-server container args:
```
- --kubelet-insecure-tls
- --kubelet-preferred-address-types="InternalIP"
```
Deploy it:
```
cd kubernetes\autoscaling
kubectl -n kube-system apply -f .\metric-server\metricserver-0.3.7.yaml
#test
kubectl -n kube-system get pods
#wait for metrics to populate
kubectl top nodes
```
## Example App
We have an app that simulates CPU usage
```
# build
cd kubernetes\autoscaling\application-cpu
docker build . -t aimvector/application-cpu:v1.0.0
# push
docker push aimvector/application-cpu:v1.0.0
# resource requirements
resources:
  requests:
    memory: "50Mi"
    cpu: "500m"
  limits:
    memory: "500Mi"
    cpu: "2000m"
# deploy
kubectl apply -f deployment.yaml
# metrics
kubectl top pods
```
## Generate some CPU load
```
# Deploy a tester to run traffic from
cd kubernetes/autoscaling
kubectl apply -f ./autoscaler-cluster/tester.yaml
# get a terminal
kubectl exec -it traffic-generator sh
# install wrk
apk add --no-cache wrk curl
# simulate some load
wrk -c 5 -t 5 -d 99999 -H "Connection: Close" http://application-cpu
# scale up and keep checking `kubectl top pods`
# every time we add a pod, the CPU load per pod should drop noticeably
# at roughly 8 replicas, each pod settles at around 400m
kubectl scale deploy/application-cpu --replicas 2
```
## Deploy an autoscaler
```
# scale the deployment back down to 2
kubectl scale deploy/application-cpu --replicas 2
# deploy the autoscaler
kubectl autoscale deploy/application-cpu --cpu-percent=95 --min=1 --max=10
# pods should scale to roughly 7-8 replicas to meet the CPU target
kubectl describe hpa/application-cpu
kubectl get hpa/application-cpu -owide
```
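The `kubectl autoscale` command above is imperative. If you prefer to keep the autoscaler in source control, a minimal declarative sketch using the `autoscaling/v1` API looks like this (`kubectl autoscale` creates an HPA named after the deployment, so the name below matches what the command creates):
```
apiVersion: autoscaling/v1
kind: HorizontalPodAutoscaler
metadata:
  name: application-cpu
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: application-cpu
  minReplicas: 1
  maxReplicas: 10
  # target average CPU utilisation, as a percentage of each pod's CPU request
  targetCPUUtilizationPercentage: 95
```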

View File

@@ -0,0 +1,18 @@
apiVersion: v1
kind: Pod
metadata:
  name: traffic-generator
spec:
  containers:
  - name: alpine
    resources:
      requests:
        memory: "50Mi"
        cpu: "500m"
      limits:
        memory: "500Mi"
        cpu: "2000m"
    image: alpine
    args:
    - sleep
    - "100000000"

View File

@@ -0,0 +1,32 @@
# Horizontal Pod Autoscaling
Scales the number of pods in a deployment based on metrics.
Kubernetes [documentation](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/)
## Understanding Resources
In this example, I'll be focusing on CPU for scaling. <br/>
We need a clear picture of the compute resources we have. <br/>
1) How many cores do we have? <br/>
2) How many cores does our application use? <br/>
As you scale up, keep checking `kubectl top pods`: every time a pod is added, the CPU load per pod should drop noticeably, and at roughly 8 replicas each pod settles at around 400m.
## Deploy an autoscaler
```
# scale the deployment back down to 2
kubectl scale deploy/application-cpu --replicas 2
# deploy the autoscaler
kubectl autoscale deploy/application-cpu --cpu-percent=95 --min=1 --max=10
# pods should scale to roughly 7-8 replicas to meet the CPU target
kubectl describe hpa/application-cpu
kubectl get hpa/application-cpu -owide
```
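On Kubernetes `1.18` the `autoscaling/v2beta2` API is also available; a sketch of the same autoscaler in the newer multi-metric syntax (save it as, say, `hpa.yaml` and apply with `kubectl apply -f hpa.yaml` - the file name is just an example):
```
apiVersion: autoscaling/v2beta2
kind: HorizontalPodAutoscaler
metadata:
  name: application-cpu
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: application-cpu
  minReplicas: 1
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        # percentage of the pod's CPU request, matching --cpu-percent=95
        averageUtilization: 95
```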

View File

@@ -0,0 +1,11 @@
apiVersion: v1
kind: Pod
metadata:
  name: traffic-generator
spec:
  containers:
  - name: alpine
    image: alpine
    args:
    - sleep
    - "100000000"

View File

@@ -0,0 +1,105 @@
# Vertical Pod Autoscaling
Provides recommendations for CPU and Memory request values.
## Understanding Resources
In this example, I'll be focusing on CPU for scaling. <br/>
We need a clear picture of the compute resources we have. <br/>
1) How many cores do we have? <br/>
2) How many cores does our application use? <br/>
3) Observe our application's usage <br/>
4) Use the VPA to recommend resource request values for our application
## Create a cluster
My node has 6 CPU cores for this demo. <br/>
```
kind create cluster --name vpa --image kindest/node:v1.18.4
```
## Deploy Metric Server
[Metric Server](https://github.com/kubernetes-sigs/metrics-server) provides container resource metrics for use in autoscaling pipelines. <br/>
We will need to deploy Metric Server [0.3.7](https://github.com/kubernetes-sigs/metrics-server/releases/tag/v0.3.7) <br/>
I used `components.yaml` from the release page link above. <br/>
Note: for demo clusters (like `kind`), you will need to disable TLS. <br/>
You can disable TLS by adding the following to the metrics-server container args. <br/>
For production, make sure you remove the following: <br/>
```
- --kubelet-insecure-tls
- --kubelet-preferred-address-types="InternalIP"
```
Deploy it:
```
cd kubernetes\autoscaling
kubectl -n kube-system apply -f .\metric-server\metricserver-0.3.7.yaml
#test
kubectl -n kube-system get pods
#wait for metrics to populate
kubectl top nodes
```
## Example App
We have an app that simulates CPU usage
```
# build
cd kubernetes\autoscaling\application-cpu
docker build . -t aimvector/application-cpu:v1.0.0
# push
docker push aimvector/application-cpu:v1.0.0
# resource requirements
resources:
  requests:
    memory: "50Mi"
    cpu: "500m"
  limits:
    memory: "500Mi"
    cpu: "2000m"
# deploy
kubectl apply -f deployment.yaml
# metrics
kubectl top pods
```
## Generate some CPU load
```
# Deploy a tester to run traffic from
cd kubernetes\autoscaling
kubectl apply -f .\autoscaler-vpa\tester.yaml
# get a terminal
kubectl exec -it traffic-generator sh
# install wrk
apk add --no-cache wrk curl
# simulate some load
wrk -c 5 -t 5 -d 99999 -H "Connection: Close" http://application-cpu
# scale up and keep checking `kubectl top pods`
# every time we add a pod, the CPU load per pod should drop noticeably
# at roughly 8 replicas, each pod settles at around 400m
kubectl scale deploy/application-cpu --replicas 2
```
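The VPA itself is defined as a custom resource, so this assumes you've installed the VPA components from the [autoscaler repo](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler). A minimal recommendation-only sketch for our app:
```
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: application-cpu
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: application-cpu
  updatePolicy:
    # "Off" = publish recommendations only; don't evict and resize pods
    updateMode: "Off"
```
With `updateMode: "Off"`, recommendations show up under `kubectl describe vpa application-cpu` without any pods being restarted.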

View File

@@ -0,0 +1,11 @@
apiVersion: v1
kind: Pod
metadata:
  name: traffic-generator
spec:
  containers:
  - name: alpine
    image: alpine
    args:
    - sleep
    - "100000000"

View File

@@ -0,0 +1,151 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:aggregated-metrics-reader
  labels:
    rbac.authorization.k8s.io/aggregate-to-view: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
rules:
- apiGroups: ["metrics.k8s.io"]
  resources: ["pods", "nodes"]
  verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: apiregistration.k8s.io/v1beta1
kind: APIService
metadata:
  name: v1beta1.metrics.k8s.io
spec:
  service:
    name: metrics-server
    namespace: kube-system
  group: metrics.k8s.io
  version: v1beta1
  insecureSkipTLSVerify: true
  groupPriorityMinimum: 100
  versionPriority: 100
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: metrics-server
  namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: metrics-server
  namespace: kube-system
  labels:
    k8s-app: metrics-server
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  template:
    metadata:
      name: metrics-server
      labels:
        k8s-app: metrics-server
    spec:
      serviceAccountName: metrics-server
      volumes:
      # mount in tmp so we can safely use from-scratch images and/or read-only containers
      - name: tmp-dir
        emptyDir: {}
      containers:
      - name: metrics-server
        image: k8s.gcr.io/metrics-server/metrics-server:v0.3.7
        imagePullPolicy: IfNotPresent
        args:
        - --cert-dir=/tmp
        - --secure-port=4443
        ports:
        - name: main-port
          containerPort: 4443
          protocol: TCP
        securityContext:
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - name: tmp-dir
          mountPath: /tmp
      nodeSelector:
        kubernetes.io/os: linux
        kubernetes.io/arch: "amd64"
---
apiVersion: v1
kind: Service
metadata:
  name: metrics-server
  namespace: kube-system
  labels:
    kubernetes.io/name: "Metrics-server"
    kubernetes.io/cluster-service: "true"
spec:
  selector:
    k8s-app: metrics-server
  ports:
  - port: 443
    protocol: TCP
    targetPort: main-port
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  - nodes/stats
  - namespaces
  - configmaps
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system

View File

@@ -0,0 +1,140 @@
# Kubernetes Autoscaling Guide
## Cluster Autoscaling
Cluster autoscaler allows us to scale cluster nodes when they become full. <br/>
## Horizontal Pod Autoscaling
HPA allows us to scale pods when their resource utilisation goes over a threshold. <br/>
## Requirements
### A Cluster
* For both autoscaling guides, we'll need a cluster <br/>
* For `Cluster Autoscaler`, you need a cloud-based cluster that supports the cluster autoscaler <br/>
* For `HPA`, we'll use [kind](http://kind.sigs.k8s.io/)
### Cluster Autoscaling - Creating an AKS Cluster
```
# azure example
NAME=aks-getting-started
RESOURCEGROUP=aks-getting-started
SERVICE_PRINCIPAL=
SERVICE_PRINCIPAL_SECRET=
az aks create -n $NAME \
--resource-group $RESOURCEGROUP \
--location australiaeast \
--kubernetes-version 1.16.10 \
--nodepool-name default \
--node-count 1 \
--node-vm-size Standard_F4s_v2 \
--node-osdisk-size 250 \
--service-principal $SERVICE_PRINCIPAL \
--client-secret $SERVICE_PRINCIPAL_SECRET \
--output none \
--enable-cluster-autoscaler \
--min-count 1 \
--max-count 5
```
### Horizontal Pod Autoscaling - Creating a Kind Cluster
My node has 6 CPU cores for this demo. <br/>
```
kind create cluster --name hpa --image kindest/node:v1.18.4
```
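By default `kind` gives you a single node. If you'd like to watch pods spread across more than one node while they scale, `kind` accepts a cluster config; a sketch (the file name `kind-config.yaml` is just an example):
```
# kind-config.yaml
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
- role: worker
```
Pass it with `kind create cluster --name hpa --image kindest/node:v1.18.4 --config kind-config.yaml`.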
### Metric Server
* For `Cluster Autoscaler` - on cloud-based clusters, Metric Server may already be installed <br/>
* For `HPA` - we're using `kind`
[Metric Server](https://github.com/kubernetes-sigs/metrics-server) provides container resource metrics for use in autoscaling pipelines <br/>
Because I run K8s `1.18` in `kind`, the Metric Server version I need is `0.3.7` <br/>
We will need to deploy Metric Server [0.3.7](https://github.com/kubernetes-sigs/metrics-server/releases/tag/v0.3.7) <br/>
I used `components.yaml` from the release page link above. <br/>
<b>Important Note</b>: for demo clusters (like `kind`), you will need to disable TLS <br/>
You can disable TLS by adding the following to the metrics-server container args <br/>
<b>For production, make sure you remove the following:</b> <br/>
```
- --kubelet-insecure-tls
- --kubelet-preferred-address-types="InternalIP"
```
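For context, here is roughly where those flags land in the metrics-server container spec from `components.yaml` (the full manifest is included in this commit); the last two args are the demo-only ones to remove for production:
```
containers:
- name: metrics-server
  image: k8s.gcr.io/metrics-server/metrics-server:v0.3.7
  args:
  - --cert-dir=/tmp
  - --secure-port=4443
  # demo-only: trust the kubelet without verifying its serving certificate
  - --kubelet-insecure-tls
  - --kubelet-preferred-address-types="InternalIP"
```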
Deployment: <br/>
```
cd kubernetes\autoscaling
kubectl -n kube-system apply -f .\metric-server\metricserver-0.3.7.yaml
#test
kubectl -n kube-system get pods
#wait for metrics to populate
kubectl top nodes
```
## Example Application
For all autoscaling guides, we'll need a simple app that generates some CPU load <br/>
* Build the app
* Push it to a registry
* Ensure resource requirements are set
* Deploy it to Kubernetes
* Ensure metrics are visible for the app
```
# build
cd kubernetes\autoscaling\application-cpu
docker build . -t aimvector/application-cpu:v1.0.0
# push
docker push aimvector/application-cpu:v1.0.0
# resource requirements
resources:
  requests:
    memory: "50Mi"
    cpu: "500m"
  limits:
    memory: "500Mi"
    cpu: "2000m"
# deploy
kubectl apply -f deployment.yaml
# metrics
kubectl top pods
```
## Generate some traffic
```
# get a terminal to the traffic-generator
kubectl exec -it traffic-generator sh
# install wrk
apk add --no-cache wrk curl
# simulate some load
wrk -c 5 -t 5 -d 99999 -H "Connection: Close" http://application-cpu
kubectl scale deploy/application-cpu --replicas 2
```