kafka wip

2025-06-06 17:01:30 +00:00 · 2021-04-19 07:51:15 +10:00 · 2021-04-19 07:51:15 +10:00 · 6c51b181fc
commit 6c51b181fc
parent ec38685fcd
13 changed files with 373 additions and 0 deletions
--- a/messaging/kafka/README.md
+++ b/messaging/kafka/README.md
@ -0,0 +1,59 @@
+# Notes 
+https://hub.docker.com/_/openjdk?tab=description&page=1&ordering=last_updated&name=alpine
+https://www.digitalocean.com/community/tutorials/how-to-install-apache-kafka-on-debian-10
+
+# Building a Docker file
+docker run --rm --name kafka -it kafka bash
+
+docker run --rm -it kafka bash -c "ls -l /kafka/"
+docker run --rm -it kafka bash -c "cat /kafka/config/server.properties"
+docker run --rm -it kafka bash -c "ls -l /kafka/bin"
+
+docker cp kafka:/kafka/config/server.properties ./server.properties
+docker cp kafka:/kafka/config/zookeeper.properties ./zookeeper/zookeeper.properties
+
+# Kafka
+
+docker network create kafka
+docker run -it --rm --name kafka --net kafka -v ${PWD}/server.properties:/kafka/config/server.properties kafka
+
+# Zookeeper
+
+docker run -it --rm --name zookeeper --net kafka zookeeper
+
+# Topic
+docker exec -it kafka bash
+
+/kafka/bin/kafka-topics.sh --create --zookeeper zookeeper:2181 --replication-factor 1 --partitions 1 --topic TutorialTopic
+
+# Producer
+
+echo "Hello, World" | /kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic TutorialTopic > /dev/null
+
+# Consumer
+
+/kafka/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic TutorialTopic --from-beginning
+
+# Build an Application: Producer
+
+https://docs.confluent.io/clients-confluent-kafka-go/current/overview.html#go-installation
+
+```
+cd messaging/kafka/applications/producer
+
+docker run -it --rm -v ${PWD}:/app -w /app golang:1.15-alpine
+
+apk -U add ca-certificates && \
+apk update && apk upgrade && apk add pkgconf git bash build-base && \
+cd /tmp && \
+git clone https://github.com/edenhill/librdkafka.git && \
+  cd librdkafka && \
+  git checkout v1.6.1 && \
+  ./configure --prefix /usr && make && make install
+
+#apk add --no-cache git make librdkafka-dev gcc musl-dev librdkafka
+
+go mod init producer
+go get gopkg.in/confluentinc/confluent-kafka-go.v1/kafka
+```
+
--- a/messaging/kafka/applications/consumer/consumer.go
+++ b/messaging/kafka/applications/consumer/consumer.go
@ -0,0 +1,34 @@
+package main
+
+
+import (
+	"fmt"
+	log "github.com/sirupsen/logrus"
+	"os"
+)
+
+var kafka_host = os.Getenv("KAFKA_HOSTS") // value of the KAFKA_HOSTS environment variable, read once at package initialisation; no function in this file uses it yet
+
+func main() { // Entry point: delegates everything to consume().
+	consume()
+}
+
+func consume() { // Logs each item received from msgs, calls Ack(false) on it, and never returns.
+	// NOTE(review): work in progress - err and msgs are not declared in this function or file; the Kafka connect/subscribe step that should produce them is still missing.
+	if err != nil {
+		log.Fatalf("%s: %s", "Failed to connect to Kafka", err)
+	}
+
+	forever := make(chan bool) // nothing sends on or closes this channel; it only parks the caller below
+
+	go func() {
+		for d := range msgs {
+			log.Printf("Received a message: %s", d.Body)
+			// NOTE(review): d.Body / d.Ack(false) presumably come from a different client API than Kafka's - confirm once msgs has a real type.
+			d.Ack(false)
+		}
+	  }()
+	  
+	  fmt.Println("Running...")
+	  <-forever // blocks indefinitely so the goroutine above keeps running
+}
--- a/messaging/kafka/applications/consumer/dockerfile
+++ b/messaging/kafka/applications/consumer/dockerfile
@ -0,0 +1,15 @@
+# Build stage: compile consumer.go inside the Go toolchain image.
+FROM golang:1.14-alpine AS build
+RUN apk add --no-cache git
+
+WORKDIR /src
+COPY consumer.go ./
+# NOTE(review): only consumer.go is copied (no go.mod), yet it imports github.com/sirupsen/logrus - confirm the build can resolve it.
+RUN go build -o consumer consumer.go
+
+# Runtime stage: plain Alpine image carrying only the compiled binary.
+FROM alpine AS runtime
+COPY --from=build /src/consumer /app/consumer
+
+# Exec-form CMD, so the binary is started directly rather than through a shell.
+CMD ["/app/consumer"]
--- a/messaging/kafka/applications/producer/dockerfile
+++ b/messaging/kafka/applications/producer/dockerfile
@ -0,0 +1,15 @@
+# Build stage: compile publisher.go inside the Go toolchain image.
+FROM golang:1.15-alpine AS build
+RUN apk add --no-cache git
+
+WORKDIR /src
+COPY publisher.go ./
+# NOTE(review): go.mod/go.sum are not copied, yet publisher.go imports httprouter and logrus - confirm the build can resolve them.
+RUN go build -o publisher publisher.go
+
+# Runtime stage: plain Alpine image carrying only the compiled binary.
+FROM alpine AS runtime
+COPY --from=build /src/publisher /app/publisher
+
+# Exec-form CMD, so the binary is started directly rather than through a shell.
+CMD ["/app/publisher"]
--- a/messaging/kafka/applications/producer/go.mod
+++ b/messaging/kafka/applications/producer/go.mod
@ -0,0 +1,8 @@
+module producer
+
+go 1.15
+
+require (
+	github.com/confluentinc/confluent-kafka-go v1.6.1 // indirect
+	gopkg.in/confluentinc/confluent-kafka-go.v1 v1.6.1 // indirect
+)
--- a/messaging/kafka/applications/producer/go.sum
+++ b/messaging/kafka/applications/producer/go.sum
@ -0,0 +1,4 @@
+github.com/confluentinc/confluent-kafka-go v1.6.1 h1:YxM/UtMQ2vgJX2gIgeJFUD0ANQYTEvfo4Cs4qKUlmGE=
+github.com/confluentinc/confluent-kafka-go v1.6.1/go.mod h1:u2zNLny2xq+5rWeTQjFHbDzzNuba4P1vo31r9r4uAdg=
+gopkg.in/confluentinc/confluent-kafka-go.v1 v1.6.1 h1:nKc5Vj4Kko8O6khwOIxQ2UqkEZP7ZZ91vb/lI+ephvk=
+gopkg.in/confluentinc/confluent-kafka-go.v1 v1.6.1/go.mod h1:ZdI3yfYmdNSLQPNCpO1y00EHyWaHG5EnQEyL/ntAegY=
--- a/messaging/kafka/applications/producer/publisher.go
+++ b/messaging/kafka/applications/producer/publisher.go
@ -0,0 +1,36 @@
+package main
+
+import (
+	"fmt"
+	"net/http"
+	"github.com/julienschmidt/httprouter"
+	log "github.com/sirupsen/logrus"
+	"os"
+)
+
+var kafka_host = os.Getenv("KAFKA_HOSTS") // value of the KAFKA_HOSTS environment variable, read once at package initialisation; no function in this file uses it yet
+
+// main registers the publish route and serves HTTP on port 80.
+func main() {
+	mux := httprouter.New()
+	// submit already matches the handler signature used here, so register it directly.
+	mux.POST("/publish/:message", submit)
+
+	fmt.Println("Running...")
+
+	// Fatal logs whatever error ListenAndServe returns and exits.
+	log.Fatal(http.ListenAndServe(":80", mux))
+}
+
+func submit(writer http.ResponseWriter, request *http.Request, p httprouter.Params) { // Handles one publish request; writer and request are not used yet.
+	message := p.ByName("message") // value of the :message path parameter
+	
+	fmt.Println("Received message: " + message)
+	// NOTE(review): work in progress - err and conn are not declared anywhere in this file; the Kafka producer setup that should create them is missing.
+	if err != nil {
+		log.Fatalf("%s: %s", "Failed to connect to Kafka", err)
+	}
+	// NOTE(review): nothing is sent before the success line below, and Fatalf above would presumably terminate the whole server on one failed request - confirm intent.
+	defer conn.Close()
+	fmt.Println("publish success!")
+}
--- a/messaging/kafka/dockerfile
+++ b/messaging/kafka/dockerfile
@ -0,0 +1,18 @@
+FROM openjdk:11.0.10-jre-buster
+# Kafka release and the Scala version it was built for; together they pick the tarball fetched below.
+ENV KAFKA_VERSION=2.7.0
+ENV SCALA_VERSION=2.13
+RUN mkdir /tmp/kafka && \
+    apt-get update && \
+    apt-get install -y curl && rm -rf /var/lib/apt/lists/*
+# -f fails the build on an HTTP error instead of saving the error page; the tarball is deleted in the same layer.
+RUN curl -fsSL "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" \
+    -o /tmp/kafka/kafka.tgz && \
+    mkdir /kafka && cd /kafka && \
+    tar -xvzf /tmp/kafka/kafka.tgz --strip 1 && rm -rf /tmp/kafka
+
+COPY start-kafka.sh  /usr/bin
+RUN chmod +x  /usr/bin/start-kafka.sh
+# Default command: the start script copied to /usr/bin above.
+CMD ["start-kafka.sh"]
+    
--- a/messaging/kafka/server.properties
+++ b/messaging/kafka/server.properties
@ -0,0 +1,136 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# see kafka.server.KafkaConfig for additional details and defaults
+
+############################# Server Basics #############################
+
+# The id of the broker. This must be set to a unique integer for each broker.
+broker.id=0
+
+############################# Socket Server Settings #############################
+
+# The address the socket server listens on. It will get the value returned from 
+# java.net.InetAddress.getCanonicalHostName() if not configured.
+#   FORMAT:
+#     listeners = listener_name://host_name:port
+#   EXAMPLE:
+#     listeners = PLAINTEXT://your.host.name:9092
+#listeners=PLAINTEXT://:9092
+
+# Hostname and port the broker will advertise to producers and consumers. If not set, 
+# it uses the value for "listeners" if configured.  Otherwise, it will use the value
+# returned from java.net.InetAddress.getCanonicalHostName().
+#advertised.listeners=PLAINTEXT://your.host.name:9092
+
+# Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
+#listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL
+
+# The number of threads that the server uses for receiving requests from the network and sending responses to the network
+num.network.threads=3
+
+# The number of threads that the server uses for processing requests, which may include disk I/O
+num.io.threads=8
+
+# The send buffer (SO_SNDBUF) used by the socket server
+socket.send.buffer.bytes=102400
+
+# The receive buffer (SO_RCVBUF) used by the socket server
+socket.receive.buffer.bytes=102400
+
+# The maximum size of a request that the socket server will accept (protection against OOM)
+socket.request.max.bytes=104857600
+
+
+############################# Log Basics #############################
+
+# A comma separated list of directories under which to store log files
+log.dirs=/tmp/kafka-logs
+
+# The default number of log partitions per topic. More partitions allow greater
+# parallelism for consumption, but this will also result in more files across
+# the brokers.
+num.partitions=1
+
+# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
+# This value is recommended to be increased for installations with data dirs located in RAID array.
+num.recovery.threads.per.data.dir=1
+
+############################# Internal Topic Settings  #############################
+# The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state"
+# For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3.
+offsets.topic.replication.factor=1
+transaction.state.log.replication.factor=1
+transaction.state.log.min.isr=1
+
+############################# Log Flush Policy #############################
+
+# Messages are immediately written to the filesystem but by default we only fsync() to sync
+# the OS cache lazily. The following configurations control the flush of data to disk.
+# There are a few important trade-offs here:
+#    1. Durability: Unflushed data may be lost if you are not using replication.
+#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
+#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
+# The settings below allow one to configure the flush policy to flush data after a period of time or
+# every N messages (or both). This can be done globally and overridden on a per-topic basis.
+
+# The number of messages to accept before forcing a flush of data to disk
+#log.flush.interval.messages=10000
+
+# The maximum amount of time a message can sit in a log before we force a flush
+#log.flush.interval.ms=1000
+
+############################# Log Retention Policy #############################
+
+# The following configurations control the disposal of log segments. The policy can
+# be set to delete segments after a period of time, or after a given size has accumulated.
+# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
+# from the end of the log.
+
+# The minimum age of a log file to be eligible for deletion due to age
+log.retention.hours=168
+
+# A size-based retention policy for logs. Segments are pruned from the log unless the remaining
+# segments drop below log.retention.bytes. Functions independently of log.retention.hours.
+#log.retention.bytes=1073741824
+
+# The maximum size of a log segment file. When this size is reached a new log segment will be created.
+log.segment.bytes=1073741824
+
+# The interval at which log segments are checked to see if they can be deleted according
+# to the retention policies
+log.retention.check.interval.ms=300000
+
+############################# Zookeeper #############################
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma-separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all kafka znodes.
+zookeeper.connect=zookeeper:2181
+
+# Timeout in ms for connecting to zookeeper
+zookeeper.connection.timeout.ms=18000
+
+
+############################# Group Coordinator Settings #############################
+
+# The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance.
+# The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms.
+# The default value for this is 3 seconds.
+# We override this to 0 here as it makes for a better out-of-the-box experience for development and testing.
+# However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup.
+group.initial.rebalance.delay.ms=0
--- a/messaging/kafka/start-kafka.sh
+++ b/messaging/kafka/start-kafka.sh
@ -0,0 +1,3 @@
+#!/bin/bash -e
+# exec replaces this shell with the Kafka start script, passing the broker config file as its only argument.
+exec "/kafka/bin/kafka-server-start.sh" "/kafka/config/server.properties"
--- a/messaging/kafka/zookeeper/dockerfile
+++ b/messaging/kafka/zookeeper/dockerfile
@ -0,0 +1,18 @@
+FROM openjdk:11.0.10-jre-buster
+# This image unpacks the Kafka distribution selected by these two versions; the start script copied below runs from it.
+ENV KAFKA_VERSION=2.7.0
+ENV SCALA_VERSION=2.13
+RUN mkdir /tmp/kafka && \
+    apt-get update && \
+    apt-get install -y curl && rm -rf /var/lib/apt/lists/*
+# -f fails the build on an HTTP error instead of saving the error page; the tarball is deleted in the same layer.
+RUN curl -fsSL "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" \
+    -o /tmp/kafka/kafka.tgz && \
+    mkdir /kafka && cd /kafka && \
+    tar -xvzf /tmp/kafka/kafka.tgz --strip 1 && rm -rf /tmp/kafka
+
+COPY start-zookeeper.sh  /usr/bin
+RUN chmod +x  /usr/bin/start-zookeeper.sh
+# Default command: the start script copied to /usr/bin above.
+CMD ["start-zookeeper.sh"]
+    
--- a/messaging/kafka/zookeeper/start-zookeeper.sh
+++ b/messaging/kafka/zookeeper/start-zookeeper.sh
@ -0,0 +1,3 @@
+#!/bin/bash -e
+# exec replaces this shell with the ZooKeeper start script, passing the ZooKeeper config file as its only argument.
+exec "/kafka/bin/zookeeper-server-start.sh" "/kafka/config/zookeeper.properties"
--- a/messaging/kafka/zookeeper/zookeeper.properties
+++ b/messaging/kafka/zookeeper/zookeeper.properties
@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# the directory where the snapshot is stored.
+dataDir=/tmp/zookeeper
+# the port at which the clients will connect
+clientPort=2181
+# disable the per-ip limit on the number of connections since this is a non-production config
+maxClientCnxns=0
+# Disable the adminserver by default to avoid port conflicts.
+# Set the port to something non-conflicting if choosing to enable this
+admin.enableServer=false
+# admin.serverPort=8080