title: 8.4.在k8s中监控redis
order: 46
icon: lightbulb
一、环境
主机名 | IP地址 | 系统 | 说明 |
k8s | 192.168.11.65 | Ubuntu 20.04 | k8s版本:v1.23.10 单机版本 |
ServiceMonitor
PrometheusRule
1、准备环境
略
二、监控redis
1、安装redis
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo update
helm search repo bitnami/redis
helm pull bitnami/redis --version 17.9.2
tar xf redis-17.9.2.tgz
编辑配置文件
vim redis/values.yaml
修改配置如下:设置密码为123456,设置从节点数量,关闭持久化存储
global:
redis:
password: "123456"
#关闭持久化存储
persistence:
enabled: false
#设置一个从节点
replica:
replicaCount: 1
通过sed修改
sed -i '/redis:/{n;s#password: ""#password: "123456"#}' redis/values.yaml
sed -i '/ persistence:/{n;n;n;s#enabled: true#enabled: false#}' redis/values.yaml
sed -i '/^replica:/{n;n;n;s#replicaCount: 3#replicaCount: 1#}' redis/values.yaml
检查
egrep -A3 "password: | persistence:|^replica:" redis/values.yaml
安装
helm install -n monitoring --create-namespace redis redis
检测redis登陆
kubectl exec -it redis-master-0 -n monitoring -- redis-cli -a 123456
127.0.0.1:6379> info
2、创建Deployment部署redis-exporter(三选一)
安装redis-exporter
cat > redis-exporter.yaml <<"EOF"
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: redis-exporter
name: redis-exporter-master
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: redis-exporter
template:
metadata:
labels:
app: redis-exporter
spec:
containers:
- name: redis-exporter
image: oliver006/redis_exporter:latest
env:
- name: TZ
value: "Asia/Shanghai"
- name: REDIS_ADDR
#地址和密码根据实际填写
value: "redis://redis-master:6379"
- name: REDIS_PASSWORD
value: "123456"
resources:
requests:
cpu: 100m
memory: 100Mi
ports:
- name: metrics
containerPort: 9121
protocol: TCP
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: redis-exporter
name: redis-exporter-replicas
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: redis-exporter
template:
metadata:
labels:
app: redis-exporter
spec:
containers:
- name: redis-exporter
image: oliver006/redis_exporter:latest
env:
- name: TZ
value: "Asia/Shanghai"
- name: REDIS_ADDR
#地址和密码根据实际填写
value: "redis://redis-replicas:6379"
- name: REDIS_PASSWORD
value: "123456"
resources:
requests:
cpu: 100m
memory: 100Mi
ports:
- name: metrics
containerPort: 9121
protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
labels:
app: redis-exporter
name: redis-exporter
namespace: monitoring
spec:
ports:
- name: http-metirc
protocol: TCP
port: 9121
targetPort: metrics
selector:
app: redis-exporter
EOF
创建
kubectl create -f redis-exporter.yaml
检查
kubectl get -f redis-exporter.yaml
Prometheus添加配置
添加ServiceMonitor资源对象,使prometheus去收集redis_exporter提供的监控样本数据
使用cat创建redis-sm.yaml文件
cat >redis-sm.yaml<<"EOF"
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: redis-exporter
namespace: monitoring
labels:
app: redis-exporter
release: prometheus
spec:
#jobLabel: redis-exporter
endpoints:
#http-metirc为redis-exporter的Service端口的name,一定要一致
- port: http-metirc
interval: 30s
scheme: http
selector:
matchLabels:
app: redis-exporter
namespaceSelector:
matchNames:
- monitoring
EOF
参数解释:
endpoints:用于配置需要收集 metrics 的 Endpoints 的端口和其他参数(注意:endpoints(小写)是 ServiceMonitor CRD 中的一个字段,而 Endpoints(大写)是 Kubernetes 资源类型)
selector:通过label匹配的方式选择ServiceMonitor要抓取的后端程序,通常是exporter对应的Service
创建
kubectl create -f redis-sm.yaml
检查
kubectl get -f redis-sm.yaml
http://192.168.11.65:9090/targets
注意事项
添加告警规则(触发器)
从这里下载告警文件
https://awesome-prometheus-alerts.grep.to/rules.html#redis
wget https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/redis/oliver006-redis-exporter.yml
创建PrometheusRule资源对象
cat >> redis-exporter-rules.yml <<"EOF"
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
release: prometheus
name: redis-exporter-rules
namespace: monitoring
spec:
groups:
- name: Redis
rules:
- alert: RedisDown
expr: 'redis_up == 0'
for: 0m
labels:
severity: critical
annotations:
summary: Redis down (instance {{ $labels.instance }})
description: "Redis instance is down
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisMissingMaster
expr: '(count(redis_instance_info{role="master"}) or vector(0)) < 1'
for: 0m
labels:
severity: critical
annotations:
summary: Redis missing master (instance {{ $labels.instance }})
description: "Redis cluster has no node marked as master.
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisTooManyMasters
expr: 'count(redis_instance_info{role="master"}) > 1'
for: 0m
labels:
severity: critical
annotations:
summary: Redis too many masters (instance {{ $labels.instance }})
description: "Redis cluster has too many nodes marked as master.
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisDisconnectedSlaves
expr: 'count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1'
for: 0m
labels:
severity: critical
annotations:
summary: Redis disconnected slaves (instance {{ $labels.instance }})
description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisReplicationBroken
expr: 'delta(redis_connected_slaves[1m]) < 0'
for: 0m
labels:
severity: critical
annotations:
summary: Redis replication broken (instance {{ $labels.instance }})
description: "Redis instance lost a slave
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisClusterFlapping
expr: 'changes(redis_connected_slaves[1m]) > 1'
for: 2m
labels:
severity: critical
annotations:
summary: Redis cluster flapping (instance {{ $labels.instance }})
description: "Changes have been detected in Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisMissingBackup
expr: 'time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24'
for: 0m
labels:
severity: critical
annotations:
summary: Redis missing backup (instance {{ $labels.instance }})
description: "Redis has not been backuped for 24 hours
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisOutOfSystemMemory
expr: 'redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90'
for: 2m
labels:
severity: warning
annotations:
summary: Redis out of system memory (instance {{ $labels.instance }})
description: "Redis is running out of system memory (> 90%)
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisOutOfConfiguredMaxmemory
expr: 'redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90'
for: 2m
labels:
severity: warning
annotations:
summary: Redis out of configured maxmemory (instance {{ $labels.instance }})
description: "Redis is running out of configured maxmemory (> 90%)
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisTooManyConnections
expr: 'redis_connected_clients > 100'
for: 2m
labels:
severity: warning
annotations:
summary: Redis too many connections (instance {{ $labels.instance }})
description: "Redis instance has too many connections
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisNotEnoughConnections
expr: 'redis_connected_clients < 5'
for: 2m
labels:
severity: warning
annotations:
summary: Redis not enough connections (instance {{ $labels.instance }})
description: "Redis instance should have more connections (> 5)
VALUE = {{ $value }}
LABELS = {{ $labels }}"
- alert: RedisRejectedConnections
expr: 'increase(redis_rejected_connections_total[1m]) > 0'
for: 0m
labels:
severity: critical
annotations:
summary: Redis rejected connections (instance {{ $labels.instance }})
description: "Some connections to Redis has been rejected
VALUE = {{ $value }}
LABELS = {{ $labels }}"
EOF
创建
kubectl create -f redis-exporter-rules.yml
检查
kubectl get -f redis-exporter-rules.yml
http://192.168.11.65:9090/alerts?search=
3、修改配置安装redis-exporter(三选一)
- 修改配置,安装redis_exporter并监控,添加告警规则
bitnami/redis
修改配置
vim redis/values.yaml
metrics:
enabled: true
serviceMonitor:
enabled: true
additionalLabels:
release: prometheus
prometheusRule:
enabled: false
additionalLabels:
release: prometheus
更新配置
helm upgrade -n monitoring --create-namespace redis redis
检查pod
kubectl get pod -n monitoring
创建PrometheusRule文件
参考上面
检查ServiceMonitor
kubectl get servicemonitors redis -n monitoring -oyaml
检查PrometheusRule
kubectl get prometheusrules redis -n monitoring -oyaml
web检查
http://192.168.11.65:9090/targets?search=
http://192.168.11.65:9090/alerts?search= 检查
4、prometheus-redis-exporter(三选一)
安装prometheus-redis-exporter
- 通过prometheus-community仓库安装prometheus-redis-exporter
helm search repo prometheus-community|grep redis
prometheus-community/prometheus-redis-exporter 5.3.1 v1.44.0 Prometheus exporter for Redis metrics
helm fetch prometheus-community/prometheus-redis-exporter
tar xf prometheus-redis-exporter-5.3.1.tgz
通过wget下载
wget https://github.com/prometheus-community/helm-charts/releases/download/prometheus-redis-exporter-5.3.1/prometheus-redis-exporter-5.3.1.tgz
tar xf prometheus-redis-exporter-5.3.1.tgz
grep -A 2 'image:' prometheus-redis-exporter/*
#不用替换
修改配置
vim prometheus-redis-exporter/values.yaml
开启serviceMonitor和prometheusRule,添加labels: release: prometheus
从 5.0.0 开始,redis exporter helm chart 支持多个目标。
通过启用 serviceMonitor.multipleTarget 和设置目标 serviceMonitor.targets,可以抓取多个 redis 实例。
redisAddress: redis://redis-master:6379
serviceMonitor:
enabled: true
auth:
enabled: true
secret:
name: "redis"
key: "redis-password"
#redisPassword: "123456"
#如果要使用redisPassword: "123456"这个参数,vim prometheus-redis-exporter/templates/deployment.yaml
#把value: {{ .Values.auth.redisPassword }}修改为value: "{{ .Values.auth.redisPassword }}",加双引号
helm安装
helm install -n monitoring --create-namespace prometheus-redis-exporter prometheus-redis-exporter
检查metrics
root@k8s:~# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
prometheus-redis-exporter ClusterIP 10.233.48.200 <none> 9121/TCP 11m
#通过curl检查
curl 10.233.48.200:9121/metrics
检查servicemonitors
kubectl get servicemonitors prometheus-redis-exporter -n monitoring -oyaml
检查prometheusrules
kubectl get prometheusrules prometheus-redis-exporter -n monitoring -oyaml
5、问题
http://192.168.11.65:9090/targets?search= 检查没有redis
解决:
检查prometheus-redis-exporter的ServiceMonitor配置的labels是否有release: prometheus
kubectl get ServiceMonitor prometheus-redis-exporter -n monitoring -oyaml
labels:
release: prometheus #是否有这行
#如果没有通过命令增加
kubectl label servicemonitors prometheus-redis-exporter release=prometheus -n monitoring
完成后检查
6、grafana添加dashboard
id:11835
id:17507
集群方式监控,id:14615
图形展示成功。图形问题是因为最大内存没有设置
总:
二进制:安装redis-exporter,修改prometheus配置(增加metrics地址,重启prometheus),增加告警规则,dashboard
K8S:安装redis-exporter,添加ServiceMonitor,增加PrometheusRule告警规则,dashboard
三、我的微信
如果碰到问题,可以随时加我微信,谢谢
评论区