title: 8.5.在k8s中监控MySQL
order: 47
icon: lightbulb
一、环境
主机名 | IP地址 | 系统 | 说明 |
k8s | 192.168.11.65 | Ubuntu 20.04 | k8s版本:v1.23.10 单机版本 |
1、准备环境
kube-prometheus-stack版本45.8.0
二、监控MySQL
1、安装mysql
kubectl create deploy mysql --image=mysql:8.0
kubectl set env deploy/mysql MYSQL_ROOT_PASSWORD=password
kubectl expose deploy mysql --port=3306 --type=NodePort
kubectl get svc -l app=mysql
kubectl get po -l app=mysql
NAME READY STATUS RESTARTS AGE
mysql-77cf856468-v4t9j 1/1 Running 0 14s
2、创建监控用户
kubectl exec -it mysql-77cf856468-v4t9j -- bash
#登录数据库
mysql -uroot -ppassword
执行如下命令:
mysql> CREATE USER 'exporter'@'%' IDENTIFIED BY 'password' WITH MAX_USER_CONNECTIONS 3;
mysql> GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'%';
#退出mysql
mysql> quit
#检查刚创建的用户是否成功
mysql -uexporter -ppassword
#显示所有数据库
mysql> show databases;
#退出mysql
mysql> quit
#退出容器
exit
3、安装mysql-exporter
通过prometheus-community下载,需要梯子
#搜索mysql-exporter
helm search repo prometheus-community
helm fetch prometheus-community/prometheus-mysql-exporter
ls -l
tar xf prometheus-mysql-exporter-1.13.0.tgz
通过wget下载
wget https://github.com/prometheus-community/helm-charts/releases/download/prometheus-mysql-exporter-1.13.0/prometheus-mysql-exporter-1.13.0.tgz
tar xf prometheus-mysql-exporter-1.13.0.tgz
grep -A 2 'image:' prometheus-mysql-exporter/*
grep -rn 'quay.io' prometheus-mysql-exporter/*
#批量替换
sed -i 's/quay.io/quay.mirrors.ustc.edu.cn/g' `grep "quay.io" -rl prometheus-mysql-exporter/*`
修改配置
vim prometheus-mysql-exporter/values.yaml
- 修改监控mysql地址、用户名和密码,并开启ServiceMonitor
serviceMonitor:
enabled: true
additionalLabels:
release: prometheus
mysql:
db: ""
host: "mysql.default"
pass: "password"
port: 3306
user: "exporter"
collectors:
engine_innodb_status: true
info_schema.innodb_metrics: true
info_schema.processlist: true
info_schema.tables: true
info_schema.tablestats: true
info_schema.schemastats: true
perf_schema.tablelocks: true
使用sed修改
sed -i 's#host: "localhost"#host: "mysql.default"#g' prometheus-mysql-exporter/values.yaml
sed -i 's#collectors: {}#collectors:#g' prometheus-mysql-exporter/values.yaml
sed -i 's#\# info_schema.innodb_metrics: false#info_schema.innodb_metrics: true#g' prometheus-mysql-exporter/values.yaml
sed -i 's#\# engine_innodb_status: false#engine_innodb_status: true#g' prometheus-mysql-exporter/values.yaml
sed -i 's#\# info_schema.processlist: false#info_schema.processlist: true#g' prometheus-mysql-exporter/values.yaml
sed -i 's#\# info_schema.tables: true#info_schema.tables: true#g' prometheus-mysql-exporter/values.yaml
sed -i 's#\# info_schema.tablestats: false#info_schema.tablestats: true#g' prometheus-mysql-exporter/values.yaml
sed -i 's#\# info_schema.schemastats: false#info_schema.schemastats: true#g' prometheus-mysql-exporter/values.yaml
sed -i 's#\# perf_schema.tablelocks: false#perf_schema.tablelocks: true#g' prometheus-mysql-exporter/values.yaml
sed -i '/additionalLabels/{s#{}#\n    release: prometheus#}' prometheus-mysql-exporter/values.yaml
sed -i '/serviceMonitor/{n;n;s#enabled: false#enabled: true#}' prometheus-mysql-exporter/values.yaml
安装prometheus-mysql-exporter
helm install -n monitoring --create-namespace prometheus-mysql-exporter prometheus-mysql-exporter
如果修改了配置文件
helm -n monitoring upgrade prometheus-mysql-exporter prometheus-mysql-exporter
检查pod是否正常启动
kubectl get pod -n monitoring
问题
打开 http://192.168.11.65:9090/targets?search= 检查,发现targets中没有mysql
解决:
检查prometheus-mysql-exporter的ServiceMonitor配置的labels是否有release: prometheus
kubectl get ServiceMonitor prometheus-mysql-exporter -n monitoring -oyaml
labels:
release: prometheus #是否有这行
#如果没有通过命令增加
kubectl label servicemonitors prometheus-mysql-exporter release=prometheus -n monitoring
完成后检查
root@k8s:~# kubectl get svc prometheus-mysql-exporter -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
prometheus-mysql-exporter ClusterIP 10.233.36.28 <none> 9104/TCP 62m
检查metrics
curl http://10.233.36.28:9104/metrics
web上检查targets
4、创建告警规则(触发器)
从这里下载告警文件
https://awesome-prometheus-alerts.grep.to/rules.html#mysql
wget https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/mysql/mysqld-exporter.yml
使用cat创建PrometheusRule资源对象
cat > prometheus-mysql-exporter-rules.yml <<"EOF"
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
release: prometheus
name: prometheus-mysql-exporter-rules
namespace: monitoring
spec:
groups:
- name: MySQL
rules:
- alert: MysqlDown
expr: mysql_up == 0
for: 30s
labels:
severity: critical
annotations:
summary: "MySQL Down,实例: {{ $labels.instance }}"
description: "MySQL_exporter连不上MySQL了,当前状态为:{{ $value }}"
- alert: MysqlTooManyConnections
expr: max_over_time(mysql_global_status_threads_connected[1m]) / mysql_global_variables_max_connections * 100 > 80
for: 2m
labels:
severity: warning
annotations:
summary: "Mysql连接数过多告警,实例: {{ $labels.instance }}"
description: "MySQL连接数>80%,当前值:{{ $value }}"
- alert: MysqlHighThreadsRunning
expr: max_over_time(mysql_global_status_threads_running[1m]) > 20
for: 2m
labels:
severity: warning
annotations:
summary: "Mysql运行的线程过多,实例: {{ $labels.instance }}"
description: "Mysql运行的线程 > 20,当前运行的线程:{{ $value }}"
- alert: MysqlSlowQueries
expr: increase(mysql_global_status_slow_queries[2m]) > 0
for: 2m
labels:
severity: warning
annotations:
summary: "Mysql慢日志告警,实例: {{ $labels.instance }}"
description: "MySQL在过去2分钟有新的{{ $value }}条慢查询"
#MySQL innodb 日志写入停滞
- alert: MysqlInnodbLogWaits
expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10
for: 0m
labels:
severity: warning
annotations:
summary: "MySQL innodb日志等待,实例: {{ $labels.instance }}"
description: "MySQL innodb日志写入停滞,当前值: {{ $value }}"
- alert: MysqlRestarted
expr: mysql_global_status_uptime < 60
for: 0m
labels:
severity: info
annotations:
summary: "MySQL 重启,实例: {{ $labels.instance }}"
description: "不到一分钟前,MySQL重启过"
EOF
创建
kubectl create -f prometheus-mysql-exporter-rules.yml
检查
kubectl get PrometheusRule prometheus-mysql-exporter-rules -n monitoring -o yaml
5、grafana添加Dashboard
登录grafana http://192.168.11.65:3000 输入用户名和密码
导入Dashboard,MySQL总览 id:7362
表监控:9625
innodb:9624
图形展示正常如下图:
6、测试
把mysql停止看下是否告警出来
#把mysql的副本设置为0
kubectl scale --replicas=0 deployment mysql
#检查
kubectl get pod -o wide
prometheus 的web上检查alerts
三、我的微信
如果碰到问题,可以随时加我微信,谢谢
评论区