---
title: 8.6.在k8s中黑盒监控
order: 48
icon: lightbulb
---
一、环境
| 主机名 | IP地址 | 系统 | 说明 |
| --- | --- | --- | --- |
| k8s | 192.168.11.65 | Ubuntu 20.04 | k8s版本:v1.23.10 单机版本 |
二、黑盒监控
1、安装blackbox-exporter
使用helm安装
通过 prometheus-community仓库下载
helm search repo prometheus-community|grep prometheus-blackbox-exporter
helm fetch prometheus-community/prometheus-blackbox-exporter
tar xf prometheus-blackbox-exporter-7.6.1.tgz
通过wget下载
wget https://github.com/prometheus-community/helm-charts/releases/download/prometheus-blackbox-exporter-7.6.1/prometheus-blackbox-exporter-7.6.1.tgz
tar xf prometheus-blackbox-exporter-7.6.1.tgz
grep -A 2 'image:' prometheus-blackbox-exporter/*
#不用替换
修改配置文件
vim prometheus-blackbox-exporter/values.yaml
```
config:
  modules:
    tcp_connect:
      prober: tcp
    icmp:
      prober: icmp
serviceMonitor:
  enabled: true
  defaults:
    labels:
      release: prometheus
  targets:
    - name: baidu.com
      url: https://baidu.com
    #- name: xxx
    #  url: xxxservice.namespace名称.svc.cluster.local:port/actuator/health
    - name: tcp-mysql
      url: mysql.default.svc.cluster.local:3306
      module: tcp_connect
    - name: icmp-localhost
      url: 127.0.0.1
      module: icmp
#告警规则(触发器)
prometheusRule:
  enabled: true
  additionalLabels:
    release: prometheus
  namespace: ""
  rules:
    - alert: 黑盒子探测失败告警
      expr: probe_success == 0
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "黑盒子探测失败{{ $labels.instance }}"
        description: "黑盒子检测失败,当前值:{{ $value }}"
    - alert: 请求慢告警
      expr: avg_over_time(probe_duration_seconds[1m]) > 1
      for: 1m
      labels:
        severity: warning
      annotations:
        summary: "请求慢{{ $labels.instance }}"
        description: "请求时间超过1秒,值为:{{ $value }}"
    - alert: http状态码检测失败
      expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "http状态码检测失败{{ $labels.instance }}"
        description: "HTTP状态码非 200-399,当前状态码为:{{ $value }}"
    - alert: ssl证书即将到期
      expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30
      for: 1m
      labels:
        severity: warning
      annotations:
        summary: "证书即将到期{{ $labels.instance }}"
        description: "SSL 证书在 30 天后到期,值:{{ $value }}"
    - alert: ssl证书即将到期
      expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "证书即将到期{{ $labels.instance }}"
        description: "SSL 证书在 3 天后到期,值:{{ $value }}"
    - alert: ssl证书已过期
      expr: probe_ssl_earliest_cert_expiry - time() <= 0
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "证书已过期{{ $labels.instance }}"
        description: "SSL 证书已经过期,请确认是否在使用"
安装blackbox-exporter
helm install -n monitoring --create-namespace prometheus-blackbox-exporter prometheus-blackbox-exporter
如果修改了配置文件
helm -n monitoring upgrade prometheus-blackbox-exporter prometheus-blackbox-exporter
检查
通过curl检查
curl 'http://10.233.55.233:9115/probe?target=https://www.baidu.com&module=http_2xx'
检查配置
root@k8s:~# kubectl -n monitoring get cm prometheus-blackbox-exporter -o yaml
apiVersion: v1
data:
blackbox.yaml: |
modules:
http_2xx:
http:
follow_redirects: true
preferred_ip_protocol: ip4
valid_http_versions:
- HTTP/1.1
- HTTP/2.0
prober: http
timeout: 5s
icmp:
prober: icmp
tcp_connect:
prober: tcp
```
root@k8s:~# kubectl get servicemonitors -n monitoring
NAME AGE
prometheus-blackbox-exporter-baidu.com 3m19s
prometheus-blackbox-exporter-icmp-localhost 3m19s
prometheus-blackbox-exporter-tcp-mysql 3m19s
检查告警规则配置
kubectl get prometheusrules prometheus-blackbox-exporter -n monitoring -oyaml
检查targets
检查数据
检查触发器
http://192.168.11.65:9090/alerts?search=
3、Grafana导入dashboard
id:13659
三、我的微信
如果碰到问题,可以随时加我微信,谢谢
评论区