tencent cloud

腾讯云可观测平台

动态与公告
产品动态
产品简介
产品概述
产品优势
基本功能
基本概念
应用场景
使用限制
购买指南
云产品监控
应用性能监控
终端性能监控
前端性能监控
云拨测
Prometheus 监控服务
Grafana 服务
事件总线
云压测
快速入门
监控概览
实例分组
云产品监控
应用性能监控
云拨测
云压测
Prometheus 监控服务
Grafana 服务
创建 Dashboard
事件总线
告警服务
云产品监控
云产品监控指标
控制台操作指南
云服务器监控组件
云产品监控对接 Grafana
故障处理
实践教程
应用性能监控
应用性能监控简介
接入指南
控制台操作指南
实践教程
参考信息
常见问题
终端性能监控
终端性能监控概述
控制台操作指南
接入指南
实践教程
前端性能监控
前端性能监控简介
控制台操作指南
接入指南
常见问题
云拨测
产品简介
控制台操作指南
常见问题
云压测
云压测概述
控制台操作指南
实践教程
JavaScript API 列表
常见问题
Prometheus 监控
Prometheus 监控简介
接入指南
控制台操作指南
实践教程
Terraform
常见问题
Grafana 服务
产品简介
控制台操作指南
Grafana 平台常用功能指引
常见问题
Dashboard
什么是 Dashboard
控制台操作指南
告警管理
控制台操作指南
故障处理
常见问题
事件总线
事件总线简介
控制台操作指南
实践教程
常见问题
报表管理
常见问题
腾讯云可观测平台常见问题
告警服务相关
一般性问题
监控图表相关
云服务器监控组件相关
动态阈值告警相关
云监控对接 Grafana 相关
文档阅读指南
相关协议
应用性能监控服务等级协议
APM 隐私协议
APM 数据处理和安全协议
前端性能监控服务等级协议
终端性能监控服务等级协议
云拨测服务等级协议
Prometheus 监控服务服务等级协议
Grafana 服务服务等级协议
云压测服务等级协议
云压测使用限制
Cloud Monitor Service Level Agreement
词汇表

容器监控图表指标

PDF
聚焦模式
字号
最后更新时间: 2024-08-07 21:55:37

集群监控概览

图表名称
查询语句
使用的指标
配置文件
CPU Requests Commitment
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"})
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
kube_node_status_allocatable_cpu_cores
kube-state-metrics
CPU Limits Commitment
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"})
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
kube_node_status_allocatable_cpu_cores
kube-state-metrics
Memory Utilisation
1 - sum(:node_memory_MemAvailable_bytes:sum{cluster="$cluster"}) / sum(node_memory_MemTotal_bytes{cluster="$cluster"})
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
Memory Requests Commitment
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"})
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
kube_node_status_allocatable_memory_bytes
kube-state-metrics
Memory Limits Commitment
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"})
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
kube_node_status_allocatable_memory_bytes
kube-state-metrics
Node Count
count(kube_node_info{cluster="$cluster"})
kube_node_info
kube-state-metrics
Pod Count
count(kube_pod_info{cluster="$cluster"})
kube_pod_info
kube-state-metrics
Node Request CPU Average Percent
avg(sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_cpu_cores{cluster="$cluster"})by(node))
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
kube_node_status_capacity_cpu_cores
kube-state-metrics
Node Request Memory Average Percent
avg(sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_memory_bytes{cluster="$cluster"})by(node))
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
kube_node_status_capacity_memory_bytes
kube-state-metrics
API Server Success Request Percent
sum(irate(apiserver_request_total{cluster="$cluster",code=~"20.*",verb=~"GET|LIST"}[5m]))/sum(irate(apiserver_request_total{cluster="$cluster",verb=~"GET|LIST"}[5m]))
apiserver_request_total
kube-apiserver
apiserver_request_total
kube-apiserver
Namespace Overview
count(kube_pod_info{cluster="$cluster"}) by (namespace)
kube_pod_info
kube-state-metrics
count(kube_service_info{cluster="$cluster"}) by(namespace)
kube_service_info
kube-state-metrics
count(kube_pod_container_info{cluster="$cluster"}) by(namespace)
kube_pod_container_info
kube-state-metrics
count(kube_configmap_info{cluster="$cluster"}) by(namespace)
kube_configmap_info
kube-state-metrics
count(kube_secret_info{cluster="$cluster"}) by(namespace)
kube_secret_info
kube-state-metrics
count(kube_deployment_created{cluster="$cluster"}) by (namespace)
kube_deployment_created
kube-state-metrics
count(kube_statefulset_created{cluster="$cluster"}) by (namespace)
kube_statefulset_created
kube-state-metrics
count(kube_job_created{cluster="$cluster"}) by (namespace)
kube_job_created
kube-state-metrics
count(kube_cronjob_created{cluster="$cluster"}) by (namespace)
kube_cronjob_created
kube-state-metrics
count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace) - (count(kube_pod_status_phase{cluster="$cluster",phase="Succeeded"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace)
kube_pod_status_ready
kube-state-metrics
kube_pod_status_phase
kube-state-metrics
kube_pod_status_ready
kube-state-metrics
count(kube_deployment_status_replicas_ready{cluster="$cluster"}<kube_deployment_spec_replicas{cluster="$cluster"}) by (namespace)
kube_deployment_status_replicas_ready
kube-state-metrics
kube_deployment_spec_replicas
kube-state-metrics
count(kube_statefulset_status_replicas_ready{cluster="$cluster"}<kube_statefulset_replicas{cluster="$cluster"}) by (namespace)
kube_statefulset_status_replicas_ready
kube-state-metrics
kube_statefulset_replicas
kube-state-metrics
count(kube_daemonset_status_number_unavailable{cluster="$cluster"}>0)by(namespace)
kube_daemonset_status_number_unavailable
kube-state-metrics
count(kube_job_status_failed{cluster="$cluster"} == 1) by (namespace)
kube_job_status_failed
kube-state-metrics
count(kube_daemonset_created{cluster="$cluster"}) by (namespace)
kube_daemonset_created
kube-state-metrics
count(kube_persistentvolumeclaim_info{cluster="$cluster"}) by (namespace)
kube_persistentvolumeclaim_info
kube-state-metrics
CPU Usage
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
CPU Quota
sum(kube_pod_owner{cluster="$cluster"}) by (namespace)
kube_pod_owner
kube-state-metrics
count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage (working_set)
sum(container_memory_working_set_bytes{cluster="$cluster", container!="", container!="POD"}) by (namespace)
container_memory_working_set_bytes
cadvisor
Memory Requests
sum(kube_pod_owner{cluster="$cluster"}) by (namespace)
kube_pod_owner
kube-state-metrics
count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace)
container_memory_rss
cadvisor
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace)
container_memory_rss
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace)
container_memory_rss
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Node Memory Usage (Top 10)
sum(label_replace(topk(10, 1-(node_memory_MemAvailable_bytes{cluster="$cluster"} / node_memory_MemTotal_bytes{cluster="$cluster"})), "node_ip", "$1", "instance", "(.*)"))by(node_ip)
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
Node CPU Usage (Top 10)
topk(10, sum(label_replace(1 - sum(rate(node_cpu_seconds_total{cluster="$cluster",mode="idle"}[1m])) by (instance) / sum(rate(node_cpu_seconds_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_cpu_seconds_total
node-exporter
node_cpu_seconds_total
node-exporter
Node Disk Usage (Top 10)
topk(10, sum(label_replace(1-node_filesystem_free_bytes{cluster="$cluster",mountpoint="/"}/node_filesystem_size_bytes{cluster="$cluster",mountpoint="/",fstype!="rootfs"},"host_ip","$1","instance","(.*)"))by(host_ip))
node_filesystem_free_bytes
node-exporter
node_filesystem_size_bytes
node-exporter
Node Network In (Top 10)
topk(10, sum(label_replace(max(irate(node_network_receive_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_network_receive_bytes_total
node-exporter
Node Network Out (Top 10)
topk(10, sum(label_replace(max(irate(node_network_transmit_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_network_transmit_bytes_total
node-exporter
Node Sockets Count(Top 10)
topk(10, sum(label_replace(max(node_sockstat_TCP_alloc{cluster="$cluster"}) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_sockstat_TCP_alloc
node-exporter
Container Memory Usage(Top10)
topk(10, sum (container_memory_working_set_bytes{cluster="$cluster",container !="",container!="POD"}) by (container))
container_memory_working_set_bytes
cadvisor
Container Memory Usage/Limit(Top10)
topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace))
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Container CPU Usage(Top10)
topk(10, sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",container !="",container!="POD"}[2m])) by (container))
container_cpu_usage_seconds_total
cadvisor
Container Network
topk(10, sum(irate(container_network_receive_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod))
container_network_receive_bytes_total
cadvisor
-topk(10, sum(irate(container_network_transmit_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod))
container_network_transmit_bytes_total
cadvisor
Container Memory Usage/Limit (Top 10)
topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace))
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Container CPU Usage (Top 10)
topk(10, sum(irate(container_cpu_usage_seconds_total{cluster="$cluster",container!="",container!="POD"}[1m])) by (container,pod,namespace)or on() vector(0))
container_cpu_usage_seconds_total
cadvisor
Container Socket Count(Top 10)
topk(10, sum(container_sockets{cluster="$cluster",container!=""}) by (container,pod,namespace)or on() vector(0))
container_sockets
cadvisor

集群 Namespace 大盘

图表名称
查询语句
使用的指标
配置文件
CPU Usage
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))
container_cpu_usage_seconds_total
cadvisor
CPU Usage/Request(%)
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Usage/Limit(%)
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"}) or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Request
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Limit
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Cluster Available
sum(sum(kube_node_status_capacity{resource="cpu",cluster="$cluster",namespace=~"$namespace"}) by (node) + sum(kube_node_spec_unschedulable{cluster="$cluster",namespace=~"$namespace"}==0) by(node))
kube_node_status_capacity
kube-state-metrics
kube_node_spec_unschedulable
kube-state-metrics
StatefulSet Created
count(kube_statefulset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_statefulset_created
kube-state-metrics
Pod Created
count(kube_pod_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_pod_info
kube-state-metrics
Containers
count(kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_pod_container_info
kube-state-metrics
DaemonSet Created
count(kube_daemonset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_daemonset_created
kube-state-metrics
Job Created
count(kube_job_info{cluster="$cluster",namespace="$namespace"})or on() vector(0)
kube_job_info
kube-state-metrics
Job Active
count(kube_job_status_active{cluster="$cluster",namespace="$namespace"}==1)or on() vector(0)
kube_job_status_active
kube-state-metrics
Cron Job Created
count(kube_cronjob_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_cronjob_created
kube-state-metrics
Cron Job Active
count(kube_cronjob_status_active{cluster="$cluster",namespace="$namespace"}==1) or on() vector(0)
kube_cronjob_status_active
kube-state-metrics
Unbound PVC
count(kube_persistentvolumeclaim_status_phase{phase!="Bound", cluster="$cluster",namespace="$namespace"}==1) or on() vector(0)
kube_persistentvolumeclaim_status_phase
kube-state-metrics
PersistentVolumeClaim Created
count(kube_persistentvolumeclaim_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_persistentvolumeclaim_info
kube-state-metrics
Service Created
count(kube_service_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_service_info
kube-state-metrics
LoadBalancer Created
count(kube_service_spec_type{type="LoadBalancer", cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_service_spec_type
kube-state-metrics
Ingress Created
count(kube_ingress_info{cluster="$cluster",namespace="$namespace"})or on() vector(0)
kube_ingress_info
kube-state-metrics
ConfigMap Created
count(kube_configmap_info{cluster="$cluster",namespace="$namespace"})
kube_configmap_info
kube-state-metrics
Secret Created
count(kube_secret_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_secret_info
kube-state-metrics
PVC Storage Requests Total
sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_persistentvolumeclaim_resource_requests_storage_bytes
kube-state-metrics
Pod NotReady
count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace) - (count(kube_pod_status_phase{phase="Succeeded", cluster="$cluster",namespace="$namespace"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace)
kube_pod_status_ready
kube-state-metrics
kube_pod_status_phase
kube-state-metrics
kube_pod_status_ready
kube-state-metrics
Pod UnSchedulable
count(kube_pod_status_unschedulable{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_pod_status_unschedulable
kube-state-metrics
Deployment NotReady
count(sum(kube_deployment_status_replicas_ready{cluster="$cluster",namespace="$namespace"}) by (deployment)<sum(kube_deployment_spec_replicas{cluster="$cluster",namespace="$namespace"}) by (deployment)) or on() vector(0)
kube_deployment_status_replicas_ready
kube-state-metrics
kube_deployment_spec_replicas
kube-state-metrics
Daemonset NotReady
count(kube_daemonset_status_number_unavailable{cluster="$cluster",namespace="$namespace"}>0) or on() vector(0)
kube_daemonset_status_number_unavailable
kube-state-metrics
Job Failed
count(kube_job_status_failed{cluster="$cluster",namespace="$namespace"} == 1)
kube_job_status_failed
kube-state-metrics
CPU Usage
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m])) or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
CPU Quota
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage
sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}) or on() vector(0)
container_memory_working_set_bytes
cadvisor
Memory Usage/Request(%)
sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Usage/Limit(%)
sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Request
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"})
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Limit
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"})
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Cluster Available
sum(sum(kube_node_status_capacity{resource="memory"}) by (node) + sum(kube_node_spec_unschedulable==0) by(node)) or on() vector(0)
kube_node_status_capacity
kube-state-metrics
kube_node_spec_unschedulable
kube-state-metrics
Memory Usage (w/o cache)
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}) by (pod)
container_memory_working_set_bytes
cadvisor
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"})
kube_resourcequota
kube-state-metrics
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"})
kube_resourcequota
kube-state-metrics
Memory Quota
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_working_set_bytes
cadvisor
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}) by (pod)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}) by (pod)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_rss{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_rss
cadvisor
sum(container_memory_cache{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_cache
cadvisor
sum(container_memory_swap{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_swap
cadvisor
Containers
group by (image, container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_status_running
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_status_restarts_total
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(irate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[1m])) by (pod,container)
kube_pod_container_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max(container_spec_cpu_quota{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000 > 0) by (container,pod)))
kube_pod_container_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
container_spec_cpu_quota
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"})))
kube_pod_container_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_resource_limits
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container)
kube_pod_container_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max(container_spec_memory_limit_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod, container) < 1)
kube_pod_container_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))
kube_pod_container_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics

API Server(独立集群)

图表名称
查询语句
使用的指标
配置文件
Availability > 99.000%
1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m]))
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
ErrorBudget > 99.000%
100 * (1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m])) -0.990000)
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
Read Availability
1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET", cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET", cluster="$cluster"}[5m]))
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
Read SLI - Requests
sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Read SLI - Errors
sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..",cluster="$cluster"}[5m]))/ sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Read SLI - Duration
histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))) > 0
apiserver_request_duration_seconds_bucket
kube-apiserver
Write Availability
1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m]))
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
Write SLI - Requests
sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Write SLI - Errors
sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..",cluster="$cluster"}[5m]))/ sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Write SLI - Duration
histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))) > 0
apiserver_request_duration_seconds_bucket
kube-apiserver
Work Queue Add Rate
sum(rate(workqueue_adds_total{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name)
workqueue_adds_total
kubelet
Work Queue Depth
sum(rate(workqueue_depth{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name)
workqueue_depth
kubelet
Work Queue Latency
histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name, le))
workqueue_queue_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"}[5m])
process_cpu_seconds_total
node-exporter

Controller Manager(独立集群)

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{cluster=~"$cluster",job="kube-controller-manager"})
up
kubelet
Work Queue Add Rate
sum(rate(workqueue_adds_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name)
workqueue_adds_total
kubelet
Work Queue Depth
sum(rate(workqueue_depth{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name)
workqueue_depth
kubelet
Work Queue Latency
histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name, le))
workqueue_queue_duration_seconds_bucket
kubelet
Kube API Request Rate
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Get Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter

Kubelet

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{cluster="$cluster", job="kubelet"})
up
kubelet
Running Pods
sum(kubelet_running_pods{cluster="$cluster", job="kubelet", instance=~"$instance"})
kubelet_running_pods
kubelet
Running Container
sum(kubelet_running_containers{cluster="$cluster", job="kubelet", instance=~"$instance"})
kubelet_running_containers
kubelet
Actual Volume Count
sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance", state="actual_state_of_world"})
volume_manager_total_volumes
kubelet
Desired Volume Count
sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance",state="desired_state_of_world"})
volume_manager_total_volumes
kubelet
Config Error Count
sum(rate(kubelet_node_config_error{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m]))
kubelet_node_config_error
kubelet
Operation Rate
sum(rate(kubelet_runtime_operations_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (operation_type, instance)
kubelet_runtime_operations_total
kubelet
Operation Error Rate
sum(rate(kubelet_runtime_operations_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type)
kubelet_runtime_operations_errors_total
kubelet
Operation duration 99th quantile
histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type, le))
kubelet_runtime_operations_duration_seconds_bucket
kubelet
Pod Start Rate
sum(rate(kubelet_pod_start_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance)
kubelet_pod_start_duration_seconds_count
kubelet
sum(rate(kubelet_pod_worker_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance)
kubelet_pod_worker_duration_seconds_count
kubelet
Pod Start Duration
histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pod_start_duration_seconds_bucket
kubelet
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pod_worker_duration_seconds_bucket
kubelet
Storage Operation Rate
sum(rate(storage_operation_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin)
storage_operation_duration_seconds_count
kubelet
Storage Operation Error Rate
sum(rate(storage_operation_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin)
storage_operation_errors_total
kubelet
Storage Operation Duration 99th quantile
histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin, le))
storage_operation_duration_seconds_bucket
kubelet
Cgroup manager operation rate
sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type)
kubelet_cgroup_manager_duration_seconds_count
kubelet
Cgroup manager 99th quantile
histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type, le))
kubelet_cgroup_manager_duration_seconds_bucket
kubelet
PLEG relist rate
sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance)
kubelet_pleg_relist_duration_seconds_count
kubelet
PLEG relist interval
histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pleg_relist_interval_seconds_bucket
kubelet
PLEG relist duration
histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pleg_relist_duration_seconds_bucket
kubelet
RPC Rate
sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
Request duration 99th quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster="$cluster",job="kubelet", instance=~"$instance"}[5m])) by (instance, verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{cluster="$cluster",job="kubelet",instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter
Goroutines
go_goroutines{cluster="$cluster",job="kubelet",instance=~"$instance"}
go_goroutines
node-exporter

Proxy(非默认安装组件)

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{job="kube-proxy"})
up
kubelet
Rules Sync Rate
sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m]))
kubeproxy_sync_proxy_rules_duration_seconds_count
kube-proxy
Rule Sync Latency 99th Quantile
histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m]))
kubeproxy_sync_proxy_rules_duration_seconds_bucket
kube-proxy
Network Programming Rate
sum(rate(kubeproxy_network_programming_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m]))
kubeproxy_network_programming_duration_seconds_count
kube-proxy
Network Programming Latency 99th Quantile
histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m])) by (instance, le))
kubeproxy_network_programming_duration_seconds_bucket
kube-proxy
Kube API Request Rate
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Kube API Request Rate
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Get Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{job="kube-proxy",instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{job="kube-proxy",instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter

Scheduler(独立集群)

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{cluster=~"$cluster", job="kube-scheduler"})
up
kubelet
Kube API Request Rate
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Get Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter

集群节点监控详情

图表名称
查询语句
使用的指标
配置文件
服务器资源总览表
node_uname_info{job=~"$job", cluster=~"$cluster"} - 0
node_uname_info
node-exporter
node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - 0
node_memory_MemTotal_bytes
node-exporter
count(node_cpu_seconds_total{job=~"$job",mode='system',cluster=~"$cluster"}) by (instance)
node_cpu_seconds_total
node-exporter
sum(time() - node_boot_time_seconds{job=~"$job",cluster=~"$cluster"})by(instance)
node_boot_time_seconds
node-exporter
max((node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}) *100/(node_filesystem_avail_bytes {job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}+(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"})))by(instance)
node_filesystem_size_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
(1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100
node_cpu_seconds_total
node-exporter
(1 - (node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"})))* 100
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
node_load5{job=~"$job",cluster=~"$cluster"}
node_load5
node-exporter
max(irate(node_disk_written_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])) by (instance)
node_disk_written_bytes_total
node-exporter
max(irate(node_network_receive_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance)
node_network_receive_bytes_total
node-exporter
max(irate(node_network_transmit_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance)
node_network_transmit_bytes_total
node-exporter
node_load5{job=~"$job",cluster=~"$cluster"}
node_load5
node-exporter
整体总负载与整体平均 CPU 使用率
count(node_cpu_seconds_total{job=~"$job",cluster=~"$cluster", mode='system'})
node_cpu_seconds_total
node-exporter
sum(node_load5{job=~"$job",cluster=~"$cluster"})
node_load5
node-exporter
avg(1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100
node_cpu_seconds_total
node-exporter
整体总内存与整体平均内存使用率
sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"})
node_memory_MemTotal_bytes
node-exporter
sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"})
node_memory_MemTotal_bytes
node-exporter
node_memory_MemAvailable_bytes
node-exporter
(sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"}) / sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"}))*100
node_memory_MemTotal_bytes
node-exporter
node_memory_MemAvailable_bytes
node-exporter
整体总磁盘与整体平均磁盘使用率
sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))
node_filesystem_size_bytes
node-exporter
sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
(sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))) *100/(sum(avg(node_filesystem_avail_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))+(sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
运行时间
avg(time() - node_boot_time_seconds{instance=~"$node",cluster=~"$cluster"})
node_boot_time_seconds
node-exporter
CPU 核数
count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'})
node_cpu_seconds_total
node-exporter
总内存
sum(node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"})
node_memory_MemTotal_bytes
node-exporter
总 CPU 使用率
100 - (avg(irate(node_cpu_seconds_total{instance=~"$node",mode="idle",cluster=~"$cluster"}[5m])) * 100)
node_cpu_seconds_total
node-exporter
内存使用率
(1 - (node_memory_MemAvailable_bytes{instance=~"$node",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{instance=~"$node",cluster=~"$cluster"})))* 100
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
最大分区使用率
(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"})*100 /(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
CPU iowait
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) * 100
node_cpu_seconds_total
node-exporter
各分区可用空间
node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0
node_filesystem_size_bytes
node-exporter
node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0
node_filesystem_avail_bytes
node-exporter
(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
CPU 使用率
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="system"}[5m])) by (instance) *100
node_cpu_seconds_total
node-exporter
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="user"}[5m])) by (instance) *100
node_cpu_seconds_total
node-exporter
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) by (instance) *100
node_cpu_seconds_total
node-exporter
(1 - avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="idle"}[5m])) by (instance))*100
node_cpu_seconds_total
node-exporter
内存信息
node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"}
node_memory_MemTotal_bytes
node-exporter
node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"} - node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"}
node_memory_MemTotal_bytes
node-exporter
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"}
node_memory_MemAvailable_bytes
node-exporter
(1 - (node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"} / (node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"})))* 100
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
每秒网络带宽使用
irate(node_network_receive_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8
node_network_receive_bytes_total
node-exporter
irate(node_network_transmit_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8
node_network_transmit_bytes_total
node-exporter
系统平均负载
node_load1{cluster=~"$cluster",instance=~"$node"}
node_load1
node-exporter
node_load5{cluster=~"$cluster",instance=~"$node"}
node_load5
node-exporter
node_load15{cluster=~"$cluster",instance=~"$node"}
node_load15
node-exporter
sum(count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'}) by (cpu,instance)) by(instance)
node_cpu_seconds_total
node-exporter
每秒磁盘读写容量
irate(node_disk_read_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_read_bytes_total
node-exporter
irate(node_disk_written_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_written_bytes_total
node-exporter
磁盘使用率
(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
node_filesystem_files_free{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"} / node_filesystem_files{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"}
node_filesystem_files_free
node-exporter
node_filesystem_files
node-exporter
磁盘读写速率(IOPS)
irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_reads_completed_total
node-exporter
irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_writes_completed_total
node-exporter
node_disk_io_now{cluster=~"$cluster",instance=~"$node"}
node_disk_io_now
node-exporter
每1秒内 I/O 操作耗时占比
irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_io_time_seconds_total
node-exporter
每次 IO 读写的耗时
irate(node_disk_read_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_read_time_seconds_total
node-exporter
node_disk_reads_completed_total
node-exporter
irate(node_disk_write_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_write_time_seconds_total
node-exporter
node_disk_writes_completed_total
node-exporter
irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_io_time_seconds_total
node-exporter
irate(node_disk_io_time_weighted_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_io_time_weighted_seconds_total
node-exporter
网络 Socket 连接信息
node_netstat_Tcp_CurrEstab{cluster=~"$cluster",instance=~'$node'}
node_netstat_Tcp_CurrEstab
node-exporter
node_sockstat_TCP_tw{cluster=~"$cluster",instance=~'$node'}
node_sockstat_TCP_tw
node-exporter
node_sockstat_sockets_used{cluster=~"$cluster",instance=~'$node'}
node_sockstat_sockets_used
node-exporter
node_sockstat_UDP_inuse{cluster=~"$cluster",instance=~'$node'}
node_sockstat_UDP_inuse
node-exporter
node_sockstat_TCP_alloc{cluster=~"$cluster",instance=~'$node'}
node_sockstat_TCP_alloc
node-exporter
irate(node_netstat_Tcp_PassiveOpens{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_PassiveOpens
node-exporter
irate(node_netstat_Tcp_ActiveOpens{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_ActiveOpens
node-exporter
irate(node_netstat_Tcp_InSegs{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_InSegs
node-exporter
irate(node_netstat_Tcp_OutSegs{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_OutSegs
node-exporter
irate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_RetransSegs
node-exporter
打开的文件描述符(左)/每秒上下文切换次数(右)
node_filefd_allocated{cluster=~"$cluster",instance=~"$node"}
node_filefd_allocated
node-exporter
irate(node_context_switches_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_context_switches_total
node-exporter
(node_filefd_allocated{cluster=~"$cluster",instance=~"$node"}/node_filefd_maximum{cluster=~"$cluster",instance=~"$node"}) *100
node_filefd_allocated
node-exporter
node_filefd_maximum
node-exporter

节点 Pod 监控

图表名称
查询语句
使用的指标
配置文件
Pods
count(kube_pod_info{node=~"$node"})
kube_pod_info
kube-state-metrics
Pod Request Memory
sum(kube_pod_container_resource_requests_memory_bytes{node=~"$node"})by(node)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Pod Request CPU Cores
sum(kube_pod_container_resource_requests_cpu_cores{node=~"$node"})by(node)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Usage
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
CPU Quota
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node", container!="", container!="POD"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
Memory Quota
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Pod List
group (kube_pod_info{host_ip="$node"})by(created_by_kind, created_by_name,host_network,pod_ip,pod,priority_class,namespace)
kube_pod_info
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(kube_pod_status_phase{}==1) by (pod, phase)
kube_pod_info
kube-state-metrics
kube_pod_status_phase
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(container_memory_working_set_bytes) by (pod)
kube_pod_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(rate(container_cpu_usage_seconds_total{image!=""}[5m])) by (pod)
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(time()-kube_pod_start_time) by (pod)
kube_pod_info
kube-state-metrics
kube_pod_start_time
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) max(kube_pod_status_ready{condition="true"}) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
kube_pod_status_ready
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_receive_bytes_total{image!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_network_receive_bytes_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{image!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_reads_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_fs_reads_bytes_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_writes_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_fs_writes_bytes_total
cadvisor

工作负载监控概览

图表名称
查询语句
使用的指标
配置文件
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})
kube_resourcequota
kube-state-metrics
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.cpu"})
kube_resourcequota
kube-state-metrics
CPU Quota
count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"})
kube_resourcequota
kube-state-metrics
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"})
kube_resourcequota
kube-state-metrics
Memory Quota
count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

Deployment

图表名称
查询语句
使用的指标
配置文件
Age
time() - max(kube_deployment_created{cluster="$cluster",namespace="$namespace",deployment="$workload"})
kube_deployment_created
kube-state-metrics
Replicas(Pods)-Request
max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"})
kube_deployment_spec_replicas
kube-state-metrics
Replicas(Pods)-Ready
max(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"})
kube_deployment_status_replicas_ready
kube-state-metrics
Replica Trend
max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_spec_replicas
kube-state-metrics
max(kube_deployment_status_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas
kube-state-metrics
min(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_ready
kube-state-metrics
min(kube_deployment_status_replicas_available{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_available
kube-state-metrics
min(kube_deployment_status_replicas_updated{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_updated
kube-state-metrics
min(kube_deployment_status_replicas_unavailable{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_unavailable
kube-state-metrics
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Quota
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Limit-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Request-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Info
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Usage/Limit (%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Usage/Request(%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU User Time(%)
avg(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container))) by (pod,container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_user_seconds_total
cadvisor
container_cpu_user_seconds_total
cadvisor
container_cpu_system_seconds_total
cadvisor
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Quota
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Limit-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_spec_memory_limit_bytes
cadvisor
Memory Request-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Info
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_memory_working_set_bytes
cadvisor
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Limit(%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Request(%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Sockets
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_sockets
cadvisor
Network In
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_receive_bytes_total
cadvisor
Network Out
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
Network Errors
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod)))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_receive_errors_total
cadvisor
container_network_transmit_errors_total
cadvisor
Network IO
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_receive_bytes_total
cadvisor
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
File System Read
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_fs_reads_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_fs_reads_bytes_total
cadvisor
File System Write
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_fs_writes_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_fs_writes_bytes_total
cadvisor

StatefulSet

图表名称
查询语句
使用的指标
配置文件
Generation
max(kube_statefulset_metadata_generation{cluster="$cluster",namespace="$namespace", statefulset="$workload"})
kube_statefulset_metadata_generation
kube-state-metrics
Replicas(Pods)-Request
max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"})
kube_statefulset_replicas
kube-state-metrics
Replicas(Pods)-Ready
max(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"})
kube_statefulset_status_replicas_ready
kube-state-metrics
Age
time() - max(kube_statefulset_created{cluster="$cluster",namespace="$namespace",statefulset="$workload"})
kube_statefulset_created
kube-state-metrics
Replica Trend
max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_replicas
kube-state-metrics
max(kube_statefulset_status_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas
kube-state-metrics
min(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas_ready
kube-state-metrics
min(kube_statefulset_status_replicas_available{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas_available
kube-state-metrics
min(kube_statefulset_status_replicas_updated{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas_updated
kube-state-metrics
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Quota
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Limit-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Request-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Info
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image)
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Usage/Limit (%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Usage/Request(%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU User Time(%)
avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container,image))) by (pod,container,image)
kube_pod_info
kube-state-metrics
container_cpu_user_seconds_total
cadvisor
container_cpu_user_seconds_total
cadvisor
container_cpu_system_seconds_total
cadvisor
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Quota
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Limit-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod))
kube_pod_info
kube-state-metrics
container_spec_memory_limit_bytes
cadvisor
Memory Request-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Info
avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod, image) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"}))by (container, pod, image)
kube_pod_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod, image) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Limit(%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Request(%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Sockets
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
kube_pod_info
kube-state-metrics
container_sockets
cadvisor
Network In
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
container_network_receive_bytes_total
cadvisor
Network Out
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
Network Errors
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod)))
kube_pod_info
kube-state-metrics
container_network_receive_errors_total
cadvisor
container_network_transmit_errors_total
cadvisor
Network IO
sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
container_network_receive_bytes_total
cadvisor
-sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
container_network_transmit_bytes_total
cadvisor

DaemonSet

图表名称
查询语句
使用的指标
配置文件
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Quota
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Quota
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

集群 Pod 监控

图表名称
查询语句
使用的指标
配置文件
Age
time() - max(kube_pod_created{pod=~"$pod",cluster="$cluster",namespace="$namespace"})
kube_pod_created
kube-state-metrics
Restart Count-Last 1 Hour
ceil(sum (increase(kube_pod_container_status_restarts_total{pod=~"$pod",cluster="$cluster",namespace="$namespace"}[1h])))
kube_pod_container_status_restarts_total
kube-state-metrics
Requests-CPU
sum(kube_pod_container_resource_requests_cpu_cores{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
Requests-Memory
sum(kube_pod_container_resource_requests_memory_bytes{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Limits-CPU
sum(kube_pod_container_resource_limits_cpu_cores{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Limits-Memory
sum(kube_pod_container_resource_limits_memory_bytes{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Containers
group by (image, container,pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_info
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_status_running
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_limits
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_limits{resource="memory",cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_limits
kube-state-metrics
max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_status_restarts_total
kube-state-metrics
CPU Usage (%)
max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (container,namespace,pod) / max(container_spec_cpu_quota{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000) by (container,namespace,pod) or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
container_spec_cpu_quota
cadvisor
CPU Usage By Cores
max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (pod,container,namespace)or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
CPU Load (10s)
max(container_cpu_load_average_10s{namespace=~"$namespace", pod=~"$pod", container!="", container!="POD"} / 1000)by(pod,container)
container_cpu_load_average_10s
cadvisor
CPU Throttled Percent
max (rate (container_cpu_cfs_throttled_seconds_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) / max (rate (container_cpu_cfs_periods_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) or on() vector(0)
container_cpu_cfs_throttled_seconds_total
cadvisor
container_cpu_cfs_periods_total
cadvisor
CPU Quota
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage (WSS)
max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
container_memory_working_set_bytes
cadvisor
Memory Usage
max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
container_memory_usage_bytes
cadvisor
Memory Usage (RSS)
max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container) or on() vector(0)
container_memory_rss
cadvisor
Memory Cache
max(container_memory_cache{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
container_memory_cache
cadvisor
Usage WSS/Limit (%)
(max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Usage/Limit (%)
(max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
container_memory_usage_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Usage RSS/Limit (%)
(max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
container_memory_rss
cadvisor
container_spec_memory_limit_bytes
cadvisor
Memory Failcnt
max (increase(container_memory_failcnt{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (pod,container)
container_memory_failcnt
cadvisor
Memory Quota
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)
container_memory_working_set_bytes
cadvisor
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Network Input
max (rate (container_network_receive_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by(pod)
container_network_receive_bytes_total
cadvisor
Network Output
max (rate (container_network_transmit_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m]))by(pod)
container_network_transmit_bytes_total
cadvisor
Network Input Error (%)
max (increase (container_network_receive_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_receive_packets_dropped_total
cadvisor
container_network_receive_packets_total
cadvisor
max (increase (container_network_receive_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_receive_errors_total
cadvisor
container_network_receive_packets_total
cadvisor
Network Output Error (%)
max (increase (container_network_transmit_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_transmit_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_transmit_packets_dropped_total
cadvisor
container_network_transmit_packets_total
cadvisor
max (increase (container_network_transmit_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_transmit_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_transmit_errors_total
cadvisor
container_network_transmit_packets_total
cadvisor
File System Read
max (rate(container_fs_reads_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m]))by (container,pod)
container_fs_reads_bytes_total
cadvisor
File System Write
max (rate(container_fs_writes_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (container,pod)
container_fs_writes_bytes_total
cadvisor
Network Socket
max(container_sockets{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod)
container_sockets
cadvisor
Process Number
count(container_processes{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod)
container_processes
cadvisor

集群网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
Current Rate of Bytes Transmitted
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
Current Status
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_total
cadvisor
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_total
cadvisor
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_dropped_total
cadvisor
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_dropped_total
cadvisor
Average Rate of Bytes Received
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
Average Rate of Bytes Transmitted
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
Receive Bandwidth
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
Transmit Bandwidth
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
Rate of Received Packets
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_total
cadvisor
Rate of Transmitted Packets
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_total
cadvisor
Rate of Received Packets Dropped
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_dropped_total
cadvisor
Rate of Transmitted Packets Dropped
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_dropped_total
cadvisor
Rate of TCP Retransmits out of all sent segments
sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance))
node_netstat_Tcp_RetransSegs
node-exporter
node_netstat_Tcp_OutSegs
node-exporter
Rate of TCP SYN Retransmits out of all retransmits
sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=~"$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance))
node_netstat_TcpExt_TCPSynRetrans
node-exporter
node_netstat_Tcp_RetransSegs
node-exporter

命名空间 Pods 网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]))
container_network_receive_bytes_total
cadvisor
Current Rate of Bytes Transmitted
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]))
container_network_transmit_bytes_total
cadvisor
Current Status
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_bytes_total
cadvisor
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_bytes_total
cadvisor
sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_total
cadvisor
sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_total
cadvisor
sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_dropped_total
cadvisor
sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_dropped_total
cadvisor
Receive Bandwidth
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_bytes_total
cadvisor
Transmit Bandwidth
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_bytes_total
cadvisor
Rate of Received Packets
sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_total
cadvisor
Rate of Transmitted Packets
sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_total
cadvisor
Rate of Received Packets Dropped
sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_dropped_total
cadvisor
Rate of Transmitted Packets Dropped
sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster", namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_dropped_total
cadvisor

命名空间工作负载网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Current Rate of Bytes Transmitted
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Current Status
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Received
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Transmitted
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Receive Bandwidth
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Transmit Bandwidth
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Received Packets
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Received Packets Dropped
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets Dropped
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

Pod 网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m]))
container_network_receive_bytes_total
cadvisor
Current Rate of Bytes Transmitted
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m]))
container_network_transmit_bytes_total
cadvisor
Receive Bandwidth
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_receive_bytes_total
cadvisor
Transmit Bandwidth
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_transmit_bytes_total
cadvisor
Rate of Received Packets
sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_receive_packets_total
cadvisor
Rate of Transmitted Packets
sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_transmit_packets_total
cadvisor
Rate of Received Packets Dropped
sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_receive_packets_dropped_total
cadvisor
Rate of Transmitted Packets Dropped
sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_transmit_packets_dropped_total
cadvisor

工作负载网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Current Rate of Bytes Transmitted
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Received
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Transmitted
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Receive Bandwidth
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Transmit Bandwidth
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标


Rate of Received Packets
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Received Packets Dropped
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets Dropped
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

PVC 存储监控

图表名称
查询语句
使用的指标
配置文件
Volume Space Usage
( sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) - sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) )
kubelet_volume_stats_capacity_bytes
kubelet
kubelet_volume_stats_available_bytes
kubelet
sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
kubelet_volume_stats_available_bytes
kubelet
Volume Space Usage
( kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} - kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} ) / kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} * 100
kubelet_volume_stats_capacity_bytes
kubelet
kubelet_volume_stats_available_bytes
kubelet
kubelet_volume_stats_capacity_bytes
kubelet
Volume inodes Usage
sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
kubelet_volume_stats_inodes_used
kubelet
( sum without(instance, node) (kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) - sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) )
kubelet_volume_stats_inodes
kubelet
kubelet_volume_stats_inodes_used
kubelet
Volume inodes Usage
kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} / kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} * 100
kubelet_volume_stats_inodes_used
kubelet
kubelet_volume_stats_inodes
kubelet


帮助和支持

本页内容是否解决了您的问题?

填写满意度调查问卷,共创更好文档体验。

文档反馈