三、MySQL性能监控
二、MySQL性能监控
1)脚本:run_orzdba.sh #每间隔20秒收集一次数据 到相应的文件中
run_orzdba.sh
#!/bin/bash
HOST=localhost
PORT=3306
USER=orzuser
PASSWORD="XXXXXX"
INTERVAL=20
OUTPUT=/tmp/run_status
datetime=`date +%Y%m%d%H%m`
if [ ! -d "/tmp/run_status/" ];then
mkdir -p /tmp/run_status/
fi
function eg {
while read line
do
printf "$line"
date '+ %Y%m%d %H:%M:%S'
done
}
/data/software/script/orzdba -sys -rt -i $INTERVAL -L $OUTPUT/orzdba_sys_rt.txt & #通过orzdba工具收集CPU和RT相关数据
/data/software/script/orzdba -innodb -i $INTERVAL -L $OUTPUT/orzdba_innodb.txt & #通过orzdba工具收集 Innodb相关数据
/data/software/script/orzdba -mysql -i $INTERVAL -L $OUTPUT/orzdba_mysql.txt & #通过orzdba工具收集mysql相关数据
iostat -mxtd sda sdb $INTERVAL >> $OUTPUT/iostat.txt & #通过iostat命令收集IO相关数据
-------- -----load-avg---- ---cpu-usage--- ---swap--- --------tcprstat(us)--------
time | 1m 5m 15m |usr sys idl iow| si so| count avg 95-avg 99-avg|
11:49:41| 3.33 3.04 2.87| 8 2 89 2| 0 0| 0 0 0 0|
11:49:42| 3.33 3.04 2.87| 55 4 39 1| 0 0| 2301 988 538 761|
11:49:43| 3.33 3.04 2.87| 43 3 52 1| 0 0| 2170 1184 570 826|
time | data free dirty flush| reads writes read written|fsyncs written| list uflush uckpt view inside que|
11:49:56| 0 0 0 0| 0 0 0 0| 0 0| 0 0 0 0 0 0|
11:49:57| 122688 0 240 0| 2 33 32k 31k| 1 31k| 3300 4k 71k 5 0 0|
11:49:58| 122688 0 272 0| 0 22 0 17k| 1 17k| 3318 10 78k 6 0 0|
time | ins upd del sel iud| lor hit| run con cre cac| recv send|
11:50:11| 0 0 0 0 0| 0 100.00| 0 0 0 0| 0 0|
11:50:12| 2 2 0 637 4| 3693678 100.00| 3 14 0 56| 253k 12.2m|
11:50:13| 2 1 0 982 3| 4349953 100.00| 5 14 0 56| 418k 16.4m|
sda 0.16 129.70 4.53 29.01 0.05 0.62 40.80 0.03 0.77 0.14 0.45
sda 0.00 37.00 3.00 29.00 0.01 0.26 17.25 1.74 54.19 4.50 14.40
sda 0.00 26.00 1.00 23.00 0.00 0.19 16.67 0.95 39.79 4.58 11.00
#!/usr/bin/python
import os
pathname=os.getcwd()
sys.path.append(pathname)
import run_innodb
import run_io
import run_mysql
import run_sys
run_mysql.mysql() #run_mysql.py模块, 处理orzdba_mysql.txt文件 获取相关项目的最大值和平均值
run_innodb.innodb() #run_innodb.py模块,处理orzdba_innodb.txt文件
run_io.io() #run_io.py模块,处理iostat.txt文件
run_sys.sys() #run_sys.py模块,处理orzdba_sys_rt.txt文件
# 脚本输出 #输出数据到文件baseline.txt,再跟实际情况修改max值
Max QPS: 1804 Avg QPS: 777 #QPS:每秒查询量
Max TPS: 110 Avg TPS: 7 #TPS:每秒事务量
Max Thread run: 42 Avg Thread run: 4 #Thread run:当前活跃连接数
Max Thread con: 50 Avg Thread con: 13 #Thread con:当前连接数
******The MySQL Innodb Info:
Max diryt page: 729 Avg dirty page: 134 #innodb buffer pool(内存)中的脏页(尽量缓存热数据,尽快刷新脏数据)
Max data reads: 1682 Avg data reads: 65 #读取内存中数据的次数
Max data writes: 80 Avg data reads: 14 #写入内存的次数
Max log writen: 58000 Avg log written: 6015 #innodb redo写入的量(太大,说明事务并发高,或者事务较大)
******The I/O Info:
Max r/s: 5.15 Avg r/s: 1.50
Max w/s: 11.95 Avg w/s: 21.26
Max rMB/s: 0.36 Avg rMB/s: 0.01 #每秒读磁盘大小(兆),随机读
Max wMB/s: 20.38 Avg wMB/s: 0.44 #每秒写磁盘大小(兆)
Max await: 492.57 Avg await: 49.50 #平均一个IO的等待时间(包含服务时间)
Max svctm: 17.22 Avg svctm: 6.39 #平均一个IO的服务时间
Max %util: 65.78 Avg %util: 10.37 #IO能力的参考值(100%时,说明磁盘IO被打满)
******The CPU Info:
Max Load Avg 1m: 8.40 Avg Load Avg 1m: 2.74 #CPU 1m内的负载,建议不超过cpu核数
Max Load Avg 5m: 4.73 Avg Load Avg 5m: 2.71 #CPU 5m内的负载,建议不超过cpu核数*1.5
Max Load Avg 15m: 3.66 Avg Load Avg 15m: 2.71 #15m内负载,不超过cpu核数 * 2
Max CPU user: 83 Avg CPU user: 17 #mysqld进程占用cpu 比例(太高时可能是慢查询或高并发导致)
Max CPU sys: 3 Avg CPU sys: 1 #系统重要cpu比例
Min CPU idle: 15 Avg CPU idle: 79 #cpu空闲比例
Max MySQL RT: 78051 Avg MySQL RT: 1146 #mysql响应时间(根据tcprstat得来,连接数越大,性能下降越厉害,rt越大)
cat run_monitor_sec.py
#!/usr/bin/python
# -*- coding:UTF-8 -*-
import os
import smtplib
from email.MIMEText import MIMEText
from email.Header import Header
import socket
import struct
import fcntl
import time
SDT=time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) #获取当前时间
sender = 'xiaochou13f@126.com' #邮件发送人
receivers = ['zhiming.han@fumubang.com'] #收件人
pathname='/tmp/run_status/' #文件路径
filename=['iostat.txt','orzdba_innodb.txt','orzdba_mysql.txt','orzdba_sys_rt.txt'] #文件列表
def startOrz(): #定时 6点启动run_orzdba.sh
if SDT[11:] > "06:00:00" and SDT[11:] < "06:00:20":
os.system('/bin/bash /data/software/script/performance_monitor/run_orzdba.sh') #开始收集数据脚本
def stopOrz(): #定时 23点关闭kill_orzdba.sh,且归档昨当天数据
if SDT[11:] > "23:00:00" and SDT[11:] < "23:00:20":
os.system('/bin/bash /data/software/script/performance_monitor/kill_orzdba.sh') #结束收集数据脚本
for i in filename:
str01 = "cat "+ pathname + i + " > " + pathname + i + "_" + SDT[:10]
str02 = "cat /dev/null > " + pathname + i
os.system(str01)
os.system(str02)
startOrz()
stopOrz()
def getip(ethname): #获取eth0的IP
s=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
return socket.inet_ntoa(fcntl.ioctl(s.fileno(), 0X8915, struct.pack('256s', ethname[:15]))[20:24])
title='mail from:'+getip('eth0')+',Warning!' #邮件标题
def mail(content,title): #邮件函数
message = MIMEText(content,'plain', 'utf-8')
message['From'] = Header("root", 'utf-8')
message['To'] = Header("".join(receivers), 'utf-8')
subject = title
message['Subject'] = Header(subject, 'utf-8')
try:
smtpObj = smtplib.SMTP('localhost')
smtpObj.sendmail(sender, receivers, message.as_string())
print "邮件发送成功"
except smtplib.SMTPException:
print "Error: 无法发送邮件"
#find the last second line into "dict_comp" Dictionary value.
dict_comp={} #定义空字典dict_comp
for i in filename: #循环文件列表
f=open(pathname+i,'r+') #打开文件
off=-50
n = 2
while True: #取出文件最后倒数第2行数据追加到字典dict_comp中,如果该行不是需要的数据则取倒数第3行。。。
f.seek(off,2)
l=f.readlines()
if len(l)>=n+1:
last_line = l[-n]
if last_line.find("time") == -1 and last_line.find("innodb")==-1 and last_line.find("QPS")==-1 and last_line.find("avg")==-1 and last_line.find("Device")==-1 and last_line.find("Time")==-1 and last_line != '\n':
break
n += 1
off *= 2
f.close()
# print "file %s last line is:\n%s" %(i,last_line)
dict_comp[i] = last_line
f=open('/tmp/run_status/baseline.txt','r')
baseline=[i.replace("\n","").replace("\t",":") for i in f.readlines() if i.find("Avg")!=-1 ]
f.close()
#读取baseline.txt中的数据,格式存入列表中
def funcComp(num): #获取需要对比的数据的指定列,并格式化
return value.split()[num].replace("|","").replace("k","000")
def funcBase(name): #获取基线文件的指定数据,并格式化
return [ele.split(":")[1] for ele in baseline if ele.find(name)!=-1][0]
par=0 #定义状态值,循环结束后如状态值改变,说明有监控项查过阈值,邮件报警;否则返回正常
listResu=[] #定空列表,将超过阈值的监控项追加到列表中
for key,value in dict_comp.iteritems(): #循环需要对比的数据的集合
if key=='orzdba_mysql.txt': #判断文件
listResu.append("******The MySQL Info:")
#QPS Check
if int(funcComp(4)) > int(funcBase("QPS")): #监测QPS每秒查询量。如果当前QPS大于基线值,则改变状态值,将该监控项追加到列表中
content="The QPS more than max value! current:%d max:%d" %(int(funcComp(4)),int(funcBase("QPS")))
par = 1
listResu.append(content)
#TPS Check #监控TPS每秒事务量。......
if int(funcComp(5)) > int(funcBase("TPS")):
content="The TPS more than max value! current:%d max:%d" %(int(funcComp(5)),int(funcBase("TPS")))
par = 1
listResu.append(content)
...... #省略其他监控项
if key=='orzdba_innodb.txt':
...... #省略
if key=='iostat.txt':
......
if key=='orzdba_sys_rt.txt':
......
if par == 1: #判断状态值是否改变,如果改变则发送邮件,附加基线信息和当前状态信息;否则返回正常
content=SDT+"\n"+"\n".join(listResu)+"\n\n"+"\n".join(baseline)
print content
mail(content,title)
else:
print SDT+'\n'+"the mysql and os is normal"
4) 脚本:cron_monitor.sh #20秒执行一次探测
- cat cron_monitor.sh
#!/bin/bash
for ((i=1; i<=3; i++));do
/data/software/script/performance_monitor/run_monitor_sec.py >> /data/software/script/performance_monitor/run_monitor_sec.log 2>&1
# echo $i
sleep 20
done
- 定时任务 检测时间段:每天6点至23点
- 6-23 * * * /bin/bash /data/software/script/performance_monitor/cron_monitor.sh