服务器巡检
本文涉及的服务器均为阿里云服务器(共多台):先使用 Shell 脚本对每台服务器进行巡检,再将巡检结果上传至 OSS 备份。
#!/bin/sh
# Server inspection: collects memory, CPU, disk and inode usage into a
# per-day log file and uploads that log file to OSS.

# Working directory; the log directory ./logs is resolved relative to it.
dir='/home/docker_data/backup'
# Abort when the directory is missing; otherwise every relative path used
# below would silently resolve against the caller's current directory.
cd "$dir" || exit 1
# Log file name: <ip>-<day of month>
ip=xxx.xxx.xxx.xxx
file_name=$ip'-'$(date +%d)
# OSS destination path
backup_dir='oss://xx-backup-shenzhen/rd1/product/server/logs/'
# Make sure the log directory exists, then truncate the log file so that
# results from a previous run with the same day number are discarded.
mkdir -p ./logs || exit 1
: > "./logs/$file_name.log"
# Memory figures are read from /proc/meminfo.
# Total memory
mem_total=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo)
# Free memory
mem_free=$(awk '/^MemFree:/ {print $2}' /proc/meminfo)
# Available memory
mem_available=$(awk '/^MemAvailable:/ {print $2}' /proc/meminfo)
# Page cache
cached=$(awk '/^Cached:/ {print $2}' /proc/meminfo)
# Used percentage = (total - available) * 100 / total.
# The multiplication is done BEFORE the division: with scale=2 a plain
# (total-available)/total is truncated to two decimals first, which made the
# final percentage always a whole number (e.g. 45.00 instead of 45.37).
mem_use_rate=$(echo "scale=2;($mem_total-$mem_available)*100/$mem_total"|bc)
# bc prints values below 1 without a leading zero (".37"); add it back.
case $mem_use_rate in
    .*) mem_use_rate=0$mem_use_rate ;;
esac
# echo "Memory Total:$mem_total KB"
# echo "Memory Free:$mem_free KB"
# echo "Memory Available:$mem_available KB"
# echo "Memory Cached:$cached KB"
echo "Host.mem.usedutilization:$mem_use_rate%" | tee -a "./logs/$file_name.log"
# CPU figures are read from /proc/stat.
# Columns of the aggregate "cpu" line:
#   name | user | nice | system | idle | iowait | irq | softirq | steal | guest | guest_nice
# Total CPU time: user + nice + system + idle + iowait + irq + softirq + steal + guest
# Total time elapsed between t1 and t2: total(t2) - total(t1)
# Idle time between t1 and t2: idle2 - idle1
# Utilization between t1 and t2: 1 - idle time / total time
# IOWait share between t1 and t2: (iowait2 - iowait1) / total time

# Sample at t1. Total and idle are taken from ONE read of /proc/stat so both
# numbers describe the same instant; output is "<total> <idle>".
cpu_sample1=$(awk '/^cpu / {print $2+$3+$4+$5+$6+$7+$8+$9+$10, $5}' /proc/stat)
# Total time at t1
cpu_t1=${cpu_sample1% *}
# Idle time at t1
cpu_idle1=${cpu_sample1#* }
# Sampling interval in seconds
cpu_sleep=300
sleep $cpu_sleep
# Sample at t2
cpu_sample2=$(awk '/^cpu / {print $2+$3+$4+$5+$6+$7+$8+$9+$10, $5}' /proc/stat)
# Total time at t2
cpu_t2=${cpu_sample2% *}
# Idle time at t2
cpu_idle2=${cpu_sample2#* }
# Total CPU time spent in the interval
cpu_t=$((cpu_t2 - cpu_t1))
# echo "cpu_t:$cpu_t"
# Idle CPU time spent in the interval
cpu_idle=$((cpu_idle2 - cpu_idle1))
# echo "cpu_idle:$cpu_idle"
# Utilization over the interval; guarded so an empty interval cannot make bc
# divide by zero.
if [ "$cpu_t" -gt 0 ]
then
    cpu_use_rate=$(echo "scale=5;(1-$cpu_idle/$cpu_t)*100"|bc)
    # Reduce to two decimals
    cpu_use_rate=$(echo "scale=2;$cpu_use_rate/1"|bc)
else
    cpu_use_rate=0
fi
# bc omits the leading zero for |x| < 1 (".50"). Test for that directly
# instead of comparing against 1, which produced "01.00%" for exactly 1.00
# and "00%" for 0.
case $cpu_use_rate in
    .*) cpu_use_rate=0$cpu_use_rate ;;
    -.*) cpu_use_rate=-0${cpu_use_rate#-} ;;
esac
echo "Host.cpu.total:$cpu_use_rate%" | tee -a "./logs/$file_name.log"
# Disk space usage
# Threshold (percent) above which inode usage is reported
disk_use_rate_limit=60
# LC_ALL=C keeps the header line in English so the '^Filesystem' filter
# below works regardless of the system locale.
# awk emits "<mount point> <use%>" for every remaining filesystem.
LC_ALL=C df -Ph | grep -vE '^Filesystem|tmpfs|cdrom|overlay|shm' | awk '{print $6, $5}' | while read -r disk_partition disk_used
do
    # Strip the trailing percent sign
    disk_used=${disk_used%\%}
    echo "Host.disk.utilization:$disk_partition | $disk_used%" | tee -a "./logs/$file_name.log"
    # if [ $disk_used -ge $disk_use_rate_limit ]; then
    # echo "The partition \"$disk_partition\" on $(hostname) has used $disk_used% at $(date)"
    # fi
done
# Inode usage: only filesystems at or above the threshold are logged
LC_ALL=C df -Pi | grep -vE '^Filesystem|tmpfs|cdrom|overlay|shm' | awk '{print $6, $5}' | while read -r disk_partition disk_used
do
    disk_used=${disk_used%\%}
    # df prints "-" when a filesystem reports no inode figures; skip anything
    # that is not a plain number so the integer comparison cannot fail.
    case $disk_used in
        ''|*[!0-9]*) continue ;;
    esac
    if [ "$disk_used" -ge "$disk_use_rate_limit" ]; then
        # echo "The inode partition \"$disk_partition\" on $(hostname) has used $disk_used% at $(date)"
        echo "inode.disk.utilization:$disk_partition | $disk_used%" | tee -a "./logs/$file_name.log"
    fi
done
# Upload the log file to OSS (after a short pause)
log_file="./logs/$file_name.log"
sleep 3
./ossutil64 cp "$log_file" "$backup_dir" -u -f --config-file .ossutilconfig
Python汇总巡检结果并发送邮件
使用 Python 汇总每台服务器的巡检结果并写入表格,然后通过邮件发送巡检汇报。
# -*- coding: utf-8 -*-
import os
import re
import sys
import ssl
import xlrd
import xlwt
import time
import _thread
import requests
import threading
from xlutils.copy import copy
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
from email.header import Header
from email import encoders
from email.utils import parseaddr, formataddr
# Custom worker thread
class myThread(threading.Thread):
    """Thread that processes one server by calling ``start(counter)``.

    Args:
        threadID: Numeric identifier stored on the instance.
        name: Thread name (assigned to ``Thread.name``).
        counter: Index handed to ``start`` when the thread runs.
    """

    def __init__(self, threadID, name, counter):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        """Run ``start(self.counter)`` while holding the shared ``threadLock``."""
        # Using the lock as a context manager guarantees it is released even
        # when start() raises; with a bare acquire()/release() pair a single
        # failing thread would leave the lock held and block all the others.
        with threadLock:
            start(self.counter)
# Directory where the spreadsheets are stored
filePath = '/home/server/check/xls/'
# File name of the inspection summary template
xlsFileName = 'XX组-业务服务器巡检报告模板.xls'
# Open the template workbook, keeping the original sheet formatting
oldWorkBook = xlrd.open_workbook(filePath + xlsFileName, formatting_info = True)
# Create a writable copy of the template
workBook = copy(oldWorkBook)
# Worksheet that receives the inspection results
ws = workBook.get_sheet('业务及服务器巡检情况')
# URL prefix of the per-server log files; getCheckInfos appends
# '<ip>-<day of month>.log' to it
logUrlPrefix='https://1219044460782086.cn-shenzhen.fc.aliyuncs.com/2016-08-15/proxy/backup-api/log-file/'
# Server IP list; the list index is also used to pick the sheet row
ipArray = [
'xx1.xx1.xx1.xx1',
'xx2.xx2.xx2.xx2',
'xx3.xx3.xx3.xx3',
'xx4.xx4.xx4.xx4',
'xx5.xx5.xx5.xx5',
'xx6.xx6.xx6.xx6',
'xx7.xx7.xx7.xx7',
'xx8.xx8.xx8.xx8',
'xx9.xx9.xx9.xx9',
'xx0.xx0.xx0.xx0'
]
# Comma-separated IPs whose inode check exceeded 60%
remarkIpStr = ''
# Third-party SMTP service
# Mail server host
mailHost='smtphm.qiye.163.com'
# SMTP port
mailPort=25
# Account used to log in to the mail server
mailUser='yanfa@xxxxx.com'
# Password of that account
# NOTE(review): the credential is hard-coded in the source — consider loading
# it from the environment or a config file instead.
mailPasswd='xxxxxxxx'
# Sender address
sender='yanfa@xxxxx.com'
# Recipient addresses
receivers = [
'xxx1@xxxxx.com',
'xxx2@xxxxx.com',
'xxx3@xxxxx.com',
'xxx4@xxxxx.com'
]
# Base name of the generated report file and of the mail attachment
title = 'XX组-业务服务器巡检报告'
# Per-server entry point
def start(counter):
    """Write the inspection result of server ``ipArray[counter]`` into the sheet.

    The row written is ``counter + 1``. Column 10 receives today's date,
    column 12 the inspection summary and column 19 the remark text. When the
    remark is non-empty the server IP is appended to the global
    ``remarkIpStr``.

    Args:
        counter: Index into ``ipArray``.
    """
    global remarkIpStr
    # Today's date in yyyy-MM-dd format
    today = time.strftime('%Y-%m-%d')
    row = counter + 1
    ip = ipArray[counter]
    checkInfos = getCheckInfos(ip)
    style = getStyle()

    ws.write(row, 10, today, style)
    ws.write(row, 12, checkInfos[0], style)

    remarks = checkInfos[1]
    if remarks != '':
        # Build a ', '-separated list of affected IPs
        if remarkIpStr == '':
            remarkIpStr += ip
        else:
            remarkIpStr += ', ' + ip
    # NOTE(review): the original indentation was lost; this write is assumed
    # to be unconditional (outside the `if`) — confirm.
    ws.write(row, 19, remarks, style)
# Fetch and parse the inspection log of one server
def getCheckInfos(ip):
    """Download today's inspection log for ``ip`` and summarise it.

    The log is fetched from ``logUrlPrefix + '<ip>-<day of month>.log'`` and
    parsed line by line as ``key:value`` pairs. If the download fails or the
    server answers with an error status, a message is printed and the log is
    treated as empty, so the returned summary simply has blank values.

    Args:
        ip: Server address used to build the log file name.

    Returns:
        A two-element list ``[checkInfo, remarks]``. ``checkInfo`` is the
        multi-line summary text (memory, CPU and one line per disk entry);
        ``remarks`` is a warning text when the log contains an inode entry,
        otherwise ``''``.
    """
    # Memory usage
    memUsedutilization = ''
    # CPU usage
    cpuTotal = ''
    # One formatted line per disk entry
    diskLines = []
    # Remark text
    remarks = ''

    logUrl = logUrlPrefix + ip + '-' + time.strftime('%d') + '.log'
    logText = ''
    try:
        # A timeout keeps one unreachable endpoint from hanging the whole run.
        r = requests.get(logUrl, timeout=10)
        if r.ok:
            logText = r.text
        else:
            # Do not parse an error page as if it were a log file.
            print('Error: 获取巡检日志失败: ' + logUrl + ' (HTTP ' + str(r.status_code) + ')')
    except requests.exceptions.RequestException as e:
        print('Error: 获取巡检日志失败: ' + logUrl, e)

    for line in logText.split('\n'):
        # Split on the first ':' only; partition() yields an empty value
        # instead of raising when a line has no ':' at all.
        key, _, value = line.partition(':')
        key = key.strip()
        value = value.strip()
        if key == 'Host.mem.usedutilization':
            memUsedutilization = value
        elif key == 'Host.cpu.total':
            cpuTotal = value
        elif key == 'Host.disk.utilization':
            # Value has the form '<partition> | <used%>'
            parts = value.split('|', 1)
            diskLines.append('\t ' + parts[0].strip() + ': ' + parts[-1].strip() + '\n')
        elif 'inode' in key:
            remarks = 'inode巡检存在磁盘超过60%的情况'

    # Fill in the summary text
    checkInfo = '内存使用率: {} \nCPU占用率: {}\n磁盘使用率: \n'.format(memUsedutilization, cpuTotal) \
        + ''.join(diskLines)
    return [checkInfo, remarks]
# Build the cell style used for the written cells
def getStyle():
    """Return an ``xlwt.XFStyle`` with a bold blue font, dashed borders on all
    four sides and horizontally/vertically centred alignment."""
    cellFont = xlwt.Font()
    # cellFont.name = '微软雅黑'
    cellFont.bold = True
    # cellFont.height = 360
    cellFont.colour_index = xlwt.Style.colour_map['blue']

    # Border line styles — DASHED: dashed line, NO_LINE: none, THIN: solid line
    cellBorders = xlwt.Borders()
    for side in ('top', 'bottom', 'left', 'right'):
        setattr(cellBorders, side, xlwt.Borders.DASHED)

    cellAlignment = xlwt.Alignment()
    cellAlignment.horz = xlwt.Alignment.HORZ_CENTER
    cellAlignment.vert = xlwt.Alignment.VERT_CENTER

    result = xlwt.XFStyle()
    result.font = cellFont
    result.borders = cellBorders
    result.alignment = cellAlignment
    return result
# Send the report e-mail
def sendEmail(file):
    """Mail the inspection report with the workbook ``file`` attached.

    The HTML body contains a greeting, an optional inode warning built from
    the global ``remarkIpStr`` and a table with selected columns of the first
    worksheet of ``file``. The workbook itself is attached as
    ``title + '.xls'``. Send failures are printed, not raised.

    Args:
        file: Path of the ``.xls`` workbook to summarise and attach.
    """
    # Imported here so the block stays self-contained.
    from html import escape

    # Build the HTML text
    header = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>'
    text = '<body text="#000000"><div>各位领导好:</div><br><div>    本周服务器已巡检' \
        ',服务器各项指标均处于正常范围,详细指标可查看附件!</div><br>'
    if remarkIpStr != '':
        line3 = '注意:服务器 ' + remarkIpStr + ' inode存在超过60%的情况,请运维人员留意!'
    else:
        line3 = ''
    text += '<div><font color=red>    ' + line3 + '</font></div><br>'
    th = '<table border="1" cellspacing="0"cellpadding="3" ' \
        'bordercolor="#000000" width="850" align="left" ><tr bgcolor="#F79646" align="center"><th>负责人</th>' \
        '<th>IP</th><th>配置情况</th><th>业务或用途</th><th>巡检时间</th><th>巡检指标与参数</th><th>资源使用情况' \
        '</th><th>资源饱和度</th><th>备注</th></tr>'

    # Open the workbook and its first worksheet
    workBook = xlrd.open_workbook(file, formatting_info = True)
    ws = workBook.sheet_by_index(0)
    # Columns copied into the mail table, and the ones rendered in blue
    scols = [0, 7, 8, 9, 10, 11, 12, 13, 19]
    blueClos = [10, 12]

    rows = []
    # Row 0 is the header row, so data rows start at 1.
    for i in range(1, ws.nrows):
        cells = []
        for j in scols:
            if j >= ws.ncols:
                continue
            value = ws.cell_value(i, j)
            # Numeric cells come back as float; concatenating them to a str
            # raised TypeError before. Whole numbers are shown without '.0'.
            if isinstance(value, float) and value.is_integer():
                value = int(value)
            # Escape so that '<', '>' or '&' in a cell cannot break the table.
            cellText = escape(str(value))
            if j in blueClos:
                cells.append('<td><font color=blue>' + cellText + '</font></td>')
            else:
                cells.append('<td>' + cellText + '</td>')
        rows.append('<tr>' + ''.join(cells) + '</tr>')
    body = ''.join(rows)

    tail = '</table></body></html>'
    htmlBody = header + text + th + body + tail

    message = MIMEMultipart()
    # formataddr encodes only the display name and leaves the address itself
    # readable; wrapping "name <addr>" in a single Header encoded the address
    # as part of the encoded word.
    message['From'] = formataddr(('服务器巡检机器人', sender))
    message['To'] = ','.join(receivers)
    subject = '阿里云服务器巡检汇报'
    message['Subject'] = Header(subject, 'utf-8')
    # Mail body
    message.attach(MIMEText(htmlBody, 'html', 'utf-8'))

    # Build and add the attachment. MIMEApplication base64-encodes its
    # payload by default, so no further encoders.encode_base64() call is
    # made (that call added a second Content-Transfer-Encoding header).
    with open(file, 'rb') as fp:
        att1 = MIMEApplication(fp.read())
    att1.add_header('Content-Disposition', 'attachment', filename = ('gbk', '', title + '.xls'))
    message.attach(att1)

    try:
        # The context manager sends QUIT and closes the connection on exit.
        with smtplib.SMTP(mailHost, mailPort) as smtpObj:
            smtpObj.login(mailUser, mailPasswd)
            smtpObj.sendmail(sender, receivers, message.as_string())
        print('邮件发送成功!')
    except (smtplib.SMTPException, OSError) as e:
        # Report the cause instead of hiding it.
        print('Error: 无法发送邮件!', e)
# Create the given directory
def mkDir(path):
    """Create ``path`` (including parents) unless it already exists.

    Surrounding whitespace and trailing backslashes are removed from ``path``
    first. When the path already exists a notice is printed instead.

    Args:
        path: Directory path to create.
    """
    target = path.strip().rstrip('\\')
    if os.path.exists(target):
        print('目录已存在,不需要重复创建!')
        return
    os.makedirs(target)
# Script entry point
if __name__ == '__main__':
    # Make sure the output directory exists
    mkDir(filePath)
    # Lock shared by the worker threads (used in myThread.run)
    threadLock = threading.Lock()
    # One worker thread per server in ipArray
    threads = [myThread(i, "Thread-" + str(i), i) for i in range(len(ipArray))]
    for th in threads:
        th.start()
    for th in threads:
        th.join()
    # Compute the report path once so that saving and mailing always refer to
    # the same file, even if the run crosses midnight between the two steps.
    reportFile = filePath + title + '-' + time.strftime('%Y-%m-%d') + '.xls'
    # Save the workbook
    workBook.save(reportFile)
    # Send the e-mail
    sendEmail(reportFile)
巡检结果


本文围绕阿里云多台服务器巡检展开,使用shell脚本进行服务器巡检,并将结果上传至oss备份。同时利用Python汇总每台服务器的巡检结果,写入表格后通过邮件进行汇报。

727

被折叠的 条评论
为什么被折叠?



