Add raid_report.py

This commit is contained in:
gru
2026-03-16 15:08:01 +01:00
parent 2c22195268
commit dcd0e534e1

165
raid_report.py Normal file
View File

@@ -0,0 +1,165 @@
#!/usr/bin/env python3
"""
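Dell MegaRAID status report: per-disk health table built from MegaCli output,
with an optional Nagios check mode (--check) and CSV export (--csv).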
"""
import subprocess
import re
import sys
import csv
from datetime import datetime
import os
import argparse
# Process exit codes used by the Nagios check mode (see nagios_check).
NAGIOS_OK = 0
NAGIOS_WARNING = 1
NAGIOS_CRITICAL = 2
NAGIOS_UNKNOWN = 3
def run_megacli(cmd, binary='/opt/MegaRAID/MegaCli/MegaCli64', timeout=30):
    """Run MegaCli with the given arguments and return its stripped stdout.

    Args:
        cmd: MegaCli arguments as one whitespace-separated string,
            e.g. ``'-PDList -aALL'``.
        binary: Path of the MegaCli executable.
        timeout: Seconds to wait for the command before giving up.

    Returns:
        The command's stdout with surrounding whitespace removed, or ``None``
        when the binary cannot be started, times out, produces undecodable
        output, or exits with a non-zero status.
    """
    try:
        result = subprocess.run([binary] + cmd.split(),
                                capture_output=True, text=True, timeout=timeout)
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: binary missing or not executable.
        # ValueError: invalid arguments or output that cannot be decoded.
        # SubprocessError: includes TimeoutExpired.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return None
    if result.returncode != 0:
        return None
    return result.stdout.strip()
def _pd_field(pattern, text, default, flags=0):
    """Return group 1 of the first *pattern* match in *text*, else *default*."""
    match = re.search(pattern, text, flags)
    return match.group(1) if match else default


def _pd_health(media_err, pred_fail, smart):
    """Map a disk's error counters and SMART flag to OK/WARNING/CRITICAL."""
    if media_err > 10 or pred_fail > 0 or smart == 'Yes':
        return 'CRITICAL'
    if media_err > 0:
        return 'WARNING'
    return 'OK'


def parse_pdlist(output):
    """Parse the text of ``MegaCli -PDList`` into one dict per physical disk.

    Args:
        output: Raw command output, or ``None``/empty when nothing is available.

    Returns:
        A list of dicts with the string-valued keys ``slot``, ``devid``,
        ``model``, ``state``, ``media_err``, ``pred_fail``, ``smart``,
        ``temp`` and ``health``. Missing fields become ``'N/A'`` (``'0'`` for
        the two error counters). An empty list is returned for empty input.
    """
    if not output:
        return []

    lines = output.splitlines()
    slot_re = re.compile(r'Slot Number:\s*(\d+)')

    # A disk's block runs from its "Slot Number:" line up to the next one (or
    # the end of the output). The number of lines per disk is not fixed, so a
    # constant-size window would either skip disks or miss late fields.
    starts = [idx for idx, line in enumerate(lines) if slot_re.search(line)]
    ends = starts[1:] + [len(lines)]

    disks = []
    for start, end in zip(starts, ends):
        # Keep line breaks so single-line fields cannot bleed into the next line.
        text = '\n'.join(lines[start:end])

        # Inquiry data ends at a long run of blanks or at the end of its line.
        model = _pd_field(r'Inquiry Data:[ \t]*(\S.*?)(?=[ \t]{10,}|$)',
                          text, None, re.MULTILINE)

        # Prefer the "Drive Temperature :40C (104.00 F)" form, then any
        # "Temperature: <n>" line.
        temp = _pd_field(r'Drive Temperature\s*:\s*(\d+)C', text, None)
        if temp is None:
            temp = _pd_field(r'Temperature\s*:\s*(\d+)', text, None)

        disk = {
            'slot': slot_re.search(lines[start]).group(1),
            'devid': _pd_field(r'Device Id:\s*(\d+)', text, 'N/A'),
            'model': model.strip()[:24].rstrip() if model else 'N/A',
            'state': _pd_field(r'Firmware state:[ \t]*([^\r\n]{1,30})',
                               text, 'N/A').strip(),
            'media_err': _pd_field(r'Media Error Count:\s*(\d+)', text, '0'),
            'pred_fail': _pd_field(r'Predictive Failure Count:\s*(\d+)', text, '0'),
            'smart': _pd_field(
                r'Drive has flagged a S\.M\.A\.R\.T alert\s*:\s*(Yes|No)',
                text, 'N/A'),
            'temp': f"{temp}C" if temp is not None else 'N/A',
        }
        disk['health'] = _pd_health(int(disk['media_err']),
                                    int(disk['pred_fail']),
                                    disk['smart'])
        disks.append(disk)
    return disks
def print_table(disks):
    """Print a colour-coded per-disk status table and a summary to stdout.

    Args:
        disks: List of disk dicts with the keys ``slot``, ``devid``,
            ``model``, ``state``, ``media_err``, ``pred_fail``, ``smart``,
            ``temp`` and ``health`` (``health`` must be one of ``CRITICAL``,
            ``WARNING`` or ``OK``).

    After the table, the overall virtual-disk status returned by
    ``get_vd_status()`` is printed.
    """
    rule = "-" * 104
    colors = {'CRITICAL': '\033[91m', 'WARNING': '\033[93m', 'OK': '\033[92m'}

    # Label the timestamp with the host's real zone name; a fixed "CET" is
    # wrong during daylight-saving time and on hosts in other zones.
    generated = datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')

    print("\nRAID STATUS REPORT")
    print(rule)
    print(f"Generated: {generated} | Total Disks: {len(disks)}")
    print(rule)
    print(f"{'Slot':<4} {'ID':<3} {'Model':<25} {'State':<20} {'MediaErr':<9} {'PredFail':<8} "
          f"{'SMART':<5} {'Temp':<6} {'Health':<10}")
    print(rule)

    stats = {'CRITICAL': 0, 'WARNING': 0, 'OK': 0}
    for disk in disks:
        health = disk['health']
        stats[health] += 1
        color = colors[health]
        # State is cut to 19 characters so it always fits its 20-wide column.
        print(f"{color}{disk['slot']:<4} {disk['devid']:<3} {disk['model']:<25} "
              f"{disk['state'][:19]:<20} {disk['media_err']:<9} {disk['pred_fail']:<8} "
              f"{disk['smart']:<5} {disk['temp']:<6} {health:<10}\033[0m")

    print(rule)
    print(f"SUMMARY: CRITICAL={stats['CRITICAL']:2d} WARNING={stats['WARNING']:2d} "
          f"OK={stats['OK']:2d}")
    if stats['CRITICAL'] > 0:
        print(f"ALERT: {stats['CRITICAL']} disks require immediate attention")

    vd_status = get_vd_status()
    print(f"Virtual Disks: {vd_status}")
def nagios_check(disks):
    """Print a one-line Nagios status for *disks* and exit with the matching code.

    Args:
        disks: List of disk dicts carrying at least the ``health`` and
            ``media_err`` keys.

    Exit status is ``NAGIOS_UNKNOWN`` when no disks were found,
    ``NAGIOS_CRITICAL`` when any disk is CRITICAL, ``NAGIOS_WARNING`` when any
    disk is WARNING, otherwise ``NAGIOS_OK``. This function never returns:
    every path ends in ``sys.exit``.
    """
    if not disks:
        # No parsed disks means the controller tool gave us nothing usable.
        # Reporting OK here would hide a missing or failing MegaCli.
        print("UNKNOWN: no disks found (MegaCli unavailable or returned no data)")
        sys.exit(NAGIOS_UNKNOWN)

    critical = sum(1 for d in disks if d['health'] == 'CRITICAL')
    warning = sum(1 for d in disks if d['health'] == 'WARNING')
    perfdata = (f"total={len(disks)} critical={critical} warning={warning} "
                f"media_err_total={sum(int(d['media_err']) for d in disks)}")

    if critical > 0:
        print(f"CRITICAL: {critical} critical disks | {perfdata}")
        sys.exit(NAGIOS_CRITICAL)
    if warning > 0:
        print(f"WARNING: {warning} warning disks | {perfdata}")
        sys.exit(NAGIOS_WARNING)
    print(f"OK: All {len(disks)} disks healthy | {perfdata}")
    sys.exit(NAGIOS_OK)
def save_csv(disks):
    """Write *disks* to a timestamped CSV file and print where it was saved.

    The file is created under a relative name of the form
    ``raid-report-YYYYmmdd-HHMM.csv`` with a header row followed by one row
    per disk dict; its absolute path is printed afterwards.
    """
    columns = ['slot', 'devid', 'model', 'state', 'media_err',
               'pred_fail', 'smart', 'temp', 'health']
    filename = f"raid-report-{datetime.now().strftime('%Y%m%d-%H%M')}.csv"

    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        for row in disks:
            out.writerow(row)

    saved_to = os.path.abspath(filename)
    print(f"\nCSV exported: {saved_to}")
def get_vd_status():
    """Summarise the state of all virtual disks reported by MegaCli.

    Returns:
        ``"Unknown"`` when MegaCli gave no output, ``"Degraded/Rebuild"`` when
        the output mentions a degraded or rebuilding array, ``"Optimal"`` when
        every reported virtual disk is optimal, and ``"Check manually"``
        otherwise.
    """
    vd = run_megacli('-LDInfo -Lall -aALL')
    if not vd:
        return "Unknown"

    # Problems are checked first: with several virtual disks, one healthy
    # array must not mask a degraded one.
    if any(marker in vd for marker in ('Degraded', 'Rebuild')):
        return "Degraded/Rebuild"

    # When per-disk "State : <value>" lines are present, all of them have to
    # be Optimal; any other state (e.g. Offline) needs a human look.
    states = re.findall(r'^\s*State\s*:\s*(.*\S)', vd, flags=re.MULTILINE)
    if states:
        if all(state == 'Optimal' for state in states):
            return "Optimal"
        return "Check manually"

    # No recognisable state lines: fall back to a plain substring check.
    if 'Optimal' in vd:
        return "Optimal"
    return "Check manually"
def main():
    """Parse the command line, query MegaCli for physical disks and report.

    With ``--check`` a single Nagios status line is printed and the process
    exits with the corresponding code. Otherwise the status table is printed
    and, with ``--csv``, additionally exported to a CSV file.
    """
    parser = argparse.ArgumentParser(description='MegaRAID Monitor - Clean Output')
    parser.add_argument('--check', action='store_true', help='Nagios mode')
    parser.add_argument('--csv', action='store_true', help='Save CSV')
    args = parser.parse_args()

    pd_data = run_megacli('-PDList -aALL')
    if pd_data is None:
        # None means MegaCli could not be run or exited with an error; that
        # must not be mistaken for "controller has no disks".
        if args.check:
            print("UNKNOWN: MegaCli could not be run or returned an error")
            sys.exit(NAGIOS_UNKNOWN)
        print("WARNING: MegaCli could not be run or returned an error; "
              "the report will be empty", file=sys.stderr)

    disks = parse_pdlist(pd_data)
    if args.check:
        # nagios_check always terminates the process via sys.exit.
        nagios_check(disks)
    else:
        print_table(disks)
        if args.csv:
            save_csv(disks)


if __name__ == '__main__':
    main()