166 lines
6.2 KiB
Python
166 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Dell MegaRAID Status Report - per-disk status table, optional Nagios check (--check) and CSV export (--csv)
|
|
"""
|
|
|
|
import subprocess
|
|
import re
|
|
import sys
|
|
import csv
|
|
from datetime import datetime
|
|
import os
|
|
import argparse
|
|
|
|
# Exit codes passed to sys.exit() when running as a Nagios check.
NAGIOS_OK = 0
NAGIOS_WARNING = 1
NAGIOS_CRITICAL = 2
NAGIOS_UNKNOWN = 3
|
|
|
|
def run_megacli(cmd, binary='/opt/MegaRAID/MegaCli/MegaCli64', timeout=30):
    """Run a MegaCli command and return its stripped standard output.

    Args:
        cmd: MegaCli arguments as a single whitespace-separated string,
            e.g. '-PDList -aALL'. It is split on whitespace; no shell is used.
        binary: Path of the MegaCli executable to run.
        timeout: Seconds to wait for the command before giving up.

    Returns:
        The command's stdout with surrounding whitespace removed when it
        exits with status 0; None when it exits non-zero, cannot be started,
        times out, or produces output that cannot be decoded.
    """
    try:
        result = subprocess.run([binary] + cmd.split(),
                                capture_output=True, text=True, timeout=timeout)
    except (OSError, ValueError, subprocess.SubprocessError):
        # Only launch/decoding/timeout problems are treated as "no data";
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return None
    if result.returncode != 0:
        return None
    return result.stdout.strip()
|
|
|
|
_SLOT_RE = re.compile(r'Slot Number:\s*(\d+)')


def _first_group(pattern, text, default):
    """Return group 1 of the first match of *pattern* in *text*, else *default*."""
    match = re.search(pattern, text, re.MULTILINE)
    return match.group(1) if match else default


def _parse_disk_block(slot, block_text):
    """Build one disk record from the newline-joined lines of a single slot."""
    disk = {'slot': slot}

    # Anchored at line start so "Enclosure Device ID:" can never be picked up.
    disk['devid'] = _first_group(r'^[ \t]*Device Id:[ \t]*(\d+)', block_text, 'N/A')

    # Model: rest of the Inquiry Data line, cut at a run of 10+ whitespace
    # characters, limited to 24 characters.
    model = _first_group(r'Inquiry Data:[ \t]*(\S.*?)(?=\s{10,}|$)', block_text, None)
    disk['model'] = model.strip()[:24].rstrip() if model else 'N/A'

    # State: rest of the line only (at most 30 characters).
    state = _first_group(r'Firmware state:[ \t]*([^\r\n]{1,30})', block_text, None)
    disk['state'] = state.strip() if state else 'N/A'

    # Error counters default to '0' when the field is absent.
    disk['media_err'] = _first_group(r'Media Error Count:[ \t]*(\d+)', block_text, '0')
    disk['pred_fail'] = _first_group(r'Predictive Failure Count:[ \t]*(\d+)', block_text, '0')

    disk['smart'] = _first_group(
        r'Drive has flagged a S\.M\.A\.R\.T alert[ \t]*:[ \t]*(Yes|No)', block_text, 'N/A')

    # Temperature, e.g. "Drive Temperature :40C (104.00 F)"; fall back to any
    # "Temperature : <digits>" field when the primary format is not found.
    temp = _first_group(r'Drive Temperature[ \t]*:[ \t]*(\d+)C', block_text, None)
    if temp is None:
        temp = _first_group(r'Temperature[ \t]*:[ \t]*(\d+)', block_text, None)
    disk['temp'] = f"{temp}C" if temp is not None else 'N/A'

    # Health: any predictive failure, a SMART alert or more than 10 media
    # errors is CRITICAL; 1-10 media errors is WARNING; otherwise OK.
    me = int(disk['media_err'])
    pf = int(disk['pred_fail'])
    if me > 10 or pf > 0 or disk['smart'] == 'Yes':
        disk['health'] = 'CRITICAL'
    elif me > 0:
        disk['health'] = 'WARNING'
    else:
        disk['health'] = 'OK'
    return disk


def parse_pdlist(output):
    """Parse the text of a MegaCli physical-disk listing into disk records.

    Each disk's block starts at its "Slot Number:" line and runs up to the
    next "Slot Number:" line (or the end of the text), so the result does not
    depend on how many lines a particular MegaCli version prints per disk.

    Args:
        output: Text to parse, or None/empty when no data is available.

    Returns:
        A list of dicts, one per "Slot Number:" line, in input order, with the
        string-valued keys 'slot', 'devid', 'model', 'state', 'media_err',
        'pred_fail', 'smart', 'temp' and 'health'. Empty list for no input.
    """
    disks = []
    if not output:
        return disks

    lines = output.splitlines()

    # Locate the first line of every disk block.
    starts = []
    for idx, line in enumerate(lines):
        match = _SLOT_RE.search(line)
        if match:
            starts.append((idx, match.group(1)))

    for pos, (start, slot) in enumerate(starts):
        end = starts[pos + 1][0] if pos + 1 < len(starts) else len(lines)
        # Keep the newlines so every field regex stays confined to its own line.
        disks.append(_parse_disk_block(slot, '\n'.join(lines[start:end])))
    return disks
|
|
|
|
def print_table(disks):
    """Print the disk table, a health summary and the virtual-disk status.

    Args:
        disks: List of disk records (dicts with the keys 'slot', 'devid',
            'model', 'state', 'media_err', 'pred_fail', 'smart', 'temp' and
            'health'). Rows are colored with ANSI escapes according to
            'health'; a health value other than CRITICAL/WARNING/OK is
            printed uncolored and is not shown in the summary line.
    """
    rule = "-" * 104
    colors = {'CRITICAL': '\033[91m', 'WARNING': '\033[93m', 'OK': '\033[92m'}

    # Label the timestamp with the host's actual local time zone instead of a
    # fixed "CET" string, which is wrong on other hosts and during summer time.
    generated = datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')

    print("\nRAID STATUS REPORT")
    print(rule)
    print(f"Generated: {generated} | Total Disks: {len(disks)}")
    print(rule)
    print(f"{'Slot':<4} {'ID':<3} {'Model':<25} {'State':<20} {'MediaErr':<9} {'PredFail':<8} "
          f"{'SMART':<5} {'Temp':<6} {'Health':<10}")
    print(rule)

    stats = {'CRITICAL': 0, 'WARNING': 0, 'OK': 0}
    for disk in disks:
        health = disk['health']
        stats[health] = stats.get(health, 0) + 1
        color = colors.get(health, '')

        print(f"{color}{disk['slot']:<4} {disk['devid']:<3} {disk['model']:<25} "
              f"{disk['state'][:19]:<20} {disk['media_err']:<9} {disk['pred_fail']:<8} "
              f"{disk['smart']:<5} {disk['temp']:<6} {health:<10}\033[0m")

    print(rule)
    print(f"SUMMARY: CRITICAL={stats['CRITICAL']:2d} WARNING={stats['WARNING']:2d} "
          f"OK={stats['OK']:2d}")

    if stats['CRITICAL'] > 0:
        print(f"ALERT: {stats['CRITICAL']} disks require immediate attention")

    vd_status = get_vd_status()
    print(f"Virtual Disks: {vd_status}")
|
|
|
|
def nagios_check(disks):
    """Print a one-line Nagios status with perfdata and exit with its code.

    Args:
        disks: List of disk records; each must provide 'health' and a
            numeric-string 'media_err'.

    Exit status (this function always terminates via sys.exit):
        NAGIOS_UNKNOWN  - no disks were supplied, so nothing could be checked
        NAGIOS_CRITICAL - at least one disk has health 'CRITICAL'
        NAGIOS_WARNING  - no critical disk, at least one with health 'WARNING'
        NAGIOS_OK       - otherwise
    """
    critical = sum(1 for d in disks if d['health'] == 'CRITICAL')
    warning = sum(1 for d in disks if d['health'] == 'WARNING')
    media_err_total = sum(int(d['media_err']) for d in disks)

    perfdata = (f"total={len(disks)} critical={critical} warning={warning} "
                f"media_err_total={media_err_total}")

    if not disks:
        # An empty list means no disk data was obtained; reporting
        # "OK: All 0 disks healthy" would hide a broken check.
        message, code = "UNKNOWN: No disk data available", NAGIOS_UNKNOWN
    elif critical > 0:
        message, code = f"CRITICAL: {critical} critical disks", NAGIOS_CRITICAL
    elif warning > 0:
        message, code = f"WARNING: {warning} warning disks", NAGIOS_WARNING
    else:
        message, code = f"OK: All {len(disks)} disks healthy", NAGIOS_OK

    print(f"{message} | {perfdata}")
    sys.exit(code)
|
|
|
|
def save_csv(disks):
    """Export the disk records to a timestamped CSV file.

    The file is named raid-report-<YYYYmmdd-HHMM>.csv, is created relative to
    the current working directory, starts with a header row, and its absolute
    path is printed once writing has finished.

    Args:
        disks: Iterable of dicts keyed by the column names listed below.
    """
    columns = ['slot', 'devid', 'model', 'state', 'media_err',
               'pred_fail', 'smart', 'temp', 'health']
    stamp = datetime.now().strftime('%Y%m%d-%H%M')
    filename = f"raid-report-{stamp}.csv"

    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'w', newline='') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        for record in disks:
            writer.writerow(record)

    print(f"\nCSV exported: {os.path.abspath(filename)}")
|
|
|
|
def get_vd_status(vd_output=None):
    """Summarize the virtual-disk state as one short label.

    Args:
        vd_output: Text of a virtual-disk listing to classify. When None
            (the default) the listing is fetched by running MegaCli with
            '-LDInfo -Lall -aALL'.

    Returns:
        "Unknown" when there is no output, "Degraded/Rebuild" when the text
        mentions 'Degraded' or 'Rebuild', "Optimal" when it mentions
        'Optimal' (and nothing degraded), otherwise "Check manually".
    """
    if vd_output is None:
        vd_output = run_megacli('-LDInfo -Lall -aALL')
    if not vd_output:
        return "Unknown"
    # Problem states are tested first: with several virtual disks, one
    # optimal volume must not mask another that is degraded or rebuilding.
    if any(marker in vd_output for marker in ('Degraded', 'Rebuild')):
        return "Degraded/Rebuild"
    if 'Optimal' in vd_output:
        return "Optimal"
    return "Check manually"
|
|
|
|
def main():
    """Command-line entry point: Nagios check (--check) or status table.

    Queries MegaCli for the physical-disk list. If that yields no output the
    script exits with NAGIOS_UNKNOWN instead of reporting on zero disks.
    Otherwise it runs the Nagios check (which exits by itself) or prints the
    table and, with --csv, also writes the CSV report.
    """
    parser = argparse.ArgumentParser(description='MegaRAID Monitor - Clean Output')
    parser.add_argument('--check', action='store_true', help='Nagios mode')
    parser.add_argument('--csv', action='store_true', help='Save CSV')
    args = parser.parse_args()

    pd_data = run_megacli('-PDList -aALL')
    if not pd_data:
        # Nagios reads the status line from stdout; in interactive use the
        # error belongs on stderr.
        print("UNKNOWN: unable to get physical disk list from MegaCli",
              file=sys.stdout if args.check else sys.stderr)
        sys.exit(NAGIOS_UNKNOWN)

    disks = parse_pdlist(pd_data)

    if args.check:
        nagios_check(disks)
    else:
        print_table(disks)
        if args.csv:
            save_csv(disks)


if __name__ == '__main__':
    main()
|