#!/usr/bin/env python3
"""
Dell MegaRAID Status Report - Clean Table Only

Parses `MegaCli64 -PDList` output into per-disk records and renders them
as a colour-coded table, a Nagios plugin result, or a CSV export.
"""

import argparse
import csv
import os
import re
import subprocess
import sys
from datetime import datetime

# Nagios plugin exit codes.
NAGIOS_OK = 0
NAGIOS_WARNING = 1
NAGIOS_CRITICAL = 2
NAGIOS_UNKNOWN = 3

# Dell/LSI default install location of the MegaCLI binary.
MEGACLI = '/opt/MegaRAID/MegaCli/MegaCli64'


def run_megacli(cmd):
    """Run MegaCli64 with *cmd* (space-separated arguments).

    Returns stripped stdout on success, or None on non-zero exit status,
    timeout, or a missing/unexecutable binary.  Callers treat None as
    "no data available".
    """
    try:
        result = subprocess.run([MEGACLI] + cmd.split(),
                                capture_output=True, text=True, timeout=30)
    except (subprocess.TimeoutExpired, OSError):
        # Narrowed from a bare `except:`, which would also have swallowed
        # KeyboardInterrupt/SystemExit.
        return None
    return result.stdout.strip() if result.returncode == 0 else None


def _first_group(pattern, text, default):
    """Return group(1) of the first *pattern* match in *text*, else *default*."""
    match = re.search(pattern, text)
    return match.group(1) if match else default


def parse_pdlist(output):
    """Parse `-PDList` output into a list of per-disk dicts.

    Each dict carries: slot, devid, model, state, media_err, pred_fail,
    smart, temp and a derived 'health' of OK/WARNING/CRITICAL.
    Returns [] for empty/None input.
    """
    disks = []
    if not output:
        return disks

    lines = output.splitlines()
    # Index of every "Slot Number:" line; each disk's block runs until the
    # next slot (or EOF).  The original fixed 35-line window advanced by 30
    # lines, which could skip a slot or re-read part of the previous one.
    slot_starts = [i for i, line in enumerate(lines)
                   if re.search(r'Slot Number:\s*\d+', line)]

    for pos, start in enumerate(slot_starts):
        end = slot_starts[pos + 1] if pos + 1 < len(slot_starts) else len(lines)
        # Join with newlines, NOT spaces: line-bounded captures such as the
        # firmware state must stop at end-of-line instead of absorbing the
        # next field's text.
        block = '\n'.join(lines[start:end])

        disk = {'slot': re.search(r'Slot Number:\s*(\d+)', block).group(1)}
        disk['devid'] = _first_group(r'Device Id:\s*(\d+)', block, 'N/A')

        # Model: remainder of the "Inquiry Data" line, capped at 24 chars.
        model = _first_group(r'Inquiry Data:\s*(\S.*)', block, None)
        disk['model'] = model.strip()[:24] if model else 'N/A'

        disk['state'] = _first_group(r'Firmware state:\s*([^\r\n]{1,30})',
                                     block, 'N/A').strip()

        disk['media_err'] = _first_group(r'Media Error Count:\s*(\d+)',
                                         block, '0')
        disk['pred_fail'] = _first_group(r'Predictive Failure Count:\s*(\d+)',
                                         block, '0')
        disk['smart'] = _first_group(
            r'Drive has flagged a S\.M\.A\.R\.T alert\s*:\s*(Yes|No)',
            block, 'N/A')

        # Temperature: "Drive Temperature :40C (104.00 F)", with a looser
        # fallback for other firmware output formats.
        temp = _first_group(r'Drive Temperature\s*:\s*(\d+)C', block, None)
        if temp is None:
            temp = _first_group(r'Temperature\s*:\s*(\d+)', block, None)
        disk['temp'] = f"{temp}C" if temp else 'N/A'

        # Health policy: any predictive failure or SMART alert (or >10 media
        # errors) is CRITICAL; any media error at all is WARNING.
        media_errors = int(disk['media_err'])
        predictive = int(disk['pred_fail'])
        if media_errors > 10 or predictive > 0 or disk['smart'] == 'Yes':
            disk['health'] = 'CRITICAL'
        elif media_errors > 0:
            disk['health'] = 'WARNING'
        else:
            disk['health'] = 'OK'

        disks.append(disk)
    return disks


def print_table(disks):
    """Print the colour-coded per-disk table plus a summary footer."""
    print("\nRAID STATUS REPORT")
    print("-" * 104)
    # Report the host's actual timezone name instead of a hard-coded "CET".
    stamp = datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')
    print(f"Generated: {stamp} | Total Disks: {len(disks)}")
    print("-" * 104)
    print(f"{'Slot':<4} {'ID':<3} {'Model':<25} {'State':<20} {'MediaErr':<9} {'PredFail':<8} "
          f"{'SMART':<5} {'Temp':<6} {'Health':<10}")
    print("-" * 104)

    stats = {'CRITICAL': 0, 'WARNING': 0, 'OK': 0}
    colors = {'CRITICAL': '\033[91m', 'WARNING': '\033[93m', 'OK': '\033[92m'}
    for disk in disks:
        health = disk['health']
        stats[health] += 1
        color = colors[health]
        print(f"{color}{disk['slot']:<4} {disk['devid']:<3} {disk['model']:<25} "
              f"{disk['state'][:19]:<20} {disk['media_err']:<9} {disk['pred_fail']:<8} "
              f"{disk['smart']:<5} {disk['temp']:<6} {health:<10}\033[0m")

    print("-" * 104)
    print(f"SUMMARY: CRITICAL={stats['CRITICAL']:2d} WARNING={stats['WARNING']:2d} "
          f"OK={stats['OK']:2d}")

    if stats['CRITICAL'] > 0:
        print(f"ALERT: {stats['CRITICAL']} disks require immediate attention")

    print(f"Virtual Disks: {get_vd_status()}")


def nagios_check(disks):
    """Emit a one-line Nagios status with perfdata and exit accordingly."""
    critical = sum(1 for d in disks if d['health'] == 'CRITICAL')
    warning = sum(1 for d in disks if d['health'] == 'WARNING')

    perfdata = (f"total={len(disks)} critical={critical} warning={warning} "
                f"media_err_total={sum(int(d['media_err']) for d in disks)}")

    if critical > 0:
        print(f"CRITICAL: {critical} critical disks | {perfdata}")
        sys.exit(NAGIOS_CRITICAL)
    elif warning > 0:
        print(f"WARNING: {warning} warning disks | {perfdata}")
        sys.exit(NAGIOS_WARNING)
    else:
        print(f"OK: All {len(disks)} disks healthy | {perfdata}")
        sys.exit(NAGIOS_OK)


def save_csv(disks):
    """Write the disk records to a timestamped CSV in the working directory."""
    filename = f"raid-report-{datetime.now().strftime('%Y%m%d-%H%M')}.csv"
    with open(filename, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['slot', 'devid', 'model',
                                               'state', 'media_err',
                                               'pred_fail', 'smart',
                                               'temp', 'health'])
        writer.writeheader()
        writer.writerows(disks)
    print(f"\nCSV exported: {os.path.abspath(filename)}")


def get_vd_status():
    """Summarise virtual-disk state from `-LDInfo -Lall -aALL` output."""
    vd = run_megacli('-LDInfo -Lall -aALL')
    if not vd:
        return "Unknown"
    if 'Optimal' in vd:
        return "Optimal"
    if any(word in vd for word in ('Degraded', 'Rebuild')):
        return "Degraded/Rebuild"
    return "Check manually"


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='MegaRAID Monitor - Clean Output')
    parser.add_argument('--check', action='store_true', help='Nagios mode')
    parser.add_argument('--csv', action='store_true', help='Save CSV')
    args = parser.parse_args()

    disks = parse_pdlist(run_megacli('-PDList -aALL'))

    if args.check:
        nagios_check(disks)
    else:
        print_table(disks)
        if args.csv:
            save_csv(disks)