-
Notifications
You must be signed in to change notification settings - Fork 6
/
monitor.py
109 lines (93 loc) · 3.83 KB
/
monitor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#2024 Mitch Hall 45Drives
# Monitoring script. It runs for 10 minutes and then stops itself. It is meant to be used
# in conjunction with crontab; running it at the top of every hour is recommended.
# The script monitors CPU load average, NUMA statistics (numastat), memory usage, and disk I/O stats over time.
# The script appends to four log files in /var/log: load_average.log, disk_io.log, numa_stats.log,
# and memory_usage.log.
import psutil
import subprocess
import time
from datetime import datetime
import multiprocessing
import signal
import sys
def bytes_to_gib(bytes):
    """Convert a byte count to gibibytes (1 GiB = 1024 ** 3 bytes).

    Args:
        bytes: Number of bytes.

    Returns:
        The same quantity expressed in GiB, as the result of a true division.
    """
    bytes_per_gib = 1 << 30  # same value as 1024 ** 3
    return bytes / bytes_per_gib
def log_numa_stats(stop_event, log_path='/var/log/numa_stats.log', interval=1):
    """Append the output of ``numastat -v`` to a log file until stopped.

    Args:
        stop_event: Event-like object; sampling stops once it is set.
        log_path: File the samples are appended to.
        interval: Seconds to wait between samples.
    """
    while not stop_event.is_set():
        sample = subprocess.getoutput('numastat -v')
        with open(log_path, 'a') as f:
            # getoutput() strips the trailing newline, so add one back;
            # otherwise the last line of one sample runs into the first
            # line of the next.
            f.write(sample + "\n")
        # Unlike time.sleep(), wait() returns as soon as the event is set,
        # so the logger stops promptly on shutdown.
        stop_event.wait(interval)
def log_memory_usage(stop_event, log_path='/var/log/memory_usage.log', interval=1):
    """Append one CSV row of system memory statistics per interval until stopped.

    Each row holds a local timestamp followed by the ``total``, ``available``,
    ``used`` and ``free`` fields of ``psutil.virtual_memory()``, written
    exactly as psutil reports them. The column header is repeated before
    every 60th row.

    Args:
        stop_event: Event-like object; sampling stops once it is set.
        log_path: File the rows are appended to.
        interval: Seconds to wait between samples.
    """
    header = "timestamp,total_memory,available_memory,used_memory,free_memory\n"
    line_count = 0
    while not stop_event.is_set():
        mem = psutil.virtual_memory()
        # astimezone() makes the datetime timezone-aware. On a naive datetime
        # %Z expands to an empty string and leaves a gap in the timestamp.
        timestamp = datetime.now().astimezone().strftime('%a %b %d %I:%M:%S %p %Z %Y')
        # One open per sample covers both the periodic header and the row.
        with open(log_path, 'a') as f:
            if line_count % 60 == 0:
                f.write(header)
            f.write(f"{timestamp},{mem.total},{mem.available},{mem.used},{mem.free}\n")
        line_count += 1
        # Unlike time.sleep(), wait() returns as soon as the event is set.
        stop_event.wait(interval)
def log_disk_io(stop_event, log_path='/var/log/disk_io.log', interval=1):
    """Append one CSV row of system-wide disk I/O counters per interval until stopped.

    Each row holds a local timestamp, the read and write operation counts,
    and the read and written byte totals converted to GiB with six decimals.
    The column header is repeated before every 60th row.

    Args:
        stop_event: Event-like object; sampling stops once it is set.
        log_path: File the rows are appended to.
        interval: Seconds to wait between samples.
    """
    header = "timestamp,read_count,write_count,read_gib,write_gib\n"
    line_count = 0
    while not stop_event.is_set():
        io = psutil.disk_io_counters()
        # psutil returns None when it finds no disks to report on; skip the
        # sample instead of failing on the attribute access below.
        if io is not None:
            read_gib = bytes_to_gib(io.read_bytes)
            write_gib = bytes_to_gib(io.write_bytes)
            # astimezone() makes the datetime timezone-aware. On a naive
            # datetime %Z expands to an empty string.
            timestamp = datetime.now().astimezone().strftime('%a %b %d %I:%M:%S %p %Z %Y')
            # One open per sample covers both the periodic header and the row.
            with open(log_path, 'a') as f:
                if line_count % 60 == 0:
                    f.write(header)
                f.write(f"{timestamp},{io.read_count},{io.write_count},{read_gib:.6f},{write_gib:.6f}\n")
            line_count += 1
        # Unlike time.sleep(), wait() returns as soon as the event is set.
        stop_event.wait(interval)
def log_load_average(stop_event, log_path='/var/log/load_average.log', interval=1):
    """Append one CSV row of load averages per interval until stopped.

    Each row holds a local timestamp followed by the first three
    whitespace-separated fields of ``/proc/loadavg``, copied verbatim.
    The column header is repeated before every 60th row.

    Args:
        stop_event: Event-like object; sampling stops once it is set.
        log_path: File the rows are appended to.
        interval: Seconds to wait between samples.
    """
    header = "timestamp,load1,load5,load15\n"
    line_count = 0
    while not stop_event.is_set():
        with open('/proc/loadavg', 'r') as f:
            load1, load5, load15 = f.read().split()[:3]
        # astimezone() makes the datetime timezone-aware. On a naive datetime
        # %Z expands to an empty string and leaves a gap in the timestamp.
        timestamp = datetime.now().astimezone().strftime('%a %b %d %I:%M:%S %p %Z %Y')
        # One open per sample covers both the periodic header and the row.
        with open(log_path, 'a') as f:
            if line_count % 60 == 0:
                f.write(header)
            f.write(f"{timestamp},{load1},{load5},{load15}\n")
        line_count += 1
        # Unlike time.sleep(), wait() returns as soon as the event is set.
        stop_event.wait(interval)
def terminate_after_timeout(timeout, stop_event):
    """Set ``stop_event`` after ``timeout`` seconds, or return early if it is already set.

    Args:
        timeout: Maximum number of seconds to wait before setting the event.
        stop_event: Event-like object shared with the logging loops.
    """
    # wait() returns as soon as the event is set by someone else (for example
    # on an interrupt). A plain time.sleep() would keep this process alive for
    # the full timeout and block anyone joining it.
    stop_event.wait(timeout)
    stop_event.set()
def handle_interrupt(signum, frame, stop_event):
    """Flag the shared stop event, then exit the current process with status 0.

    Args:
        signum: Signal number passed by the signal machinery (not used).
        frame: Interrupted stack frame (not used).
        stop_event: Event-like object to set before exiting.

    Raises:
        SystemExit: Always, with exit code 0.
    """
    stop_event.set()
    raise SystemExit(0)
def start_logging(duration=600):
    """Run all loggers in separate processes until the timer or an interrupt stops them.

    Starts one process per logger plus one timer process that sets the shared
    stop event after ``duration`` seconds, then waits for all of them.

    Args:
        duration: Seconds the loggers run before the timer stops them
            (default 600, i.e. 10 minutes).
    """
    stop_event = multiprocessing.Event()

    # Handle SIGINT to gracefully exit
    signal.signal(signal.SIGINT, lambda s, f: handle_interrupt(s, f, stop_event))

    loggers = (log_numa_stats, log_memory_usage, log_disk_io, log_load_average)
    processes = [
        multiprocessing.Process(target=logger, args=(stop_event,))
        for logger in loggers
    ]
    processes.append(
        multiprocessing.Process(target=terminate_after_timeout, args=(duration, stop_event))
    )

    for p in processes:
        p.start()

    try:
        for p in processes:
            p.join()
    except (KeyboardInterrupt, SystemExit):
        # The SIGINT handler leaves via sys.exit(), so an interrupt surfaces
        # here as SystemExit. KeyboardInterrupt alone would never match.
        stop_event.set()
        for p in processes:
            # Give each child a moment to notice the event, then stop any
            # straggler (e.g. a timer still sleeping) so shutdown cannot hang.
            p.join(timeout=5)
            if p.is_alive():
                p.terminate()
                p.join()
        print("\nScript terminated gracefully.")
# Start the loggers only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    start_logging()