#!/bin/bash
#
# Turmli Calendar - Raspberry Pi Monitoring Script
# Monitors the health and performance of the deployed application.
#
# Usage: <script> {status|full|monitor|resources|health|errors|log}
#
# Every probe is best-effort: a tool that is missing or returns nothing
# results in a skipped or "unknown" line rather than an aborted report.

set -e

# Configuration
SERVICE_NAME="turmli-calendar"
APP_PORT="8000"
APP_DIR="/opt/turmli-calendar"
LOG_FILE="/var/log/turmli-calendar/monitor.log"
LOG_DIR="${LOG_FILE%/*}"
ALERT_THRESHOLD_MEM=200 # MB
ALERT_THRESHOLD_CPU=80  # Percentage

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

#######################################
# Output helpers
#######################################

# Print the report banner with the current timestamp.
print_header() {
  printf '%b\n' "${CYAN}════════════════════════════════════════════════${NC}"
  printf '%b\n' "${CYAN} Turmli Calendar - System Monitor${NC}"
  printf '%b\n' "${CYAN} $(date '+%Y-%m-%d %H:%M:%S')${NC}"
  printf '%b\n' "${CYAN}════════════════════════════════════════════════${NC}"
  echo
}

# Print a section title. $1 - title text.
print_section() {
  printf '%b━━━ %s ━━━%b\n' "$BLUE" "$1" "$NC"
}

# Print a success line. $1 - message.
print_ok() {
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
}

# Print a warning line. $1 - message.
print_warning() {
  printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$1"
}

# Print an error line. $1 - message.
print_error() {
  printf '%b✗%b %s\n' "$RED" "$NC" "$1"
}

#######################################
# Small probes shared by several reports
#######################################

# Succeed when $1 is a non-empty string of decimal digits.
is_uint() {
  [[ "${1:-}" =~ ^[0-9]+$ ]]
}

# Succeed when $1 looks like a non-negative decimal number (e.g. 12 or 12.5).
is_number() {
  [[ "${1:-}" =~ ^[0-9]+([.][0-9]+)?$ ]]
}

# Succeed when $1 > $2, compared as floating point numbers.
# awk is used instead of bc so the script has no extra dependency.
float_gt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

# Succeed when the events API answers within 5 seconds with a 2xx status.
api_is_up() {
  curl -s -f -m 5 "http://localhost:${APP_PORT}/api/events" > /dev/null 2>&1
}

# Print the service's MainPID as reported by systemd ("0" when it has none).
# Prints nothing when systemctl cannot be queried; errors are silenced on
# purpose because callers treat an empty result as "no process".
service_main_pid() {
  systemctl show "$SERVICE_NAME" -p MainPID --value 2> /dev/null || true
}

# Print used system memory as an integer percentage (truncated).
# Prints nothing when the value cannot be determined.
system_mem_percent() {
  LC_ALL=C free -m | awk '/^Mem:/ && $2 > 0 { printf "%d\n", ($3 * 100) / $2 }'
}

# Print the root filesystem usage as an integer percentage.
# -P keeps df output on a single line per filesystem.
root_disk_percent() {
  df -P / | awk 'NR == 2 { sub(/%/, "", $5); print $5 }'
}

#######################################
# Reports
#######################################

# System information: host, OS, kernel, CPU and (if vcgencmd exists)
# temperature and throttling state.
show_system_info() {
  print_section "System Information"

  local os_name cpu_model
  os_name=$(grep PRETTY_NAME /etc/os-release | cut -d'"' -f2) || os_name=""
  cpu_model=$(grep 'model name' /proc/cpuinfo | head -1 | cut -d':' -f2 | xargs) \
    || cpu_model=""

  # Hostname and OS
  echo "Hostname: $(hostname)"
  echo "OS: $os_name"
  echo "Kernel: $(uname -r)"
  echo "Uptime: $(uptime -p)"

  # CPU info
  echo "CPU: $cpu_model"
  echo "Cores: $(nproc)"

  # Temperature
  if command -v vcgencmd &> /dev/null; then
    local temp throttled
    temp=$(vcgencmd measure_temp | cut -d'=' -f2) || temp=""
    echo "Temperature: $temp"

    # Check throttling. An empty answer (vcgencmd failed) is not treated
    # as throttling, to avoid a false warning.
    throttled=$(vcgencmd get_throttled | cut -d'=' -f2) || throttled=""
    if [[ -n "$throttled" && "$throttled" != "0x0" ]]; then
      print_warning "CPU throttling detected: $throttled"
    fi
  fi

  echo
}

# Service status: active state, main PID, approximate process uptime and
# the systemd restart counter.
check_service_status() {
  print_section "Service Status"

  if systemctl is-active --quiet "$SERVICE_NAME"; then
    print_ok "Service is running"

    # Get PID and uptime
    local pid
    pid=$(service_main_pid)
    if [[ -n "$pid" && "$pid" != "0" ]]; then
      echo "PID: $pid"

      # Process uptime, using the mtime of /proc/<pid> as the start time.
      # NOTE(review): this is an approximation of the process start time.
      # The stat call is guarded because the process may exit between the
      # existence check and the call.
      local start_time current_time uptime_s
      if [[ -f "/proc/$pid/stat" ]] \
        && start_time=$(stat -c %Y "/proc/$pid" 2> /dev/null); then
        current_time=$(date +%s)
        uptime_s=$((current_time - start_time))
        echo "Process uptime: $((uptime_s / 3600))h $((uptime_s % 3600 / 60))m"
      fi
    fi
  else
    print_error "Service is not running"
    echo "Last exit status: $(systemctl show "$SERVICE_NAME" -p ExecMainStatus --value)"
  fi

  # Show recent restarts. The value is validated first because the
  # property may come back empty.
  local restarts
  restarts=$(systemctl show "$SERVICE_NAME" -p NRestarts --value) || restarts=""
  if is_uint "$restarts" && ((restarts > 0)); then
    print_warning "Service has restarted $restarts times"
  fi

  echo
}

# Application health check: events API (with event count and last update
# time extracted from its JSON body) and the web interface root page.
check_application_health() {
  print_section "Application Health"

  # Test API endpoint. The body is fetched once, with a timeout, and reused
  # for both the liveness check and the JSON fields below.
  local response
  if response=$(curl -s -f -m 5 "http://localhost:${APP_PORT}/api/events" 2> /dev/null); then
    print_ok "API endpoint is responding"

    if [[ -n "$response" ]]; then
      # Get event count
      local events last_updated
      events=$(printf '%s\n' "$response" \
        | python3 -c "import sys, json; data=json.load(sys.stdin); print(len(data.get('events', [])))" 2> /dev/null) \
        || events="unknown"
      echo "Calendar events: $events"

      # Check last update time
      last_updated=$(printf '%s\n' "$response" \
        | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('last_updated', 'unknown'))" 2> /dev/null) \
        || last_updated="unknown"
      echo "Last updated: $last_updated"
    fi
  else
    print_error "API endpoint not responding"
  fi

  # Test web interface
  if curl -s -f -m 5 "http://localhost:${APP_PORT}/" > /dev/null 2>&1; then
    print_ok "Web interface is accessible"
  else
    print_error "Web interface not accessible"
  fi

  echo
}

# Resource usage: system memory, swap, service process memory/CPU and the
# load average, with warnings when thresholds are exceeded.
show_resource_usage() {
  print_section "Resource Usage"

  # Memory usage (one `free` call; LC_ALL=C keeps the "Mem:" label stable).
  local mem_total="" mem_used="" mem_percent
  read -r mem_total mem_used \
    < <(LC_ALL=C free -m | awk '/^Mem:/ { print $2, $3 }') || true
  if is_uint "$mem_total" && is_uint "$mem_used" && ((mem_total > 0)); then
    mem_percent=$((mem_used * 100 / mem_total))
    echo "System Memory: ${mem_used}/${mem_total} MB (${mem_percent}%)"
    if ((mem_percent > 90)); then
      print_warning "High memory usage detected"
    fi
  else
    print_warning "Unable to read system memory usage"
  fi

  # Swap usage (only reported when swap is configured).
  local swap_total="" swap_used="" swap_percent
  read -r swap_total swap_used \
    < <(LC_ALL=C free -m | awk '/^Swap:/ { print $2, $3 }') || true
  if is_uint "$swap_total" && is_uint "$swap_used" && ((swap_total > 0)); then
    swap_percent=$((swap_used * 100 / swap_total))
    echo "Swap: ${swap_used}/${swap_total} MB (${swap_percent}%)"
    if ((swap_percent > 50)); then
      print_warning "High swap usage - performance may be degraded"
    fi
  fi

  # Process-specific memory (VmRSS is reported in kB; converted to MB).
  local pid proc_mem
  pid=$(service_main_pid)
  if [[ -n "$pid" && "$pid" != "0" && -f "/proc/$pid/status" ]]; then
    proc_mem=$(awk '/^VmRSS:/ { print $2 / 1024 }' "/proc/$pid/status" 2> /dev/null) \
      || proc_mem=""
    if is_number "$proc_mem"; then
      printf "Process Memory: %.1f MB\n" "$proc_mem"
      if float_gt "$proc_mem" "$ALERT_THRESHOLD_MEM"; then
        print_warning "Process memory exceeds threshold (${ALERT_THRESHOLD_MEM} MB)"
      fi
    fi
  fi

  # CPU usage. /proc/loadavg is read directly so the values do not depend
  # on the locale-specific formatting of `uptime`.
  echo
  echo "CPU Load:"
  local load_1="" load_5="" load_15=""
  read -r load_1 load_5 load_15 _ < /proc/loadavg || true
  echo " Load average: ${load_1}, ${load_5}, ${load_15}"

  # Process CPU usage (rough estimate)
  local cpu_usage
  if [[ -n "$pid" && "$pid" != "0" && -f "/proc/$pid/stat" ]]; then
    cpu_usage=$(ps -p "$pid" -o %cpu= 2> /dev/null | tr -d ' ') || cpu_usage=""
    if is_number "$cpu_usage"; then
      printf " Process CPU: %.1f%%\n" "$cpu_usage"
      if float_gt "$cpu_usage" "$ALERT_THRESHOLD_CPU"; then
        print_warning "High CPU usage detected"
      fi
    fi
  fi

  echo
}

# Disk usage: root filesystem plus the application and log directories.
show_disk_usage() {
  print_section "Disk Usage"

  # Root filesystem
  local disk_usage disk_percent
  disk_usage=$(df -h / | tail -1)
  echo "Root filesystem:"
  echo " $disk_usage"

  disk_percent=$(root_disk_percent)
  if is_uint "$disk_percent" && ((disk_percent > 80)); then
    print_warning "Disk usage above 80%"
  fi

  # Application directory
  local app_size log_size
  if [[ -d "$APP_DIR" ]]; then
    app_size=$(du -sh "$APP_DIR" 2> /dev/null | cut -f1)
    echo "Application directory: $app_size"
  fi

  # Log directory
  if [[ -d "$LOG_DIR" ]]; then
    log_size=$(du -sh "$LOG_DIR" 2> /dev/null | cut -f1)
    echo "Log directory: $log_size"
  fi

  echo
}

# Network statistics: address of every non-loopback interface that is up,
# plus the number of established TCP connections on the application port.
show_network_stats() {
  print_section "Network Statistics"

  # Connection count: established sockets whose *local* address ends in
  # :APP_PORT. It is the same for every interface, so compute it once.
  local connections
  connections=$(ss -tan | awk -v port=":${APP_PORT}" \
    '$1 == "ESTAB" && $4 ~ (port "$") { n++ } END { print n + 0 }')

  # Network interfaces
  local iface ip_addr
  while read -r iface _; do
    # `ip -brief` shows some devices as name@parent; only the name is a
    # valid argument to `ip addr show`.
    iface=${iface%%@*}
    if [[ -z "$iface" || "$iface" == "lo" ]]; then
      continue
    fi

    echo "Interface: $iface"
    ip_addr=$(ip -brief addr show "$iface" | awk '{ print $3 }')
    echo " IP: $ip_addr"
    echo " Active connections on port ${APP_PORT}: $connections"
  done < <(ip -brief link show | awk '/UP/ { print $1 }')

  echo
}

# Recent errors: last 10 journal entries of priority "err" for the service.
show_recent_errors() {
  print_section "Recent Errors (last 10)"

  journalctl -u "$SERVICE_NAME" -p err -n 10 --no-pager 2> /dev/null \
    || echo "No recent errors"

  echo
}

# Performance summary: overall HEALTHY / DEGRADED / CRITICAL verdict.
# A stopped service is CRITICAL; any other failed check is DEGRADED and
# never downgrades an existing CRITICAL verdict.
show_performance_summary() {
  print_section "Performance Summary"

  local status="HEALTHY"
  local issues=0

  # Check service
  # issues is incremented with $(( )) rather than ((issues++)): the latter
  # returns status 1 when the old value is 0 and would trip `set -e`.
  if ! systemctl is-active --quiet "$SERVICE_NAME"; then
    status="CRITICAL"
    issues=$((issues + 1))
    print_error "Service not running"
  fi

  # Check API
  if ! api_is_up; then
    [[ "$status" == "CRITICAL" ]] || status="DEGRADED"
    issues=$((issues + 1))
    print_warning "API not responding"
  fi

  # Check memory
  local mem_percent
  mem_percent=$(system_mem_percent)
  if is_uint "$mem_percent" && ((mem_percent > 90)); then
    [[ "$status" == "CRITICAL" ]] || status="DEGRADED"
    issues=$((issues + 1))
    print_warning "High memory usage"
  fi

  # Check disk
  local disk_percent
  disk_percent=$(root_disk_percent)
  if is_uint "$disk_percent" && ((disk_percent > 90)); then
    [[ "$status" == "CRITICAL" ]] || status="DEGRADED"
    issues=$((issues + 1))
    print_warning "High disk usage"
  fi

  echo
  if ((issues == 0)); then
    print_ok "System Status: $status"
  elif ((issues < 2)); then
    print_warning "System Status: $status ($issues issue)"
  else
    print_error "System Status: $status ($issues issues)"
  fi

  echo
}

# Continuous monitoring mode: redraw the main reports every 30 seconds
# until interrupted.
monitor_continuous() {
  while true; do
    # `clear` can fail (e.g. TERM unset); that must not end the loop.
    clear || true
    print_header
    check_service_status
    check_application_health
    show_resource_usage
    show_performance_summary

    echo "Press Ctrl+C to exit"
    echo "Refreshing in 30 seconds..."
    sleep 30
  done
}

# Log monitoring data: append one line
#   <timestamp>,mem=<used MB>,cpu=<1-min load>,api=<UP|DOWN>
# to LOG_FILE. Returns non-zero when the log cannot be written.
log_metrics() {
  local timestamp mem_used cpu_load
  local api_status="DOWN"

  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  mem_used=$(LC_ALL=C free -m | awk '/^Mem:/ { print $3 }')
  # First field of /proc/loadavg is the 1-minute load average; reading it
  # directly avoids locale-dependent `uptime` output and stray whitespace.
  cpu_load=$(cut -d' ' -f1 /proc/loadavg)

  if api_is_up; then
    api_status="UP"
  fi

  if ! mkdir -p -- "$LOG_DIR" 2> /dev/null; then
    print_error "Cannot create log directory: $LOG_DIR" >&2
    return 1
  fi

  if ! echo "$timestamp,mem=$mem_used,cpu=$cpu_load,api=$api_status" >> "$LOG_FILE"; then
    print_error "Cannot write to $LOG_FILE" >&2
    return 1
  fi
}

# Print the usage text to stdout.
print_usage() {
  echo "Usage: $0 {status|full|monitor|resources|health|errors|log}"
  echo
  echo "Commands:"
  echo " status - Quick status check"
  echo " full - Complete system analysis"
  echo " monitor - Continuous monitoring (30s refresh)"
  echo " resources - Resource usage details"
  echo " health - Application health check"
  echo " errors - Show recent errors"
  echo " log - Log metrics to file"
  echo
  echo "Examples:"
  echo " $0 status # Quick status"
  echo " $0 monitor # Live monitoring"
  echo " $0 full # Full report"
}

# Command line interface. $1 - sub-command; anything else prints usage
# and exits with status 1.
main() {
  case "${1:-}" in
    status)
      print_header
      check_service_status
      check_application_health
      show_performance_summary
      ;;
    full)
      print_header
      show_system_info
      check_service_status
      check_application_health
      show_resource_usage
      show_disk_usage
      show_network_stats
      show_recent_errors
      show_performance_summary
      ;;
    monitor)
      monitor_continuous
      ;;
    resources)
      print_header
      show_resource_usage
      show_disk_usage
      ;;
    health)
      print_header
      check_application_health
      ;;
    errors)
      print_header
      show_recent_errors
      ;;
    log)
      log_metrics
      echo "Metrics logged to $LOG_FILE"
      ;;
    *)
      print_usage
      exit 1
      ;;
  esac
}

main "$@"

exit 0