Commit All
This commit is contained in:
		
							
								
								
									
										389
									
								
								deployment/monitor.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										389
									
								
								deployment/monitor.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,389 @@
 | 
			
		||||
#!/bin/bash
 | 
			
		||||
 | 
			
		||||
# Turmli Calendar - Raspberry Pi Monitoring Script
 | 
			
		||||
# Monitors the health and performance of the deployed application
 | 
			
		||||
 | 
			
		||||
# Abort on the first unhandled command failure.
set -e

# Configuration
readonly SERVICE_NAME="turmli-calendar"                   # systemd unit queried via systemctl/journalctl
readonly APP_PORT="8000"                                  # localhost port probed by the curl checks
readonly LOG_FILE="/var/log/turmli-calendar/monitor.log"  # file appended to by the `log` command
readonly ALERT_THRESHOLD_MEM=200  # MB
readonly ALERT_THRESHOLD_CPU=80   # Percentage

# Colors
# Stored as real escape bytes (ANSI-C quoting) so they render the same
# whether they are printed with `echo -e`, plain `echo` or `printf '%s'`.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly CYAN=$'\033[0;36m'
readonly NC=$'\033[0m'  # reset ("no color")
 | 
			
		||||
 | 
			
		||||
# Helper functions
 | 
			
		||||
# Print the report banner: a framed title and the current local time,
# followed by one blank line. Uses the CYAN and NC color globals.
print_header() {
    local rule="════════════════════════════════════════════════"

    printf '%b\n' "${CYAN}${rule}${NC}"
    printf '%b\n' "${CYAN}    Turmli Calendar - System Monitor${NC}"
    printf '%b\n' "${CYAN}    $(date '+%Y-%m-%d %H:%M:%S')${NC}"
    printf '%b\n' "${CYAN}${rule}${NC}"
    echo
}
 | 
			
		||||
 | 
			
		||||
# Print a section heading in the BLUE color.
# Arguments: $1 - section title, printed verbatim (backslashes in the
#                 title are not interpreted; only the color codes are).
print_section() {
    printf '%b━━━ %s ━━━%b\n' "${BLUE}" "$1" "${NC}"
}
 | 
			
		||||
 | 
			
		||||
# Print a success line: a GREEN check mark followed by the message.
# Arguments: $1 - message text, printed verbatim (backslashes in the
#                 message are not interpreted; only the color codes are).
print_ok() {
    printf '%b✓%b %s\n' "${GREEN}" "${NC}" "$1"
}
 | 
			
		||||
 | 
			
		||||
# Print a warning line: a YELLOW warning sign followed by the message.
# Arguments: $1 - message text, printed verbatim (backslashes in the
#                 message are not interpreted; only the color codes are).
print_warning() {
    printf '%b⚠%b %s\n' "${YELLOW}" "${NC}" "$1"
}
 | 
			
		||||
 | 
			
		||||
# Print an error line: a RED cross followed by the message.
# Arguments: $1 - message text, printed verbatim (backslashes in the
#                 message are not interpreted; only the color codes are).
print_error() {
    printf '%b✗%b %s\n' "${RED}" "${NC}" "$1"
}
 | 
			
		||||
 | 
			
		||||
# System information
#
# Print hostname, OS name, kernel release, uptime, CPU model and core count.
# When `vcgencmd` is available, also print the temperature it reports and
# warn if the throttle flags differ from 0x0.
show_system_info() {
    print_section "System Information"

    # Hostname and OS
    local os_name
    os_name=$(grep -m1 'PRETTY_NAME' /etc/os-release 2>/dev/null | cut -d'"' -f2)
    echo "Hostname: $(hostname)"
    echo "OS: ${os_name:-unknown}"
    echo "Kernel: $(uname -r)"
    echo "Uptime: $(uptime -p)"

    # CPU info
    # /proc/cpuinfo does not always carry a "model name" line (NOTE(review):
    # seen on some ARM kernels - confirm on the target Pi), so fall back to
    # the "Model"/"Hardware" line before giving up.
    local cpu_model
    cpu_model=$(awk '/^model name/ { sub(/^[^:]*:[ \t]*/, ""); print; exit }' \
        /proc/cpuinfo 2>/dev/null) || cpu_model=""
    if [ -z "$cpu_model" ]; then
        cpu_model=$(awk '/^(Model|Hardware)[ \t]*:/ { sub(/^[^:]*:[ \t]*/, ""); print; exit }' \
            /proc/cpuinfo 2>/dev/null) || cpu_model=""
    fi
    echo "CPU: ${cpu_model:-unknown}"
    echo "Cores: $(nproc)"

    # Temperature
    if command -v vcgencmd &> /dev/null; then
        local temp throttled
        temp=$(vcgencmd measure_temp 2>/dev/null | cut -d'=' -f2)
        echo "Temperature: ${temp:-unavailable}"

        # Check throttling; an empty value means the query itself failed,
        # which must not be reported as throttling.
        throttled=$(vcgencmd get_throttled 2>/dev/null | cut -d'=' -f2)
        if [ -n "$throttled" ] && [ "$throttled" != "0x0" ]; then
            print_warning "CPU throttling detected: $throttled"
        fi
    fi
    echo
}
 | 
			
		||||
 | 
			
		||||
# Service status
#
# Report whether the systemd unit ${SERVICE_NAME} is active. When active,
# print its main PID and how long that process has been running; when not,
# print the unit's last exit status. Finally warn if systemd reports a
# non-zero restart count.
check_service_status() {
    print_section "Service Status"

    if systemctl is-active --quiet "${SERVICE_NAME}"; then
        print_ok "Service is running"

        # Get PID and uptime
        local pid
        pid=$(systemctl show "${SERVICE_NAME}" -p MainPID --value) || pid=""
        if [[ "$pid" =~ ^[1-9][0-9]*$ ]]; then
            echo "PID: $pid"

            # Process uptime: ask ps for the elapsed seconds instead of
            # using the mtime of /proc/<pid>, which is not the start time.
            local elapsed
            elapsed=$(ps -p "$pid" -o etimes= 2>/dev/null | tr -d '[:space:]')
            if [[ "$elapsed" =~ ^[0-9]+$ ]]; then
                echo "Process uptime: $((elapsed / 3600))h $((elapsed % 3600 / 60))m"
            fi
        fi
    else
        print_error "Service is not running"
        echo "Last exit status: $(systemctl show "${SERVICE_NAME}" -p ExecMainStatus --value)"
    fi

    # Show recent restarts. The value is validated first because an empty
    # or non-numeric answer would make the integer test itself error out.
    local restarts
    restarts=$(systemctl show "${SERVICE_NAME}" -p NRestarts --value) || restarts=""
    if [[ "$restarts" =~ ^[0-9]+$ ]] && [ "$restarts" -gt 0 ]; then
        print_warning "Service has restarted $restarts times"
    fi
    echo
}
 | 
			
		||||
 | 
			
		||||
# Application health check
#
# Probe the application on localhost:${APP_PORT}:
#   - GET /api/events : report reachability and, from the JSON body, the
#     number of entries under "events" and the "last_updated" value
#     ("unknown" when the body cannot be parsed that way);
#   - GET /           : report whether the web interface answers.
check_application_health() {
    print_section "Application Health"

    local base_url="http://localhost:${APP_PORT}"
    local response events last_updated

    # Test API endpoint. The body is fetched once, with the 5 s timeout,
    # and reused below, so the checks cannot hang on an unbounded request.
    if response=$(curl -s -f -m 5 "${base_url}/api/events" 2>/dev/null); then
        print_ok "API endpoint is responding"

        if [ -n "$response" ]; then
            # Get event count
            events=$(printf '%s' "$response" \
                | python3 -c "import sys, json; data=json.load(sys.stdin); print(len(data.get('events', [])))" 2>/dev/null) \
                || events="unknown"
            echo "Calendar events: $events"

            # Check last update time
            last_updated=$(printf '%s' "$response" \
                | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('last_updated', 'unknown'))" 2>/dev/null) \
                || last_updated="unknown"
            echo "Last updated: $last_updated"
        fi
    else
        print_error "API endpoint not responding"
    fi

    # Test web interface
    if curl -s -f -m 5 "${base_url}/" > /dev/null 2>&1; then
        print_ok "Web interface is accessible"
    else
        print_error "Web interface not accessible"
    fi
    echo
}
 | 
			
		||||
 | 
			
		||||
# Resource usage
#
# Print system memory and swap usage (from `free -m`), the resident memory
# and CPU share of the service's main process, and the load average.
# Warnings are printed when memory exceeds 90 %, swap exceeds 50 %, or the
# process exceeds ALERT_THRESHOLD_MEM (MB) / ALERT_THRESHOLD_CPU (%).
show_resource_usage() {
    print_section "Resource Usage"

    # Memory usage: take one `free` snapshot so every figure is consistent.
    # awk always prints four numbers, defaulting missing fields to 0.
    local mem_total=0 mem_used=0 swap_total=0 swap_used=0
    read -r mem_total mem_used swap_total swap_used < <(
        free -m 2>/dev/null | awk '
            /^Mem:/  { mt = $2; mu = $3 }
            /^Swap:/ { st = $2; su = $3 }
            END      { print mt + 0, mu + 0, st + 0, su + 0 }'
    ) || true

    if [ "${mem_total:-0}" -gt 0 ]; then
        local mem_percent=$((mem_used * 100 / mem_total))
        echo "System Memory: ${mem_used}/${mem_total} MB (${mem_percent}%)"

        if [ "$mem_percent" -gt 90 ]; then
            print_warning "High memory usage detected"
        fi
    else
        # Avoids a division by zero when `free` gave nothing usable.
        print_warning "Unable to read system memory statistics"
    fi

    # Swap usage
    if [ "${swap_total:-0}" -gt 0 ]; then
        local swap_percent=$((swap_used * 100 / swap_total))
        echo "Swap: ${swap_used}/${swap_total} MB (${swap_percent}%)"

        if [ "$swap_percent" -gt 50 ]; then
            print_warning "High swap usage - performance may be degraded"
        fi
    fi

    # Process-specific memory (VmRSS is reported in kB; shown in MB)
    local pid
    pid=$(systemctl show "${SERVICE_NAME}" -p MainPID --value) || pid=""
    [[ "$pid" =~ ^[1-9][0-9]*$ ]] || pid=""
    if [ -n "$pid" ] && [ -r "/proc/$pid/status" ]; then
        local proc_mem
        proc_mem=$(awk '/^VmRSS:/ { printf "%.1f", $2 / 1024 }' "/proc/$pid/status" 2>/dev/null) \
            || proc_mem=""
        if [ -n "$proc_mem" ]; then
            echo "Process Memory: ${proc_mem} MB"

            # Fractional comparison done in awk, so `bc` is not required.
            if awk -v used="$proc_mem" -v limit="$ALERT_THRESHOLD_MEM" \
                'BEGIN { exit !(used + 0 > limit + 0) }'; then
                print_warning "Process memory exceeds threshold (${ALERT_THRESHOLD_MEM} MB)"
            fi
        fi
    fi

    # CPU usage
    echo
    echo "CPU Load:"
    local load
    load=$(uptime | awk -F'load average:' '{ sub(/^[ \t]+/, "", $2); print $2 }')
    echo "  Load average: $load"

    # Process CPU usage (rough estimate, as reported by ps)
    if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
        local cpu_usage
        cpu_usage=$(ps -p "$pid" -o %cpu= 2>/dev/null | tr -d ' ')
        # The process may have exited since the PID lookup; skip if ps
        # returned nothing numeric rather than feeding printf garbage.
        if [[ "$cpu_usage" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
            printf "  Process CPU: %.1f%%\n" "$cpu_usage"

            if awk -v used="$cpu_usage" -v limit="$ALERT_THRESHOLD_CPU" \
                'BEGIN { exit !(used + 0 > limit + 0) }'; then
                print_warning "High CPU usage detected"
            fi
        fi
    fi
    echo
}
 | 
			
		||||
 | 
			
		||||
# Disk usage
#
# Print the `df` line for the root filesystem (warning above 80 % used),
# plus the on-disk size of the application and log directories when they
# exist.
show_disk_usage() {
    print_section "Disk Usage"

    # Root filesystem. -P forces the POSIX one-line-per-filesystem layout,
    # so a long device name cannot wrap and shift the columns.
    echo "Root filesystem:"
    echo "  $(df -hP / | tail -n 1)"

    local disk_percent
    disk_percent=$(df -P / | awk 'NR == 2 { gsub(/%/, "", $5); print $5 }')
    if [[ "$disk_percent" =~ ^[0-9]+$ ]] && [ "$disk_percent" -gt 80 ]; then
        print_warning "Disk usage above 80%"
    fi

    # Application directory
    if [ -d "/opt/turmli-calendar" ]; then
        local app_size
        app_size=$(du -sh /opt/turmli-calendar 2>/dev/null | cut -f1)
        echo "Application directory: $app_size"
    fi

    # Log directory
    if [ -d "/var/log/turmli-calendar" ]; then
        local log_size
        log_size=$(du -sh /var/log/turmli-calendar 2>/dev/null | cut -f1)
        echo "Log directory: $log_size"
    fi
    echo
}
 | 
			
		||||
 | 
			
		||||
# Network statistics
#
# For every non-loopback interface whose `ip -brief link` line mentions UP,
# print its name and first address, together with the number of established
# TCP connections whose local port is ${APP_PORT}.
show_network_stats() {
    print_section "Network Statistics"

    # Connection count. It is not interface-specific, so compute it once.
    # Only ESTAB rows whose *local* address ends in :PORT are counted, so
    # outgoing connections to a remote port of the same number and ports
    # that merely start with the same digits are excluded.
    local connections
    connections=$(ss -tan 2>/dev/null \
        | awk -v port=":${APP_PORT}" \
            '$1 == "ESTAB" && $4 ~ (port "$") { n++ } END { print n + 0 }')

    # Network interfaces
    local iface addr
    while read -r iface; do
        # Stacked/virtual links are listed as "name@parent"; `ip addr show`
        # expects only the name part.
        iface=${iface%%@*}
        if [ -z "$iface" ] || [ "$iface" = "lo" ]; then
            continue
        fi

        echo "Interface: $iface"
        addr=$(ip -brief addr show "$iface" 2>/dev/null | awk '{print $3}')
        echo "  IP: $addr"
        echo "  Active connections on port ${APP_PORT}: $connections"
    done < <(ip -brief link show 2>/dev/null | awk '/UP/ {print $1}')
    echo
}
 | 
			
		||||
 | 
			
		||||
# Recent errors
#
# Print up to the last 10 journal entries of priority "err" or higher for
# ${SERVICE_NAME}. Prints "No recent errors" when the journal has none, and
# a warning when the journal could not be read at all.
show_recent_errors() {
    print_section "Recent Errors (last 10)"

    local entries
    if entries=$(journalctl -u "${SERVICE_NAME}" -p err -n 10 --no-pager 2>/dev/null); then
        # journalctl exits 0 even when nothing matches, so the empty case
        # has to be recognised from its output rather than its status.
        case "$entries" in
            '' | *'-- No entries --'*)
                echo "No recent errors"
                ;;
            *)
                printf '%s\n' "$entries"
                ;;
        esac
    else
        # A failed query says nothing about errors; do not claim there are none.
        print_warning "Unable to read the journal for ${SERVICE_NAME}"
    fi
    echo
}
 | 
			
		||||
 | 
			
		||||
# Performance summary
#
# Run four quick checks (service active, API reachable, memory <= 90 %,
# root disk <= 90 %), print one line per failed check, then print the
# overall status:
#   HEALTHY  - no issues
#   DEGRADED - at least one issue, service still running
#   CRITICAL - the service itself is not running
show_performance_summary() {
    print_section "Performance Summary"

    local status="HEALTHY"
    local issues=0

    # Check service
    if ! systemctl is-active --quiet "${SERVICE_NAME}"; then
        status="CRITICAL"
        # Not ((issues++)): that expression evaluates to 0 on the first
        # increment, returns status 1 and would abort the script under set -e.
        issues=$((issues + 1))
        print_error "Service not running"
    fi

    # Check API
    if ! curl -s -f -m 5 "http://localhost:${APP_PORT}/api/events" > /dev/null 2>&1; then
        # Never downgrade an already CRITICAL status.
        [ "$status" = "CRITICAL" ] || status="DEGRADED"
        issues=$((issues + 1))
        print_warning "API not responding"
    fi

    # Check memory
    local mem_percent
    mem_percent=$(free -m 2>/dev/null | awk '/^Mem:/ && $2 > 0 { print int($3 * 100 / $2) }')
    if [[ "$mem_percent" =~ ^[0-9]+$ ]] && [ "$mem_percent" -gt 90 ]; then
        [ "$status" = "CRITICAL" ] || status="DEGRADED"
        issues=$((issues + 1))
        print_warning "High memory usage"
    fi

    # Check disk (-P keeps the df record on one line)
    local disk_percent
    disk_percent=$(df -P / 2>/dev/null | awk 'NR == 2 { gsub(/%/, "", $5); print $5 }')
    if [[ "$disk_percent" =~ ^[0-9]+$ ]] && [ "$disk_percent" -gt 90 ]; then
        [ "$status" = "CRITICAL" ] || status="DEGRADED"
        issues=$((issues + 1))
        print_warning "High disk usage"
    fi

    echo
    if [ "$issues" -eq 0 ]; then
        print_ok "System Status: $status"
    elif [ "$issues" -lt 2 ] && [ "$status" != "CRITICAL" ]; then
        print_warning "System Status: $status ($issues issue)"
    elif [ "$issues" -lt 2 ]; then
        print_error "System Status: $status ($issues issue)"
    else
        print_error "System Status: $status ($issues issues)"
    fi
    echo
}
 | 
			
		||||
 | 
			
		||||
# Continuous monitoring mode
#
# Redraw the status, health, resource and summary sections in an endless
# loop until interrupted (Ctrl+C).
# Arguments: $1 - seconds between refreshes (optional, default 30)
monitor_continuous() {
    local interval="${1:-30}"

    while true; do
        # Best effort: without a usable terminal `clear` fails, and under
        # set -e that failure would otherwise end the monitor.
        clear 2>/dev/null || true
        print_header
        check_service_status
        check_application_health
        show_resource_usage
        show_performance_summary

        echo "Press Ctrl+C to exit"
        echo "Refreshing in ${interval} seconds..."
        sleep "$interval"
    done
}
 | 
			
		||||
 | 
			
		||||
# Log monitoring data
#
# Append one CSV-style record to ${LOG_FILE}:
#   <timestamp>,mem=<used MB>,cpu=<1-minute load>,api=<UP|DOWN>
# The directory holding the log file is created if it does not exist.
log_metrics() {
    local timestamp mem_used cpu_load
    local api_status="DOWN"

    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    mem_used=$(free -m | awk '/^Mem:/ {print $3}')
    # First load-average figure, with surrounding whitespace removed so the
    # record does not contain "cpu= 0.15".
    cpu_load=$(uptime | awk -F'load average:' \
        '{ split($2, parts, ","); gsub(/[[:space:]]/, "", parts[1]); print parts[1] }')

    if curl -s -f -m 5 "http://localhost:${APP_PORT}/api/events" > /dev/null 2>&1; then
        api_status="UP"
    fi

    mkdir -p -- "$(dirname -- "$LOG_FILE")"
    printf '%s\n' "$timestamp,mem=$mem_used,cpu=$cpu_load,api=$api_status" >> "$LOG_FILE"
}
 | 
			
		||||
 | 
			
		||||
# Command line interface
#
# Dispatch on the first argument. Each command prints the banner (except
# `monitor`, which prints it on every refresh, and `log`) followed by the
# report sections listed below. An unknown or missing command prints the
# usage text and exits with status 1; -h/--help/help print it and exit 0.

# Print the usage text to stdout.
usage() {
    cat <<EOF
Usage: $0 {status|full|monitor|resources|health|errors|log}

Commands:
  status    - Quick status check
  full      - Complete system analysis
  monitor   - Continuous monitoring (30s refresh)
  resources - Resource usage details
  health    - Application health check
  errors    - Show recent errors
  log       - Log metrics to file

Examples:
  $0 status           # Quick status
  $0 monitor          # Live monitoring
  $0 full             # Full report
EOF
}

case "${1:-}" in
    status)
        print_header
        check_service_status
        check_application_health
        show_performance_summary
        ;;
    full)
        print_header
        show_system_info
        check_service_status
        check_application_health
        show_resource_usage
        show_disk_usage
        show_network_stats
        show_recent_errors
        show_performance_summary
        ;;
    monitor)
        monitor_continuous
        ;;
    resources)
        print_header
        show_resource_usage
        show_disk_usage
        ;;
    health)
        print_header
        check_application_health
        ;;
    errors)
        print_header
        show_recent_errors
        ;;
    log)
        log_metrics
        echo "Metrics logged to $LOG_FILE"
        ;;
    -h | --help | help)
        usage
        exit 0
        ;;
    *)
        usage
        exit 1
        ;;
esac

exit 0
 | 
			
		||||
		Reference in New Issue
	
	Block a user