Project

General

Profile

Feature #16598 » carp_demote_on_service_fail.sh

Jászay Gábor, 12/20/2025 06:14 AM

 
1
#!/bin/sh
2
# Demote CARP VIPs if critical service(s) fail (pfSense 2.8+)
3

    
4
# ---- CONFIG ----
# Static CARP layout (no awk-based discovery). One paragraph per VIP group:
#   *_IF           interface carrying the VHID(s)
#   *_VHIDS        VHID list for that group
#   *_NORMAL_SKEW  advskew value when healthy
#   *_DEMOTE_SKEW  advskew value when demoted

# LAN
LAN_IF="vmx1"
LAN_VHIDS="20"
LAN_NORMAL_SKEW=0
LAN_DEMOTE_SKEW=251

# DNSBL (same interface as LAN)
DNSBL_IF="vmx1"
DNSBL_VHIDS="21"
DNSBL_NORMAL_SKEW=0
DNSBL_DEMOTE_SKEW=251

# WAN: three VHID groups sharing one interface
WAN_IF="vmx0"

WAN1_VHIDS="10"
WAN1_NORMAL_SKEW=0
WAN1_DEMOTE_SKEW=251

WAN2_VHIDS="11"
WAN2_NORMAL_SKEW=100
WAN2_DEMOTE_SKEW=252

WAN3_VHIDS="12"
WAN3_NORMAL_SKEW=200
WAN3_DEMOTE_SKEW=253
25

    
26

    
27
# Process names that must all be running (space-separated).
# Extendable, e.g. "haproxy suricata unbound".
REQUIRED_SERVICES="haproxy suricata"

# Number of consecutive failed runs before the VIPs are demoted.
FAIL_THRESHOLD=1

# Tag used for every syslog line written by this script.
LOGTAG="carp-demote"

# State carried from one run to the next.
STATE_DIR="/var/run/carp-demote"
MODE_FILE="${STATE_DIR}/mode"               # values: "normal" | "demoted"
COUNT_FILE="${STATE_DIR}/fails.count"

# Make sure the state directory exists before anything reads or writes it.
mkdir -p "$STATE_DIR"

# log MESSAGE...
# Joins all arguments into one message and sends it to syslog under $LOGTAG.
log() {
  logger -t "$LOGTAG" -- "$*"
}
38

    
39
# ---- HELPER ----
40
#######################################
# Apply one advskew value to every CARP VHID of a group.
# Arguments:
#   $1 - interface carrying the VHIDs
#   $2 - whitespace-separated list of VHIDs (pass it as ONE quoted argument)
#   $3 - advskew value to set
# Outputs:
#   ifconfig's own output is left visible on purpose to aid debugging.
# Returns:
#   0 if every ifconfig call succeeded (or the list was empty),
#   1 if at least one call failed; remaining VHIDs are still attempted.
#######################################
set_skew_group() {
  # Locals keep the caller's variables (e.g. a global TARGET_SKEW) untouched.
  # `local` is not POSIX but is provided by FreeBSD's /bin/sh.
  local iface vhids skew vhid rc
  iface="$1"
  vhids="$2"
  skew="$3"
  rc=0

  # $vhids is deliberately unquoted: it is split into individual VHIDs.
  for vhid in $vhids; do
    # The value is set unconditionally, so re-running with the same
    # arguments simply re-applies the same advskew.
    ifconfig "$iface" vhid "$vhid" advskew "$skew" || rc=1
  done

  return "$rc"
}
50

    
51
# ---- HEALTH CHECKS ----
# services_ok stays 1 only if every check below passes; `down` collects the
# names of the failed checks for the log message.
services_ok=1
down=""

# Process check: each required service must have a process whose name
# matches exactly (pgrep -x).
for svc in $REQUIRED_SERVICES; do
  pgrep -x "$svc" >/dev/null 2>&1 && continue
  services_ok=0
  down="$down $svc"
done

# _url_responds URL
# Succeeds when curl fetches URL without an HTTP error within 5 seconds.
_url_responds() {
  curl -fsS --max-time 5 "$1" >/dev/null 2>&1
}

# --- additional check: HTTPS reachability (at least 1 host must respond) ---
# The probes short-circuit: later URLs are only tried if earlier ones failed.
if ! _url_responds https://haproxy-domain1.tld/alive \
  && ! _url_responds https://haproxy-domain2.tld/ \
  && ! _url_responds https://haproxy-domain3.tld/; then
  services_ok=0
  down="$down webcheck"
fi
67

    
68
# ---- STATE ----
# Persisted mode from the previous run. A missing file means "first run" and
# is treated as "normal". Any other content (unreadable, empty, corrupt) is
# mapped to "unknown" so that it can neither block a demotion nor skip a
# restore.
mode="normal"
if [ -f "$MODE_FILE" ]; then
  mode="$(cat "$MODE_FILE" 2>/dev/null)" || mode="unknown"
fi
case "$mode" in
  normal | demoted) ;;
  *) mode="unknown" ;;
esac

# Consecutive-failure counter from the previous run; anything that is not a
# plain non-negative integer is reset to 0 so the arithmetic below is safe.
count=0
[ -f "$COUNT_FILE" ] && count="$(cat "$COUNT_FILE" 2>/dev/null || echo 0)"
case "$count" in
  '' | *[!0-9]*) count=0 ;;
esac

# ---- DECISION ----
# VHID lists are passed quoted: set_skew_group takes the whole list as its
# second argument, so an unquoted multi-VHID list would shift the skew value.
if [ "$services_ok" -eq 1 ]; then
  # All checks passed -> restore unless we already know we are in normal mode.
  if [ "$mode" != "normal" ]; then
    if [ "$mode" = "unknown" ]; then
      log "State file $MODE_FILE unreadable or invalid; re-applying normal advskew"
    fi
    echo "go up"
    set_skew_group "$WAN_IF" "$WAN1_VHIDS" "$WAN1_NORMAL_SKEW"
#    set_skew_group "$WAN_IF" "$WAN2_VHIDS" "$WAN2_NORMAL_SKEW"
#    set_skew_group "$WAN_IF" "$WAN3_VHIDS" "$WAN3_NORMAL_SKEW"
    set_skew_group "$LAN_IF" "$LAN_VHIDS" "$LAN_NORMAL_SKEW"
#    set_skew_group "$DNSBL_IF" "$DNSBL_VHIDS" "$DNSBL_NORMAL_SKEW"
    echo normal > "$MODE_FILE"
    # Log only the groups actually changed above; extend this line when
    # enabling one of the commented-out groups.
    log "Services OK; VIPs restored [WAN:$WAN_IF vhid:$WAN1_VHIDS advskew:$WAN1_NORMAL_SKEW | LAN:$LAN_IF vhid:$LAN_VHIDS advskew:$LAN_NORMAL_SKEW]"
  fi
  echo 0 > "$COUNT_FILE"
else
  # At least one check failed: bump and persist the failure counter.
  count=$((count + 1))
  echo "$count" > "$COUNT_FILE"

  # Demote once the threshold is reached, unless already demoted.
  if [ "$mode" != "demoted" ] && [ "$count" -ge "$FAIL_THRESHOLD" ]; then
    echo "go down"
    set_skew_group "$WAN_IF" "$WAN1_VHIDS" "$WAN1_DEMOTE_SKEW"
#    set_skew_group "$WAN_IF" "$WAN2_VHIDS" "$WAN2_DEMOTE_SKEW"
#    set_skew_group "$WAN_IF" "$WAN3_VHIDS" "$WAN3_DEMOTE_SKEW"
    set_skew_group "$LAN_IF" "$LAN_VHIDS" "$LAN_DEMOTE_SKEW"
#    set_skew_group "$DNSBL_IF" "$DNSBL_VHIDS" "$DNSBL_DEMOTE_SKEW"
    echo demoted > "$MODE_FILE"
    # Log only the groups actually changed above; extend this line when
    # enabling one of the commented-out groups.
    log "Service DOWN ($down), threshold=$FAIL_THRESHOLD; VIPs demoted [WAN:$WAN_IF vhid:$WAN1_VHIDS advskew:$WAN1_DEMOTE_SKEW | LAN:$LAN_IF vhid:$LAN_VHIDS advskew:$LAN_DEMOTE_SKEW]"
  fi
fi

exit 0
110

    
    (1-1/1)