Project

General

Profile

« Previous | Next » 

Revision 5affb137

Added by Vladimir Voskoboynikov over 4 years ago

Prevent possible race condition, fixes #9450

Instead of calling get_dpinger_status() again in the code that handles dpinger
alarms, rc.gateway_alarm now saves the original alarm readings to a file
(/tmp/.alarm.<gwname>), and get_dpinger_status() reads the status from that file.

View differences:

src/etc/inc/gwlb.inc
316 316
	return;
317 317
}
318 318

  
319
function get_dpinger_status($gwname, $gways = false, $action_disable = false) {
319
function get_dpinger_status($gwname, $gways = false, $action_disable = false, $clear_alarm = false) {
320 320
	global $g;
321 321

  
322 322
	$running_processes = running_dpinger_processes();
......
329 329
	$proc = $running_processes[$gwname];
330 330
	unset($running_processes);
331 331

  
332
	$timeoutcounter = 0;
333
	while (true) {
334
		if (!file_exists($proc['socket'])) {
335
			log_error("dpinger: status socket {$proc['socket']} not found");
336
			return false;
337
		}
338
		$fp = @stream_socket_client("unix://{$proc['socket']}", $errno, $errstr, 10);
339
		if (!$fp) {
340
			log_error(sprintf(gettext('dpinger: cannot connect to status socket %1$s - %2$s (%3$s)'), $proc['socket'], $errstr, $errno));
341
			return false;
342
		}
343

  
344
		$status = '';
345
		while (!feof($fp)) {
346
			$status .= fgets($fp, 1024);
347
		}
348
		fclose($fp);
349

  
332
	if (file_exists("/tmp/.alarm.{$gwname}")) {
333
		// if dpinger has raised an alarm, read the stats as they were for the alarm
350 334
		$r = array();
351 335
		list(
352 336
			$r['gwname'],
353 337
			$r['latency_avg'],
354 338
			$r['latency_stddev'],
355 339
			$r['loss']
356
		) = explode(' ', preg_replace('/\n/', '', $status));
357

  
358
		// dpinger returns '<gwname> 0 0 0' when queried directly after it starts.
359
		// while a latency of 0 and a loss of 0 would be perfect, in a real world it doesnt happen.
360
		// or does it, anyone? if so we must 'detect' the initialization period differently..
361
		$ready = $r['latency_stddev'] != '0' || $r['loss'] != '0';
340
		) = explode(' ', preg_replace('/\n/', '', file_get_contents("/tmp/.alarm.{$gwname}")));
362 341

  
363
		if ($ready) {
364
			break;
365
		} else {
366
			$timeoutcounter++;
367
			if ($timeoutcounter > 300) {
368
				log_error(sprintf(gettext('dpinger: timeout while retrieving status for gateway %s'), $gwname));
342
		if ($clear_alarm) {
343
			unlink("/tmp/.alarm.{$gwname}");
344
		}
345
	} else {
346
		// otherwise, read the current stats from the dpinger socket
347
		$timeoutcounter = 0;
348
		while (true) {
349
			if (!file_exists($proc['socket'])) {
350
				log_error("dpinger: status socket {$proc['socket']} not found");
369 351
				return false;
370 352
			}
371
			usleep(10000);
353
			$fp = @stream_socket_client("unix://{$proc['socket']}", $errno, $errstr, 10);
354
			if (!$fp) {
355
				log_error(sprintf(gettext('dpinger: cannot connect to status socket %1$s - %2$s (%3$s)'), $proc['socket'], $errstr, $errno));
356
				return false;
357
			}
358

  
359
			$status = '';
360
			while (!feof($fp)) {
361
				$status .= fgets($fp, 1024);
362
			}
363
			fclose($fp);
364

  
365
			$r = array();
366
			list(
367
				$r['gwname'],
368
				$r['latency_avg'],
369
				$r['latency_stddev'],
370
				$r['loss']
371
			) = explode(' ', preg_replace('/\n/', '', $status));
372

  
373
			// dpinger returns '<gwname> 0 0 0' when queried directly after it starts.
374
			// while a latency of 0 and a loss of 0 would be perfect, in a real world it doesnt happen.
375
			// or does it, anyone? if so we must 'detect' the initialization period differently..
376
			$ready = $r['latency_stddev'] != '0' || $r['loss'] != '0';
377

  
378
			if ($ready) {
379
				break;
380
			} else {
381
				$timeoutcounter++;
382
				if ($timeoutcounter > 300) {
383
					log_error(sprintf(gettext('dpinger: timeout while retrieving status for gateway %s'), $gwname));
384
					return false;
385
				}
386
				usleep(10000);
387
			}
372 388
		}
373 389
	}
374 390

  
......
432 448
}
433 449

  
434 450
/* return the status of the dpinger targets as an array */
435
function return_gateways_status($byname = false, $gways = false) {
451
function return_gateways_status($byname = false, $gways = false, $clear_alarm = false) {
436 452
	global $config, $g;
437 453

  
438 454
	$dpinger_gws = running_dpinger_processes();
......
449 465
		// That reports "highdelay" or "highloss" rather than just "down".
450 466
		// Because reporting the gateway down would be misleading (gateway action is disabled)
451 467
		$action_disable = $gateways_arr[$gwname]['action_disable'];
452
		$dpinger_status = get_dpinger_status($gwname, $action_disable);
468
		$dpinger_status = get_dpinger_status($gwname, $action_disable, $clear_alarm);
453 469
		if ($dpinger_status === false) {
454 470
			continue;
455 471
		}
src/etc/rc.gateway_alarm
37 37

  
38 38
echo ">>> Gateway alarm: ${GW} (Addr:${alarm_addr} Alarm:${alarm_flag} RTT:$( print_rtt ${alarm_rtt} ) RTTsd:$( print_rtt ${alarm_rttsd} ) Loss:${alarm_loss}%)" | /usr/bin/logger -p daemon.info -i -t rc.gateway_alarm
39 39

  
40
# Save the gateway alarm readings so they can be read by the scripts below
41
echo "${GW} ${alarm_rtt} ${alarm_rttsd} ${alarm_loss}" > "/tmp/.alarm.${GW}"
42

  
40 43
/usr/local/sbin/pfSctl \
41 44
	-c "service reload dyndns ${GW}" \
42 45
	-c "service reload ipsecdns" \

Also available in: Unified diff