Improved handling of unavailable devices

This commit is contained in:
Thomas Kolb 2020-01-20 12:08:44 +01:00
parent f53dbe9be4
commit de3b7026a8
2 changed files with 47 additions and 1 deletions

View File

@ -40,6 +40,8 @@ Add or adjust the configuration for your collectds Python plugin as follows:
<Module collectd_sem6000>
Address "12:34:56:78:90:ab"
SocketName "FirstSocket"
ReadTimeout 30
SuspendTime 300
</Module>
<Module collectd_sem6000>
Address "ab:cd:ef:13:37:42"
@ -49,6 +51,14 @@ Add or adjust the configuration for your collectds Python plugin as follows:
</Plugin>
```
`ReadTimeout` and `SuspendTime` control what happens when a device is
unavailable. If no value can be retrieved for `ReadTimeout` seconds, the
plugin stops retrying that device for `SuspendTime` seconds. After that, normal
operation is resumed. This procedure ensures that an unreachable device does not
block other devices (too often) in the current single-threaded architecture.
If not specified, `ReadTimeout` defaults to 30 seconds and `SuspendTime` to 300 seconds (5 minutes).
Make sure that everything listed in `requirements.txt` is available to the user
running collectd.

View File

@ -3,6 +3,7 @@
# vim: noet ts=2 sw=2 sts=2
import os
import time
import collectd
from sem6000 import SEMSocket
@ -25,6 +26,12 @@ def config_func(cfg):
if key in ['address', 'socketname']:
config[key] = value
if key == 'readtimeout':
config['readtimeout'] = int(value)
if key == 'suspendtime':
config['suspendtime'] = int(value)
if 'address' not in config.keys():
collectd.error('sem6000: address must be set')
return
@ -32,7 +39,19 @@ def config_func(cfg):
if 'socketname' not in config.keys():
config['socketname'] = config['address'].replace(':', '')
instances.append( {'config': config, 'socket': None} )
if 'readtimeout' not in config.keys():
config['readtimeout'] = 30
if 'suspendtime' not in config.keys():
config['suspendtime'] = 300
instances.append( {
'config': config,
'socket': None,
'suspended': False,
'lastsuccess': 0,
'resumetime': 0
} )
def read_func():
global instances
@ -40,6 +59,14 @@ def read_func():
for inst in instances:
config = inst['config']
if inst['suspended']:
if time.time() < inst['resumetime']:
continue
else:
collectd.info("sem6000: Device {} waking up.".format(config['address']))
inst['suspended'] = False
inst['lastsuccess'] = time.time()
try:
if inst['socket'] == None:
collectd.info("sem6000: Connecting to {}...".format(config['address']))
@ -52,6 +79,13 @@ def read_func():
collectd.warning("sem6000: Exception caught: {}".format(e))
collectd.warning("sem6000: Restarting on next cycle...")
if inst['lastsuccess'] < time.time() - config['readtimeout']:
collectd.error("sem6000: no successful communication with {} for {:.1f} seconds. Suspending device for {:.1f} seconds.".format(
config['address'], config['readtimeout'], config['suspendtime']))
inst['suspended'] = True
inst['resumetime'] = time.time() + config['suspendtime']
if inst['socket'] != None:
inst['socket'].disconnect()
inst['socket'] = None
@ -61,6 +95,8 @@ def read_func():
if socket != None and socket.voltage != 0:
collectd.debug("Uploading values for {}".format(socket.mac_address))
inst['lastsuccess'] = time.time()
val = collectd.Values(plugin = 'sem6000-{}'.format(config['socketname']))
val.type = 'voltage'