#96 query clusters at 5-second intervals
This commit is contained in:
@@ -27,23 +27,7 @@ def get_short_error_message(e: requests.exceptions.RequestException):
|
|||||||
return str(e)
|
return str(e)
|
||||||
|
|
||||||
|
|
||||||
def update_clusters(cluster_discoverer, query_cluster: callable, store, debug: bool):
|
def handle_query_failure(e: Exception, cluster, backoff: dict):
|
||||||
while True:
|
|
||||||
lock = store.acquire_lock()
|
|
||||||
if lock:
|
|
||||||
try:
|
|
||||||
clusters = cluster_discoverer.get_clusters()
|
|
||||||
cluster_ids = set()
|
|
||||||
for cluster in clusters:
|
|
||||||
cluster_ids.add(cluster.id)
|
|
||||||
backoff_key = '{}:backoff'.format(cluster.id)
|
|
||||||
backoff = store.get(backoff_key)
|
|
||||||
if backoff and time.time() < backoff['next_try']:
|
|
||||||
# cluster is still in backoff, skip
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
data = query_cluster(cluster)
|
|
||||||
except Exception as e:
|
|
||||||
if not backoff:
|
if not backoff:
|
||||||
backoff = {}
|
backoff = {}
|
||||||
tries = backoff.get('tries', 0) + 1
|
tries = backoff.get('tries', 0) + 1
|
||||||
@@ -58,11 +42,38 @@ def update_clusters(cluster_discoverer, query_cluster: callable, store, debug: b
|
|||||||
log = logger.exception
|
log = logger.exception
|
||||||
log('Failed to query cluster {} ({}): {} (try {}, wait {} seconds)'.format(
|
log('Failed to query cluster {} ({}): {} (try {}, wait {} seconds)'.format(
|
||||||
cluster.id, cluster.api_server_url, message, tries, round(wait_seconds)))
|
cluster.id, cluster.api_server_url, message, tries, round(wait_seconds)))
|
||||||
store.set(backoff_key, backoff)
|
return backoff
|
||||||
|
|
||||||
|
|
||||||
|
def update_clusters(cluster_discoverer, query_cluster: callable, store, query_interval=5, debug: bool=False):
|
||||||
|
while True:
|
||||||
|
lock = store.acquire_lock()
|
||||||
|
if lock:
|
||||||
|
try:
|
||||||
|
clusters = cluster_discoverer.get_clusters()
|
||||||
|
cluster_ids = set()
|
||||||
|
for cluster in clusters:
|
||||||
|
cluster_ids.add(cluster.id)
|
||||||
|
status_key = '{}:status'.format(cluster.id)
|
||||||
|
status = store.get(status_key) or {}
|
||||||
|
now = time.time()
|
||||||
|
if now < status.get('last_query_time', 0) + query_interval:
|
||||||
|
continue
|
||||||
|
backoff = status.get('backoff')
|
||||||
|
if backoff and now < backoff['next_try']:
|
||||||
|
# cluster is still in backoff, skip
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
logger.debug('Querying cluster {} ({})..'.format(cluster.id, cluster.api_server_url))
|
||||||
|
data = query_cluster(cluster)
|
||||||
|
except Exception as e:
|
||||||
|
backoff = handle_query_failure(e, cluster, backoff)
|
||||||
|
status['backoff'] = backoff
|
||||||
else:
|
else:
|
||||||
|
status['last_query_time'] = now
|
||||||
if backoff:
|
if backoff:
|
||||||
# reset backoff
|
logger.info('Cluster {} ({}) recovered after {} tries.'.format(cluster.id, cluster.api_server_url, backoff['tries']))
|
||||||
store.set(backoff_key, None)
|
del status['backoff']
|
||||||
old_data = store.get(data['id'])
|
old_data = store.get(data['id'])
|
||||||
if old_data:
|
if old_data:
|
||||||
# https://pikacode.com/phijaro/json_delta/ticket/11/
|
# https://pikacode.com/phijaro/json_delta/ticket/11/
|
||||||
@@ -72,11 +83,13 @@ def update_clusters(cluster_discoverer, query_cluster: callable, store, debug: b
|
|||||||
if delta:
|
if delta:
|
||||||
store.set(cluster.id, data)
|
store.set(cluster.id, data)
|
||||||
else:
|
else:
|
||||||
|
logger.info('Discovered new cluster {} ({}).'.format(cluster.id, cluster.api_server_url))
|
||||||
store.publish('clusterupdate', data)
|
store.publish('clusterupdate', data)
|
||||||
store.set(cluster.id, data)
|
store.set(cluster.id, data)
|
||||||
|
store.set(status_key, status)
|
||||||
store.set('cluster-ids', list(sorted(cluster_ids)))
|
store.set('cluster-ids', list(sorted(cluster_ids)))
|
||||||
except:
|
except:
|
||||||
logger.exception('Failed to update')
|
logger.exception('Failed to update')
|
||||||
finally:
|
finally:
|
||||||
store.release_lock(lock)
|
store.release_lock(lock)
|
||||||
gevent.sleep(5)
|
gevent.sleep(random_jitter(1))
|
||||||
|
|||||||
Reference in New Issue
Block a user