diff --git a/kube_ops_view/cluster_discovery.py b/kube_ops_view/cluster_discovery.py index cf17c3a..4a13751 100644 --- a/kube_ops_view/cluster_discovery.py +++ b/kube_ops_view/cluster_discovery.py @@ -1,11 +1,11 @@ +import logging +import re import time from pathlib import Path from urllib.parse import urljoin import kubernetes.client import kubernetes.config -import logging -import re import requests import tokens from requests.auth import AuthBase diff --git a/kube_ops_view/kubernetes.py b/kube_ops_view/kubernetes.py index 90692ba..ea0f510 100644 --- a/kube_ops_view/kubernetes.py +++ b/kube_ops_view/kubernetes.py @@ -4,6 +4,8 @@ from urllib.parse import urljoin import requests +from .utils import get_short_error_message + logger = logging.getLogger(__name__) session = requests.Session() @@ -90,8 +92,8 @@ def query_kubernetes_cluster(cluster): else: for metrics in data['items']: nodes[metrics['metadata']['name']]['usage'] = metrics['usage'] - except: - logger.exception('Failed to get node metrics') + except Exception as e: + logger.warning('Failed to query node metrics for cluster {}: {}'.format(cluster.id, get_short_error_message(e))) try: response = request(cluster, '/api/v1/namespaces/kube-system/services/heapster/proxy/apis/metrics/v1alpha1/pods') response.raise_for_status() @@ -106,6 +108,6 @@ def query_kubernetes_cluster(cluster): for container_metrics in metrics['containers']: if container['name'] == container_metrics['name']: container['resources']['usage'] = container_metrics['usage'] - except: - logger.exception('Failed to get pod metrics') + except Exception as e: + logger.warning('Failed to query pod metrics for cluster {}: {}'.format(cluster.id, get_short_error_message(e))) return {'id': cluster_id, 'api_server_url': api_server_url, 'nodes': nodes, 'unassigned_pods': unassigned_pods} diff --git a/kube_ops_view/update.py b/kube_ops_view/update.py index 385f56f..e6858ff 100644 --- a/kube_ops_view/update.py +++ b/kube_ops_view/update.py @@ -6,6 +6,7 @@ import 
json_delta import requests.exceptions from .backoff import expo, random_jitter +from .utils import get_short_error_message logger = logging.getLogger(__name__) @@ -14,19 +15,6 @@ def calculate_backoff(tries: int): return random_jitter(expo(tries, factor=2, max_value=60), jitter=4) -def get_short_error_message(e: requests.exceptions.RequestException): - '''Generate a reasonable short message why the HTTP request failed''' - - if e.response is not None: - # e.g. "401 Unauthorized" - return '{} {}'.format(e.response.status_code, e.response.reason) - elif isinstance(e, requests.exceptions.ConnectionError): - # e.g. "ConnectionError" or "ConnectTimeout" - return e.__class__.__name__ - else: - return str(e) - - def handle_query_failure(e: Exception, cluster, backoff: dict): if not backoff: backoff = {} @@ -34,11 +22,10 @@ def handle_query_failure(e: Exception, cluster, backoff: dict): backoff['tries'] = tries wait_seconds = calculate_backoff(tries) backoff['next_try'] = time.time() + wait_seconds + message = get_short_error_message(e) if isinstance(e, requests.exceptions.RequestException): - message = get_short_error_message(e) log = logger.error else: - message = str(e) log = logger.exception log('Failed to query cluster {} ({}): {} (try {}, wait {} seconds)'.format( cluster.id, cluster.api_server_url, message, tries, round(wait_seconds))) diff --git a/kube_ops_view/utils.py b/kube_ops_view/utils.py new file mode 100644 index 0000000..f2a9f93 --- /dev/null +++ b/kube_ops_view/utils.py @@ -0,0 +1,14 @@ +import requests.exceptions + + +def get_short_error_message(e: Exception): + '''Generate a reasonably short message describing why an operation (e.g. an HTTP request) failed''' + + if isinstance(e, requests.exceptions.RequestException) and e.response is not None: + # e.g. "401 Unauthorized" + return '{} {}'.format(e.response.status_code, e.response.reason) + elif isinstance(e, requests.exceptions.ConnectionError): + # e.g. 
"ConnectionError" or "ConnectTimeout" + return e.__class__.__name__ + else: + return str(e)