from dataclasses import dataclass, field
from typing import FrozenSet
# HTTP statuses retried when the caller does not supply its own set:
# 408 Request Timeout, 429 Too Many Requests, 502 Bad Gateway,
# 503 Service Unavailable and 504 Gateway Timeout.
_DEFAULT_RETRYABLE_STATUS_CODES: FrozenSet[int] = frozenset(
    (408, 429, 502, 503, 504)
)
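# [docs]  NOTE(review): appears to be a Sphinx "view source" link marker left over from HTML extraction; not part of the code.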
@dataclass(frozen=True)
class RetryConfig:
    """
    Configures retry behavior for the Feldera HTTP client.

    Retries are attempted on transient failures: connection/read timeouts and
    the HTTP statuses listed in `retryable_status_codes` (408, 429, 502, 503,
    504 by default).

    Wait strategies:

    - 408, 429, 503, 504 and connection/read timeouts use exponential
      backoff: `min(initial_backoff * (multiplier ** n), max_backoff)`,
      plus a uniform random `[0, jitter)` term, where `n` is the
      zero-based retry index.
    - 502 uses cluster-aware backoff: the client probes
      `/cluster_healthz`; if the cluster is healthy, the 502 is treated as
      spurious and the next retry runs immediately (wait = 0). If the
      cluster reports unhealthy (e.g. an upgrade is in progress), the next
      retry waits `unhealthy_backoff` seconds.
    - A server-supplied `Retry-After` header always overrides the computed
      wait (capped at `max_backoff`).

    Instances are immutable (frozen dataclass). All numeric settings are
    validated on construction.

    :param max_retries: Number of retries to attempt after the initial request.
        A value of `3` means up to `4` total attempts. Must be `>= 0`.
        Default: `3`.
    :param initial_backoff: Base wait in seconds before the first retry.
        Must be `>= 0`. Default: `2.0`.
    :param max_backoff: Maximum wait in seconds between retries. The computed
        exponential wait is clamped to this value. Must be `>= 0`.
        Default: `64.0`.
    :param multiplier: Exponential base applied to `initial_backoff` for each
        successive retry. Must be `> 0`. Default: `2.0`.
    :param jitter: Maximum random extra wait in seconds added to each
        exponential backoff (drawn uniformly from `[0, jitter)`). Helps avoid
        thundering-herd retries when many clients fail at once.
        Must be `>= 0`. Default: `0.0` (no jitter).
    :param unhealthy_backoff: Flat wait in seconds between 502 retries when
        the cluster reports unhealthy on `/cluster_healthz`. The cluster is
        likely upgrading/restarting, so a flat pause is preferable to an
        exponential ramp. Must be `>= 0`. Default: `90.0`.
    :param retryable_status_codes: HTTP status codes that should trigger a
        retry. Any iterable of ints is accepted and stored as a `frozenset`.
        Default: `{408, 429, 502, 503, 504}`.
    :raises ValueError: If any numeric setting violates its bound, or is NaN.
    """

    max_retries: int = 3
    initial_backoff: float = 2.0
    max_backoff: float = 64.0
    multiplier: float = 2.0
    jitter: float = 0.0
    unhealthy_backoff: float = 90.0
    retryable_status_codes: FrozenSet[int] = field(
        default_factory=lambda: _DEFAULT_RETRYABLE_STATUS_CODES
    )

    def __post_init__(self) -> None:
        """Validate the numeric settings and normalize the status-code set.

        :raises ValueError: If a setting is out of range or NaN.
        """
        # Each check is written as "not (value satisfies its bound)" rather
        # than "value violates its bound". Every ordering comparison against
        # NaN is False, so `nan < 0` would let NaN through, while
        # `not (nan >= 0)` correctly rejects it.
        if not (self.max_retries >= 0):
            raise ValueError("max_retries must be >= 0")
        if not (self.initial_backoff >= 0):
            raise ValueError("initial_backoff must be >= 0")
        if not (self.max_backoff >= 0):
            raise ValueError("max_backoff must be >= 0")
        if not (self.multiplier > 0):
            raise ValueError("multiplier must be > 0")
        if not (self.jitter >= 0):
            raise ValueError("jitter must be >= 0")
        if not (self.unhealthy_backoff >= 0):
            raise ValueError("unhealthy_backoff must be >= 0")

        # Coerce to frozenset so callers can pass a set/list without surprises,
        # and so equality comparisons against the default behave intuitively.
        # The dataclass is frozen, so normal attribute assignment is blocked;
        # object.__setattr__ is the way to set a field from __post_init__.
        if not isinstance(self.retryable_status_codes, frozenset):
            object.__setattr__(
                self, "retryable_status_codes", frozenset(self.retryable_status_codes)
            )