better redis error handling
This commit is contained in:
@@ -106,6 +106,51 @@ class RedisPubSubSubscriber(EventSubscriber):
|
||||
|
||||
self._topics.extend(topics)
|
||||
|
||||
async def _reconnect(self) -> None:
|
||||
"""Attempt to reconnect to Redis."""
|
||||
max_retries = 10
|
||||
base_delay = 1.0
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
# Clean up old connections
|
||||
if self._pubsub:
|
||||
try:
|
||||
await self._pubsub.close()
|
||||
except Exception:
|
||||
pass
|
||||
if self._client:
|
||||
try:
|
||||
await self._client.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Reconnect
|
||||
self._client = redis.from_url(self.redis_url, decode_responses=True)
|
||||
await self._client.ping()
|
||||
self._pubsub = self._client.pubsub()
|
||||
|
||||
# Re-subscribe to topics
|
||||
if self._topics:
|
||||
patterns = [t for t in self._topics if "*" in t]
|
||||
channels = [t for t in self._topics if "*" not in t]
|
||||
if channels:
|
||||
await self._pubsub.subscribe(*channels)
|
||||
if patterns:
|
||||
await self._pubsub.psubscribe(*patterns)
|
||||
|
||||
logger.info(f"Reconnected to Redis at {self.redis_url}")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
delay = min(base_delay * (2**attempt), 30.0)
|
||||
logger.warning(
|
||||
f"Reconnect attempt {attempt + 1} failed: {e}, retrying in {delay}s"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
|
||||
raise RuntimeError(f"Failed to reconnect to Redis after {max_retries} attempts")
|
||||
|
||||
async def consume(self) -> AsyncIterator[Event]:
|
||||
if not self._pubsub:
|
||||
raise RuntimeError("Subscriber not connected")
|
||||
@@ -138,5 +183,9 @@ class RedisPubSubSubscriber(EventSubscriber):
|
||||
self._running = False
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error consuming events: {e}")
|
||||
await asyncio.sleep(1.0)
|
||||
logger.error(f"Error consuming events: {e}, attempting reconnect...")
|
||||
try:
|
||||
await self._reconnect()
|
||||
except Exception as reconnect_err:
|
||||
logger.error(f"Reconnect failed: {reconnect_err}")
|
||||
await asyncio.sleep(5.0)
|
||||
|
||||
Reference in New Issue
Block a user