diff options
-rw-r--r-- | bgpd/bgp_packet.c | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 4d7f32de..271a21a0 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -1307,6 +1307,51 @@ bgp_open_receive (struct peer *peer, bgp_size_t size) && realpeer->status != OpenConfirm) { + /* XXX: This is an awful problem.. + * + * According to the RFC we should just let this connection (of the + * accepted 'peer') continue on to Established if the other + * connection (the 'realpeer' one) is in state Connect, and deal + * with the more larval FSM as/when it gets far enough to receive + * an Open. We don't do that though, we instead close the (more + * developed) accepted connection. + * + * This means there's a race, which if hit, can loop: + * + * FSM for A FSM for B + * realpeer accept-peer realpeer accept-peer + * + * Connect Connect + * Active + * OpenSent OpenSent + * <arrive here, + * Notify, delete> + * Idle Active + * OpenSent OpenSent + * <arrive here, + * Notify, delete> + * Idle + * <wait> <wait> + * Connect Connect + * + * + * If both sides are Quagga, they're almost certain to wait for + * the same amount of time of course (which doesn't preclude other + * implementations also waiting for same time). The race is + * exacerbated by high-latency (in bgpd and/or the network). + * + * The reason we do this is because our FSM is tied to our peer + * structure, which carries our configuration information, etc. + * I.e. we can't let the accepted-peer FSM continue on as it is, + * because it's not associated with any actual peer configuration - + * it's just a dummy. + * + * It's possible we could hack-fix this by just bgp_stop'ing the + * realpeer and continuing on with the 'transfer FSM' below. + * Ideally, we need to separate FSMs from struct peer. + * + * Setting one side to passive avoids the race, as a workaround. + */ if (BGP_DEBUG (events, EVENTS)) zlog_debug ("%s peer status is %s close connection", realpeer->host, LOOKUP (bgp_status_msg, |