diff options
author | Paul Jakma <paul.jakma@sun.com> | 2007-02-22 17:48:42 +0000 |
---|---|---|
committer | Paul Jakma <paul.jakma@sun.com> | 2007-02-22 17:48:42 +0000 |
commit | f2c31acb6f97688af0f368211536829324145919 (patch) | |
tree | 6f9887992eeb0890fdf673f9c0794e7d1384edbb | |
parent | 553bdfe376c49886cbdc2d306fea7b003bead31f (diff) |
[bgpd] Peer delete can race with reconfig leading to crash
2007-02-22 Paul Jakma <paul.jakma@sun.com>
* bgp_fsm.c: (bgp_fsm_change_status) Handle state change into
clearing or greater here. Simpler.
(bgp_event) Clearing state change work moved to previous
* bgp_route.c: (bgp_clear_route_node) Clearing adj-in here
is too late, as it leaves a race between a peer being deleted
and an identical peer being configured before clearing
completes, leading to a crash.
Simplest fix is to clean peers Adj-in up-front, rather than
queueing such work.
(bgp_clear_route_table) Clear peer's Adj-In and Adj-Out
up-front here, rather than queueing such work.
Extensive comment added on the various bits of indexed data
that exist and how they need to be dealt with.
(bgp_clear_route) Update comment.
-rw-r--r-- | bgpd/ChangeLog | 17 | ||||
-rw-r--r-- | bgpd/bgp_fsm.c | 16 | ||||
-rw-r--r-- | bgpd/bgp_route.c | 92 |
3 files changed, 92 insertions, 33 deletions
diff --git a/bgpd/ChangeLog b/bgpd/ChangeLog index 257b0eeb..7e964c95 100644 --- a/bgpd/ChangeLog +++ b/bgpd/ChangeLog @@ -1,3 +1,20 @@ +2007-02-22 Paul Jakma <paul.jakma@sun.com> + + * bgp_fsm.c: (bgp_fsm_change_status) Handle state change into + clearing or greater here. Simpler. + (bgp_event) Clearing state change work moved to previous + * bgp_route.c: (bgp_clear_route_node) Clearing adj-in here + is too late, as it leaves a race between a peer being deleted + and an identical peer being configured before clearing + completes, leading to a crash. + Simplest fix is to clean peers Adj-in up-front, rather than + queueing such work. + (bgp_clear_route_table) Clear peer's Adj-In and Adj-Out + up-front here, rather than queueing such work. + Extensive comment added on the various bits of indexed data + that exist and how they need to be dealt with. + (bgp_clear_route) Update comment. + 2006-12-12 Andrew J. Schorr <ajschorr@alumni.princeton.edu> * bgp_nexthop.c: (bgp_connected_add, bgp_connected_delete) diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index d704c297..db7e69af 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -409,10 +409,16 @@ bgp_fsm_change_status (struct peer *peer, int status) { bgp_dump_state (peer, peer->status, status); + /* Transition into Clearing or Deleted must /always/ clear all routes.. + * (and must do so before actually changing into Deleted.. + */ + if (status >= Clearing) + bgp_clear_route_all (peer); + /* Preserve old status and change into new status. */ peer->ostatus = peer->status; peer->status = status; - + if (BGP_DEBUG (normal, NORMAL)) zlog_debug ("%s went from %s to %s", peer->host, @@ -1089,13 +1095,7 @@ bgp_event (struct thread *thread) { /* If status is changed. */ if (next != peer->status) - { - /* Transition into Clearing must /always/ clear all routes.. */ - if (next == Clearing) - bgp_clear_route_all (peer); - - bgp_fsm_change_status (peer, next); - } + bgp_fsm_change_status (peer, next); /* Make sure timer is set. */ bgp_timer_set (peer); diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index ce44842e..c464cd04 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -2490,8 +2490,6 @@ bgp_clear_route_node (struct work_queue *wq, void *data) { struct bgp_node *rn = data; struct peer *peer = wq->spec.data; - struct bgp_adj_in *ain; - struct bgp_adj_out *aout; struct bgp_info *ri; afi_t afi = rn->table->afi; safi_t safi = rn->table->safi; @@ -2511,20 +2509,6 @@ bgp_clear_route_node (struct work_queue *wq, void *data) bgp_rib_remove (rn, ri, peer, afi, safi); break; } - for (ain = rn->adj_in; ain; ain = ain->next) - if (ain->peer == peer) - { - bgp_adj_in_remove (rn, ain); - bgp_unlock_node (rn); - break; - } - for (aout = rn->adj_out; aout; aout = aout->next) - if (aout->peer == peer) - { - bgp_adj_out_remove (rn, aout, peer, afi, safi); - bgp_unlock_node (rn); - break; - } return WQ_SUCCESS; } @@ -2577,6 +2561,7 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, { struct bgp_node *rn; + if (! table) table = (rsclient) ? rsclient->rib[afi][safi] : peer->bgp->rib[afi][safi]; @@ -2586,11 +2571,65 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) { + struct bgp_info *ri; + struct bgp_adj_in *ain; + struct bgp_adj_out *aout; + if (rn->info == NULL) continue; - - bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */ - work_queue_add (peer->clear_node_queue, rn); + + /* XXX:TODO: This is suboptimal, every non-empty route_node is + * queued for every clearing peer, regardless of whether it is + * relevant to the peer at hand. + * + * Overview: There are 3 different indices which need to be + * scrubbed, potentially, when a peer is removed: + * + * 1 peer's routes visible via the RIB (ie accepted routes) + * 2 peer's routes visible by the (optional) peer's adj-in index + * 3 other routes visible by the peer's adj-out index + * + * 3 there is no hurry in scrubbing, once the struct peer is + * removed from bgp->peer, we could just GC such deleted peer's + * adj-outs at our leisure. + * + * 1 and 2 must be 'scrubbed' in some way, at least made + * invisible via RIB index before peer session is allowed to be + * brought back up. So one needs to know when such a 'search' is + * complete. + * + * Ideally: + * + * - there'd be a single global queue or a single RIB walker + * - rather than tracking which route_nodes still need to be + * examined on a peer basis, we'd track which peers still + * aren't cleared + * + * Given that our per-peer prefix-counts now should be reliable, + * this may actually be achievable. It doesn't seem to be a huge + * problem at this time, + */ + for (ri = rn->info; ri; ri = ri->next) + if (ri->peer == peer) + { + bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */ + work_queue_add (peer->clear_node_queue, rn); + } + + for (ain = rn->adj_in; ain; ain = ain->next) + if (ain->peer == peer) + { + bgp_adj_in_remove (rn, ain); + bgp_unlock_node (rn); + break; + } + for (aout = rn->adj_out; aout; aout = aout->next) + if (aout->peer == peer) + { + bgp_adj_out_remove (rn, aout, peer, afi, safi); + bgp_unlock_node (rn); + break; + } } return; } @@ -2636,15 +2675,18 @@ bgp_clear_route (struct peer *peer, afi_t afi, safi_t safi) } /* If no routes were cleared, nothing was added to workqueue, the - * completion function won't be run by workqueue code - call it here. + * completion function won't be run by workqueue code - call it here. + * XXX: Actually, this assumption doesn't hold, see + * bgp_clear_route_table(), we queue all non-empty nodes. * * Additionally, there is a presumption in FSM that clearing is only - * needed if peer state is Established - peers in pre-Established states - * shouldn't have any route-update state associated with them (in or out). + * really needed if peer state is Established - peers in + * pre-Established states shouldn't have any route-update state + * associated with them (in or out). * - * We still get here from FSM through bgp_stop->clear_route_all in - * pre-Established though, so this is a useful sanity check to ensure - * the assumption above holds. + * We still can get here in pre-Established though, through + * peer_delete -> bgp_fsm_change_status, so this is a useful sanity + * check to ensure the assumption above holds. * * At some future point, this check could be move to the top of the * function, and do a quick early-return when state is |