From afb226bebd8350f8f6523f4692658b944bc2997f Mon Sep 17 00:00:00 2001 From: Paul Jakma Date: Tue, 21 Feb 2006 01:09:01 +0000 Subject: [bgpd] Record afi/safi in bgp_table. Serialise peer clear with FSM. 2006-02-21 Paul Jakma * bgpd.h: move the clear_node_queue to be peer specific. Add a new peer status flag, PEER_STATUS_CLEARING. * bgp_table.h: (struct bgp_table) Add fields to record afi, safi of the table. (bgp_table_init) Take afi and safi to create table for. * bgp_table.c: (bgp_table_init) record the afi and safi. * bgp_nexthop.c: Update all calls to bgp_table_init. * bgp_vty.c: ditto. * bgpd.c: ditto. * bgp_fsm.c: (bgp_timer_set) dont bring up a session which is clearing. * bgp_route.c: (general) Update all bgp_table_init calls. (bgp_process_{rsclient,main}) clear_node is serialised via PEER_STATUS_CLEARING and fsm now. (struct bgp_clear_node_queue) can be removed. struct bgp_node can be the queue item data directly, as struct peer can be kept in the new wq global user data and afi/safi can be retrieved via bgp_node -> bgp_table. (bgp_clear_route_node) fix to get peer via wq->spec.data, afi/safi via bgp_node->bgp_table. (bgp_clear_node_queue_del) no more item data to delete, only unlock the bgp_node. (bgp_clear_node_complete) only need to unset CLEARING flag and unlock struct peer. (bgp_clear_node_queue_init) queue attaches to struct peer now. record peer name as queue name. (bgp_clear_route_table) If queue transitions to active, serialise clearing by setting PEER_STATUS_CLEARING rather than plugging process queue, and lock peer while queue active. Update to pass only bgp_node as per-queue-item specific data. --- bgpd/ChangeLog | 34 +++++++++++++ bgpd/bgp_fsm.c | 1 + bgpd/bgp_nexthop.c | 12 ++--- bgpd/bgp_route.c | 145 ++++++++++++++++++++++++----------------------------- bgpd/bgp_table.c | 6 ++- bgpd/bgp_table.h | 8 ++- bgpd/bgp_vty.c | 2 +- bgpd/bgpd.c | 6 +-- bgpd/bgpd.h | 8 +-- 9 files changed, 126 insertions(+), 96 deletions(-) diff --git a/bgpd/ChangeLog b/bgpd/ChangeLog index c9d61898..9a960a38 100644 --- a/bgpd/ChangeLog +++ b/bgpd/ChangeLog @@ -1,3 +1,37 @@ +2006-02-21 Paul Jakma + + * bgpd.h: move the clear_node_queue to be peer specific. + Add a new peer status flag, PEER_STATUS_CLEARING. + * bgp_table.h: (struct bgp_table) Add fields to record afi, + safi of the table. + (bgp_table_init) Take afi and safi to create table for. + * bgp_table.c: (bgp_table_init) record the afi and safi. + * bgp_nexthop.c: Update all calls to bgp_table_init. + * bgp_vty.c: ditto. + * bgpd.c: ditto. + * bgp_fsm.c: (bgp_timer_set) dont bring up a session which is + clearing. + * bgp_route.c: (general) Update all bgp_table_init calls. + (bgp_process_{rsclient,main}) clear_node is serialised + via PEER_STATUS_CLEARING and fsm now. + (struct bgp_clear_node_queue) can be removed. struct bgp_node + can be the queue item data directly, as struct peer can be + kept in the new wq global user data and afi/safi can be + retrieved via bgp_node -> bgp_table. + (bgp_clear_route_node) fix to get peer via wq->spec.data, + afi/safi via bgp_node->bgp_table. + (bgp_clear_node_queue_del) no more item data to delete, only + unlock the bgp_node. + (bgp_clear_node_complete) only need to unset CLEARING flag + and unlock struct peer. + (bgp_clear_node_queue_init) queue attaches to struct peer + now. record peer name as queue name. + (bgp_clear_route_table) If queue transitions to active, + serialise clearing by setting PEER_STATUS_CLEARING rather + than plugging process queue, and lock peer while queue + active. + Update to pass only bgp_node as per-queue-item specific data. + 2006-02-18 Paul Jakma * bgp_routemap.c: (route_set_community) Quick, very hacky, fix diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 220861c1..f5f7892b 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -84,6 +84,7 @@ bgp_timer_set (struct peer *peer) inactive. All other timer must be turned off */ if (CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN) || CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW) + || CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING) || ! peer_active (peer)) { BGP_TIMER_OFF (peer->t_start); diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index ba1752fb..ea707ccb 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -1330,17 +1330,17 @@ bgp_scan_init () bgp_scan_interval = BGP_SCAN_INTERVAL_DEFAULT; bgp_import_interval = BGP_IMPORT_INTERVAL_DEFAULT; - cache1_table[AFI_IP] = bgp_table_init (); - cache2_table[AFI_IP] = bgp_table_init (); + cache1_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST); + cache2_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST); bgp_nexthop_cache_table[AFI_IP] = cache1_table[AFI_IP]; - bgp_connected_table[AFI_IP] = bgp_table_init (); + bgp_connected_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST); #ifdef HAVE_IPV6 - cache1_table[AFI_IP6] = bgp_table_init (); - cache2_table[AFI_IP6] = bgp_table_init (); + cache1_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST); + cache2_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST); bgp_nexthop_cache_table[AFI_IP6] = cache1_table[AFI_IP6]; - bgp_connected_table[AFI_IP6] = bgp_table_init (); + bgp_connected_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST); #endif /* HAVE_IPV6 */ /* Make BGP scan thread. */ diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 6c21e3f3..a73974ff 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -71,7 +71,7 @@ bgp_afi_node_get (struct bgp_table *table, afi_t afi, safi_t safi, struct prefix prn = bgp_node_get (table, (struct prefix *) prd); if (prn->info == NULL) - prn->info = bgp_table_init (); + prn->info = bgp_table_init (afi, safi); else bgp_unlock_node (prn); table = prn->info; @@ -1257,13 +1257,6 @@ bgp_process_rsclient (struct work_queue *wq, void *data) struct listnode *node, *nnode; struct peer *rsclient = rn->table->owner; - /* we shouldn't run if the clear_route_node queue is still running - * or scheduled to run, or we can race with session coming up - * and adding routes back before we've cleared them - */ - if (bm->clear_node_queue && bm->clear_node_queue->thread) - return WQ_QUEUE_BLOCKED; - /* Best path selection. */ bgp_best_selection (bgp, rn, &old_and_new); new_select = old_and_new.new; @@ -1326,13 +1319,6 @@ bgp_process_main (struct work_queue *wq, void *data) struct peer *peer; struct attr attr; - /* we shouldn't run if the clear_route_node queue is still running - * or scheduled to run, or we can race with session coming up - * and adding routes back before we've cleared them - */ - if (bm->clear_node_queue && bm->clear_node_queue->thread) - return WQ_QUEUE_BLOCKED; - /* Best path selection. */ bgp_best_selection (bgp, rn, &old_and_new); old_select = old_and_new.old; @@ -2465,54 +2451,49 @@ bgp_soft_reconfig_in (struct peer *peer, afi_t afi, safi_t safi) bgp_soft_reconfig_table (peer, afi, safi, table); } -struct bgp_clear_node_queue -{ - struct bgp_node *rn; - struct peer *peer; - afi_t afi; - safi_t safi; -}; - static wq_item_status bgp_clear_route_node (struct work_queue *wq, void *data) { - struct bgp_clear_node_queue *cq = data; + struct bgp_node *rn = data; + struct peer *peer = wq->spec.data; struct bgp_adj_in *ain; struct bgp_adj_out *aout; struct bgp_info *ri; + afi_t afi = rn->table->afi; + safi_t safi = rn->table->safi; - assert (cq->rn && cq->peer); + assert (rn && peer); - for (ri = cq->rn->info; ri; ri = ri->next) - if (ri->peer == cq->peer) + for (ri = rn->info; ri; ri = ri->next) + if (ri->peer == peer) { /* graceful restart STALE flag set. */ - if (CHECK_FLAG (cq->peer->sflags, PEER_STATUS_NSF_WAIT) - && cq->peer->nsf[cq->afi][cq->safi] + if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT) + && peer->nsf[afi][safi] && ! CHECK_FLAG (ri->flags, BGP_INFO_STALE) && ! CHECK_FLAG (ri->flags, BGP_INFO_HISTORY) && ! CHECK_FLAG (ri->flags, BGP_INFO_DAMPED) && ! CHECK_FLAG (ri->flags, BGP_INFO_REMOVED)) { - bgp_pcount_decrement (cq->rn, ri, cq->afi, cq->safi); + bgp_pcount_decrement (rn, ri, afi, safi); SET_FLAG (ri->flags, BGP_INFO_STALE); } else - bgp_rib_remove (cq->rn, ri, cq->peer, cq->afi, cq->safi); + bgp_rib_remove (rn, ri, peer, afi, safi); break; } - for (ain = cq->rn->adj_in; ain; ain = ain->next) - if (ain->peer == cq->peer) + for (ain = rn->adj_in; ain; ain = ain->next) + if (ain->peer == peer) { - bgp_adj_in_remove (cq->rn, ain); - bgp_unlock_node (cq->rn); + bgp_adj_in_remove (rn, ain); + bgp_unlock_node (rn); break; } - for (aout = cq->rn->adj_out; aout; aout = aout->next) - if (aout->peer == cq->peer) + for (aout = rn->adj_out; aout; aout = aout->next) + if (aout->peer == peer) { - bgp_adj_out_remove (cq->rn, aout, cq->peer, cq->afi, cq->safi); - bgp_unlock_node (cq->rn); + bgp_adj_out_remove (rn, aout, peer, afi, safi); + bgp_unlock_node (rn); break; } return WQ_SUCCESS; @@ -2521,78 +2502,84 @@ bgp_clear_route_node (struct work_queue *wq, void *data) static void bgp_clear_node_queue_del (struct work_queue *wq, void *data) { - struct bgp_clear_node_queue *cq = data; - bgp_unlock_node (cq->rn); - peer_unlock (cq->peer); /* bgp_clear_node_queue_del */ - XFREE (MTYPE_BGP_CLEAR_NODE_QUEUE, cq); + struct bgp_node *rn = data; + + bgp_unlock_node (rn); } static void bgp_clear_node_complete (struct work_queue *wq) { - /* unplug the 2 processing queues */ - if (bm->process_main_queue) - work_queue_unplug (bm->process_main_queue); - if (bm->process_rsclient_queue) - work_queue_unplug (bm->process_rsclient_queue); + struct peer *peer = wq->spec.data; + + UNSET_FLAG (peer->sflags, PEER_STATUS_CLEARING); + peer_unlock (peer); /* bgp_clear_node_complete */ } static void -bgp_clear_node_queue_init (void) +bgp_clear_node_queue_init (struct peer *peer) { - if ( (bm->clear_node_queue - = work_queue_new (bm->master, "clear_route_node")) == NULL) +#define CLEAR_QUEUE_NAME_LEN 26 /* "clear 2001:123:123:123::1" */ + char wname[CLEAR_QUEUE_NAME_LEN]; + + snprintf (wname, CLEAR_QUEUE_NAME_LEN, "clear %s", peer->host); +#undef CLEAR_QUEUE_NAME_LEN + + if ( (peer->clear_node_queue = work_queue_new (bm->master, wname)) == NULL) { zlog_err ("%s: Failed to allocate work queue", __func__); exit (1); } - bm->clear_node_queue->spec.hold = 10; - bm->clear_node_queue->spec.workfunc = &bgp_clear_route_node; - bm->clear_node_queue->spec.del_item_data = &bgp_clear_node_queue_del; - bm->clear_node_queue->spec.completion_func = &bgp_clear_node_complete; - bm->clear_node_queue->spec.max_retries = 0; + peer->clear_node_queue->spec.hold = 10; + peer->clear_node_queue->spec.workfunc = &bgp_clear_route_node; + peer->clear_node_queue->spec.del_item_data = &bgp_clear_node_queue_del; + peer->clear_node_queue->spec.completion_func = &bgp_clear_node_complete; + peer->clear_node_queue->spec.max_retries = 0; + + /* we only 'lock' this peer reference when the queue is actually active */ + peer->clear_node_queue->spec.data = peer; } static void bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, struct bgp_table *table, struct peer *rsclient) { - struct bgp_clear_node_queue *cqnode; struct bgp_node *rn; if (! table) table = (rsclient) ? rsclient->rib[afi][safi] : peer->bgp->rib[afi][safi]; - + /* If still no table => afi/safi isn't configured at all or smth. */ if (! table) return; - if (bm->clear_node_queue == NULL) - bgp_clear_node_queue_init (); + if (peer->clear_node_queue == NULL) + bgp_clear_node_queue_init (peer); - /* plug the two bgp_process queues to avoid any chance of racing - * with a session coming back up and adding routes before we've - * cleared them all. We'll unplug them with completion callback. + /* bgp_fsm.c will not bring CLEARING sessions out of Idle this + * protects against peers which flap faster than we can we clear, + * which could lead to: + * + * a) race with routes from the new session being installed before + * clear_route_node visits the node (to delete the route of that + * peer) + * b) resource exhaustion, clear_route_node likely leads to an entry + * on the process_main queue. Fast-flapping could cause that queue + * to grow and grow. */ - if (bm->process_main_queue) - work_queue_plug (bm->process_main_queue); - if (bm->process_rsclient_queue) - work_queue_plug (bm->process_rsclient_queue); + if (!CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING)) + { + SET_FLAG (peer->sflags, PEER_STATUS_CLEARING); + peer_lock (peer); /* bgp_clear_node_complete */ + } for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) { if (rn->info == NULL) continue; - if ( (cqnode = XCALLOC (MTYPE_BGP_CLEAR_NODE_QUEUE, - sizeof (struct bgp_clear_node_queue))) == NULL) - continue; - - cqnode->rn = bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */ - cqnode->afi = afi; - cqnode->safi = safi; - cqnode->peer = peer_lock (peer); /* bgp_clear_node_queue_del */ - work_queue_add (bm->clear_node_queue, cqnode); + bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */ + work_queue_add (peer->clear_node_queue, rn); } return; } @@ -3524,7 +3511,7 @@ bgp_static_set_vpnv4 (struct vty *vty, const char *ip_str, const char *rd_str, prn = bgp_node_get (bgp->route[AFI_IP][SAFI_MPLS_VPN], (struct prefix *)&prd); if (prn->info == NULL) - prn->info = bgp_table_init (); + prn->info = bgp_table_init (AFI_IP, SAFI_MPLS_VPN); else bgp_unlock_node (prn); table = prn->info; @@ -3593,7 +3580,7 @@ bgp_static_unset_vpnv4 (struct vty *vty, const char *ip_str, prn = bgp_node_get (bgp->route[AFI_IP][SAFI_MPLS_VPN], (struct prefix *)&prd); if (prn->info == NULL) - prn->info = bgp_table_init (); + prn->info = bgp_table_init (AFI_IP, SAFI_MPLS_VPN); else bgp_unlock_node (prn); table = prn->info; @@ -10408,7 +10395,7 @@ void bgp_route_init () { /* Init BGP distance table. */ - bgp_distance_table = bgp_table_init (); + bgp_distance_table = bgp_table_init (AFI_IP, SAFI_UNICAST); /* IPv4 BGP commands. */ install_element (BGP_NODE, &bgp_network_cmd); diff --git a/bgpd/bgp_table.c b/bgpd/bgp_table.c index a9199d64..a3b489d5 100644 --- a/bgpd/bgp_table.c +++ b/bgpd/bgp_table.c @@ -32,7 +32,7 @@ void bgp_node_delete (struct bgp_node *); void bgp_table_free (struct bgp_table *); struct bgp_table * -bgp_table_init (void) +bgp_table_init (afi_t afi, safi_t safi) { struct bgp_table *rt; @@ -40,7 +40,9 @@ bgp_table_init (void) memset (rt, 0, sizeof (struct bgp_table)); rt->type = BGP_TABLE_MAIN; - + rt->afi = afi; + rt->safi = safi; + return rt; } diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h index d7c8272b..e13022bb 100644 --- a/bgpd/bgp_table.h +++ b/bgpd/bgp_table.h @@ -30,7 +30,11 @@ typedef enum struct bgp_table { bgp_table_t type; - + + /* afi/safi of this table */ + afi_t afi; + safi_t safi; + /* The owner of this 'bgp_table' structure. */ void *owner; @@ -63,7 +67,7 @@ struct bgp_node #define BGP_NODE_PROCESS_SCHEDULED (1 << 0) }; -extern struct bgp_table *bgp_table_init (void); +extern struct bgp_table *bgp_table_init (afi_t, safi_t); extern void bgp_table_finish (struct bgp_table *); extern void bgp_unlock_node (struct bgp_node *node); extern void bgp_node_delete (struct bgp_node *node); diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index 6ee48236..3cf4dc59 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -2048,7 +2048,7 @@ peer_rsclient_set_vty (struct vty *vty, const char *peer_str, if (ret < 0) return bgp_vty_return (vty, ret); - peer->rib[afi][safi] = bgp_table_init (); + peer->rib[afi][safi] = bgp_table_init (afi, safi); peer->rib[afi][safi]->type = BGP_TABLE_RSCLIENT; peer->rib[afi][safi]->owner = peer; diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 1f1b4fd2..9f694f5e 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1866,9 +1866,9 @@ bgp_create (as_t *as, const char *name) for (afi = AFI_IP; afi < AFI_MAX; afi++) for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) { - bgp->route[afi][safi] = bgp_table_init (); - bgp->aggregate[afi][safi] = bgp_table_init (); - bgp->rib[afi][safi] = bgp_table_init (); + bgp->route[afi][safi] = bgp_table_init (afi, safi); + bgp->aggregate[afi][safi] = bgp_table_init (afi, safi); + bgp->rib[afi][safi] = bgp_table_init (afi, safi); } bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 44045c06..9e2aa3e0 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -40,7 +40,6 @@ struct bgp_master /* work queues */ struct work_queue *process_main_queue; struct work_queue *process_rsclient_queue; - struct work_queue *clear_node_queue; /* BGP port number. */ u_int16_t port; @@ -387,6 +386,7 @@ struct peer #define PEER_STATUS_GROUP (1 << 4) /* peer-group conf */ #define PEER_STATUS_NSF_MODE (1 << 5) /* NSF aware peer */ #define PEER_STATUS_NSF_WAIT (1 << 6) /* wait comeback peer */ +#define PEER_STATUS_CLEARING (1 << 7) /* peers table being cleared */ /* Peer status af flags (reset in bgp_stop) */ u_int16_t af_sflags[AFI_MAX][SAFI_MAX]; @@ -398,7 +398,6 @@ struct peer #define PEER_STATUS_EOR_SEND (1 << 5) /* end-of-rib send to peer */ #define PEER_STATUS_EOR_RECEIVED (1 << 6) /* end-of-rib received from peer */ - /* Default attribute value for the peer. */ u_int32_t config; #define PEER_CONFIG_WEIGHT (1 << 0) /* Default weight. */ @@ -433,7 +432,10 @@ struct peer struct thread *t_pmax_restart; struct thread *t_gr_restart; struct thread *t_gr_stale; - + + /* workqueues */ + struct work_queue *clear_node_queue; + /* Statistics field */ u_int32_t open_in; /* Open message input count */ u_int32_t open_out; /* Open message output count */ -- cgit v1.2.1