diff options
-rw-r--r-- | bgpd/ChangeLog | 52 | ||||
-rw-r--r-- | bgpd/bgp_debug.c | 2 | ||||
-rw-r--r-- | bgpd/bgp_fsm.c | 104 | ||||
-rw-r--r-- | bgpd/bgp_fsm.h | 17 | ||||
-rw-r--r-- | bgpd/bgp_packet.c | 27 | ||||
-rw-r--r-- | bgpd/bgp_route.c | 47 | ||||
-rw-r--r-- | bgpd/bgp_vty.c | 2 | ||||
-rw-r--r-- | bgpd/bgpd.c | 85 | ||||
-rw-r--r-- | bgpd/bgpd.h | 8 |
9 files changed, 251 insertions, 93 deletions
diff --git a/bgpd/ChangeLog b/bgpd/ChangeLog index 482beeda..02aaf3ab 100644 --- a/bgpd/ChangeLog +++ b/bgpd/ChangeLog @@ -1,3 +1,55 @@ +2006-09-14 Paul Jakma <paul.jakma@sun.com> + + * (general) Fix some niggly issues around 'shutdown' and clearing + by adding a Clearing FSM wait-state and a hidden 'Deleted' + FSM state, to allow deleted peers to 'cool off' and hit 0 + references. This introduces a slow memory leak of struct peer, + however that's more a testament to the fragility of the + reference counting than a bug in this patch, cleanup of + reference counting to fix this is to follow. + * bgpd.h: Add Clearing, Deleted states and Clearing_Completed + and event. + * bgp_debug.c: (bgp_status_msg[]) Add strings for Clearing and + Deleted. + * bgp_fsm.h: Don't allow timer/event threads to set anything + for Deleted peers. + * bgp_fsm.c: (bgp_timer_set) Add Clearing and Deleted. Deleted + needs to stop everything. + (bgp_stop) Remove explicit fsm_change_status call, the + general framework handles the transition. + (bgp_start) Log a warning if a start is attempted on a peer + that should stay down, trying to start a peer. + (struct .. FSM) Add Clearing_Completed + events, has little influence except when in state + Clearing to signal wait-state can end. + Add Clearing and Deleted states, former is a wait-state, + latter is a placeholder state to allow peers to disappear + quietly once refcounts settle. + (bgp_event) Try reduce verbosity of FSM state-change debug, + changes to same state are not interesting (Established->Established) + Allow NULL action functions in FSM. + * bgp_packet.c: (bgp_write) Use FSM events, rather than trying + to twiddle directly with FSM state behind the back of FSM. + (bgp_write_notify) ditto. + (bgp_read) Remove the vague ACCEPT_PEER peer_unlock, or else + this patch crashes, now it leaks instead. + * bgp_route.c: (bgp_clear_node_complete) Clearing_Completed + event, to end clearing. + (bgp_clear_route) See extensive comments. + * bgpd.c: (peer_free) should only be called while in Deleted, + peer refcounting controls when peer_free is called. + bgp_sync_delete should be here, not in peer_delete. + (peer_delete) Initiate delete. + Transition to Deleted state manually. + When removing peer from indices that provide visibility of it, + take great care to be idempotent wrt the reference counting + of struct peer through those indices. + Use bgp_timer_set, rather than replicating. + Call to bgp_sync_delete isn't appropriate here, sync can be + referenced while shutting down and finishing deletion. + (peer_group_bind) Take care to be idempotent wrt list references + indexing peers. + 2006-09-13 Paul Jakma <paul.jakma@sun.com> * bgp_aspath.c: (aspath_highest) new, return highest ASN in an diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c index 1b398ee8..1e0fcd1f 100644 --- a/bgpd/bgp_debug.c +++ b/bgpd/bgp_debug.c @@ -62,6 +62,8 @@ struct message bgp_status_msg[] = { OpenSent, "OpenSent" }, { OpenConfirm, "OpenConfirm" }, { Established, "Established" }, + { Clearing, "Clearing" }, + { Deleted, "Deleted" }, }; int bgp_status_msg_max = BGP_STATUS_MAX; diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 770a7911..bdb6517f 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -68,6 +68,11 @@ bgp_start_jitter (int time) return ((rand () % (time + 1)) - (time / 2)); } +/* Check if suppress start/restart of sessions to peer. */ +#define BGP_PEER_START_SUPPRESSED(P) \ + (CHECK_FLAG ((P)->flags, PEER_FLAG_SHUTDOWN) \ + || CHECK_FLAG ((P)->sflags, PEER_STATUS_PREFIX_OVERFLOW)) + /* Hook function called after bgp event is occered. And vty's neighbor command invoke this function after making neighbor structure. */ @@ -82,10 +87,7 @@ bgp_timer_set (struct peer *peer) /* First entry point of peer's finite state machine. In Idle status start timer is on unless peer is shutdown or peer is inactive. All other timer must be turned off */ - if (CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN) - || CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW) - || CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING) - || ! peer_active (peer)) + if (BGP_PEER_START_SUPPRESSED (peer) || ! peer_active (peer)) { BGP_TIMER_OFF (peer->t_start); } @@ -197,6 +199,17 @@ bgp_timer_set (struct peer *peer) } BGP_TIMER_OFF (peer->t_asorig); break; + case Deleted: + BGP_TIMER_OFF (peer->t_gr_restart); + BGP_TIMER_OFF (peer->t_gr_stale); + BGP_TIMER_OFF (peer->t_pmax_restart); + case Clearing: + BGP_TIMER_OFF (peer->t_start); + BGP_TIMER_OFF (peer->t_connect); + BGP_TIMER_OFF (peer->t_holdtime); + BGP_TIMER_OFF (peer->t_keepalive); + BGP_TIMER_OFF (peer->t_asorig); + BGP_TIMER_OFF (peer->t_routeadv); } } @@ -420,7 +433,6 @@ bgp_stop (struct peer *peer) if (peer->status == Established) { peer->dropped++; - bgp_fsm_change_status (peer, Idle); /* bgp log-neighbor-changes of neighbor Down */ if (bgp_flag_check (peer->bgp, BGP_FLAG_LOG_NEIGHBOR_CHANGES)) @@ -625,6 +637,14 @@ bgp_start (struct peer *peer) { int status; + if (BGP_PEER_START_SUPPRESSED (peer)) + { + if (BGP_DEBUG (fsm, FSM)) + plog_err (peer->log, "%s [FSM] Trying to start suppressed peer" + " - this is never supposed to happen!", peer->host); + return -1; + } + /* Scrub some information that might be left over from a previous, * session */ @@ -903,6 +923,7 @@ struct { {bgp_ignore, Idle}, /* Receive_KEEPALIVE_message */ {bgp_ignore, Idle}, /* Receive_UPDATE_message */ {bgp_ignore, Idle}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Idle}, /* Clearing_Completed */ }, { /* Connect */ @@ -919,6 +940,7 @@ struct { {bgp_ignore, Idle}, /* Receive_KEEPALIVE_message */ {bgp_ignore, Idle}, /* Receive_UPDATE_message */ {bgp_stop, Idle}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Idle}, /* Clearing_Completed */ }, { /* Active, */ @@ -935,6 +957,7 @@ struct { {bgp_ignore, Idle}, /* Receive_KEEPALIVE_message */ {bgp_ignore, Idle}, /* Receive_UPDATE_message */ {bgp_stop_with_error, Idle}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Idle}, /* Clearing_Completed */ }, { /* OpenSent, */ @@ -951,6 +974,7 @@ struct { {bgp_ignore, Idle}, /* Receive_KEEPALIVE_message */ {bgp_ignore, Idle}, /* Receive_UPDATE_message */ {bgp_stop_with_error, Idle}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Idle}, /* Clearing_Completed */ }, { /* OpenConfirm, */ @@ -967,22 +991,58 @@ struct { {bgp_establish, Established}, /* Receive_KEEPALIVE_message */ {bgp_ignore, Idle}, /* Receive_UPDATE_message */ {bgp_stop_with_error, Idle}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Idle}, /* Clearing_Completed */ }, { /* Established, */ - {bgp_ignore, Established}, /* BGP_Start */ - {bgp_stop, Idle}, /* BGP_Stop */ - {bgp_stop, Idle}, /* TCP_connection_open */ - {bgp_stop, Idle}, /* TCP_connection_closed */ - {bgp_ignore, Idle}, /* TCP_connection_open_failed */ - {bgp_stop, Idle}, /* TCP_fatal_error */ - {bgp_ignore, Idle}, /* ConnectRetry_timer_expired */ - {bgp_fsm_holdtime_expire, Idle}, /* Hold_Timer_expired */ + {bgp_ignore, Established}, /* BGP_Start */ + {bgp_stop, Clearing}, /* BGP_Stop */ + {bgp_stop, Clearing}, /* TCP_connection_open */ + {bgp_stop, Clearing}, /* TCP_connection_closed */ + {bgp_ignore, Clearing}, /* TCP_connection_open_failed */ + {bgp_stop, Clearing}, /* TCP_fatal_error */ + {bgp_ignore, Clearing}, /* ConnectRetry_timer_expired */ + {bgp_fsm_holdtime_expire, Clearing}, /* Hold_Timer_expired */ {bgp_fsm_keepalive_expire, Established}, /* KeepAlive_timer_expired */ - {bgp_stop, Idle}, /* Receive_OPEN_message */ - {bgp_fsm_keepalive, Established}, /* Receive_KEEPALIVE_message */ - {bgp_fsm_update, Established}, /* Receive_UPDATE_message */ - {bgp_stop_with_error, Idle}, /* Receive_NOTIFICATION_message */ + {bgp_stop, Clearing}, /* Receive_OPEN_message */ + {bgp_fsm_keepalive, Established}, /* Receive_KEEPALIVE_message */ + {bgp_fsm_update, Established}, /* Receive_UPDATE_message */ + {bgp_stop_with_error, Clearing}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Idle}, /* Clearing_Completed */ + }, + { + /* Clearing, */ + {bgp_ignore, Clearing}, /* BGP_Start */ + {bgp_ignore, Clearing}, /* BGP_Stop */ + {bgp_ignore, Clearing}, /* TCP_connection_open */ + {bgp_ignore, Clearing}, /* TCP_connection_closed */ + {bgp_ignore, Clearing}, /* TCP_connection_open_failed */ + {bgp_ignore, Clearing}, /* TCP_fatal_error */ + {bgp_ignore, Clearing}, /* ConnectRetry_timer_expired */ + {bgp_ignore, Clearing}, /* Hold_Timer_expired */ + {bgp_ignore, Clearing}, /* KeepAlive_timer_expired */ + {bgp_ignore, Clearing}, /* Receive_OPEN_message */ + {bgp_ignore, Clearing}, /* Receive_KEEPALIVE_message */ + {bgp_ignore, Clearing}, /* Receive_UPDATE_message */ + {bgp_ignore, Clearing}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Idle }, /* Clearing_Completed */ + }, + { + /* Deleted, */ + {bgp_ignore, Deleted}, /* BGP_Start */ + {bgp_ignore, Deleted}, /* BGP_Stop */ + {bgp_ignore, Deleted}, /* TCP_connection_open */ + {bgp_ignore, Deleted}, /* TCP_connection_closed */ + {bgp_ignore, Deleted}, /* TCP_connection_open_failed */ + {bgp_ignore, Deleted}, /* TCP_fatal_error */ + {bgp_ignore, Deleted}, /* ConnectRetry_timer_expired */ + {bgp_ignore, Deleted}, /* Hold_Timer_expired */ + {bgp_ignore, Deleted}, /* KeepAlive_timer_expired */ + {bgp_ignore, Deleted}, /* Receive_OPEN_message */ + {bgp_ignore, Deleted}, /* Receive_KEEPALIVE_message */ + {bgp_ignore, Deleted}, /* Receive_UPDATE_message */ + {bgp_ignore, Deleted}, /* Receive_NOTIFICATION_message */ + {bgp_ignore, Deleted}, /* Clearing_Completed */ }, }; @@ -1001,14 +1061,15 @@ static const char *bgp_event_str[] = "Receive_OPEN_message", "Receive_KEEPALIVE_message", "Receive_UPDATE_message", - "Receive_NOTIFICATION_message" + "Receive_NOTIFICATION_message", + "Clearing_Completed", }; /* Execute event process. */ int bgp_event (struct thread *thread) { - int ret; + int ret = 0; int event; int next; struct peer *peer; @@ -1019,14 +1080,15 @@ bgp_event (struct thread *thread) /* Logging this event. */ next = FSM [peer->status -1][event - 1].next_state; - if (BGP_DEBUG (fsm, FSM)) + if (BGP_DEBUG (fsm, FSM) && peer->status != next) plog_debug (peer->log, "%s [FSM] %s (%s->%s)", peer->host, bgp_event_str[event], LOOKUP (bgp_status_msg, peer->status), LOOKUP (bgp_status_msg, next)); /* Call function. */ - ret = (*(FSM [peer->status - 1][event - 1].func))(peer); + if (FSM [peer->status -1][event - 1].func) + ret = (*(FSM [peer->status - 1][event - 1].func))(peer); /* When function do not want proceed next job return -1. */ if (ret >= 0) diff --git a/bgpd/bgp_fsm.h b/bgpd/bgp_fsm.h index e90f3b43..0a5d3715 100644 --- a/bgpd/bgp_fsm.h +++ b/bgpd/bgp_fsm.h @@ -43,7 +43,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define BGP_WRITE_ON(T,F,V) \ do { \ - if (!T) \ + if (!(T) && (peer->status != Deleted)) \ { \ peer_lock (peer); \ THREAD_WRITE_ON(master,(T),(F),peer,(V)); \ @@ -61,7 +61,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define BGP_TIMER_ON(T,F,V) \ do { \ - if (!T) \ + if (!(T) && (peer->status != Deleted)) \ { \ peer_lock (peer); \ THREAD_TIMER_ON(master,(T),(F),peer,(V)); \ @@ -79,8 +79,11 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define BGP_EVENT_ADD(P,E) \ do { \ - peer_lock (peer); /* bgp event reference */ \ - thread_add_event (master, bgp_event, (P), (E)); \ + if ((P)->status != Deleted) \ + { \ + peer_lock (peer); /* bgp event reference */ \ + thread_add_event (master, bgp_event, (P), (E)); \ + } \ } while (0) #define BGP_EVENT_DELETE(P) \ @@ -90,6 +93,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA thread_cancel_event (master, (P)); \ } while (0) +#define BGP_EVENT_FLUSH_ADD(P,E) \ + do { \ + BGP_EVENT_DELETE(P); \ + BGP_EVENT_ADD(P,E); \ + } while (0) + /* Prototypes. */ extern int bgp_event (struct thread *); extern int bgp_stop (struct peer *peer); diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 8b024a1c..da59d329 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -637,9 +637,7 @@ bgp_write (struct thread *thread) if (write_errno == EWOULDBLOCK || write_errno == EAGAIN) break; - BGP_EVENT_ADD (peer, BGP_Stop); - peer->status = Idle; - bgp_timer_set (peer); + BGP_EVENT_FLUSH_ADD (peer, TCP_fatal_error); return 0; } if (num != writenum) @@ -673,10 +671,8 @@ bgp_write (struct thread *thread) if (peer->v_start >= (60 * 2)) peer->v_start = (60 * 2); - BGP_EVENT_ADD (peer, BGP_Stop); - /*bgp_stop (peer);*/ - peer->status = Idle; - bgp_timer_set (peer); + /* Flush any existing events */ + BGP_EVENT_FLUSH_ADD (peer, BGP_Stop); return 0; case BGP_MSG_KEEPALIVE: peer->keepalive_out++; @@ -721,9 +717,7 @@ bgp_write_notify (struct peer *peer) ret = writen (peer->fd, STREAM_DATA (s), stream_get_endp (s)); if (ret <= 0) { - BGP_EVENT_ADD (peer, BGP_Stop); - peer->status = Idle; - bgp_timer_set (peer); + BGP_EVENT_FLUSH_ADD (peer, TCP_fatal_error); return 0; } @@ -743,10 +737,7 @@ bgp_write_notify (struct peer *peer) if (peer->v_start >= (60 * 2)) peer->v_start = (60 * 2); - /* We don't call event manager at here for avoiding other events. */ - bgp_stop (peer); - peer->status = Idle; - bgp_timer_set (peer); + BGP_EVENT_FLUSH_ADD (peer, BGP_Stop); return 0; } @@ -2375,14 +2366,6 @@ bgp_read (struct thread *thread) if (BGP_DEBUG (events, EVENTS)) zlog_debug ("%s [Event] Accepting BGP peer delete", peer->host); peer_delete (peer); - /* we've lost track of a reference to ACCEPT_PEER somehow. It doesnt - * _seem_ to be the 'update realpeer with accept peer' hack, yet it - * *must* be.. Very very odd, but I give up trying to - * root cause this - ACCEPT_PEER is a dirty hack, it should be fixed - * instead, which would make root-causing this a moot point.. - * A hack because of a hack, appropriate. - */ - peer_unlock (peer); /* god knows what reference... ACCEPT_PEER sucks */ } return 0; } diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 2ce2ef4d..5dde41de 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -2527,11 +2527,10 @@ bgp_clear_node_complete (struct work_queue *wq) { struct peer *peer = wq->spec.data; - UNSET_FLAG (peer->sflags, PEER_STATUS_CLEARING); peer_unlock (peer); /* bgp_clear_node_complete */ /* Tickle FSM to start moving again */ - BGP_EVENT_ADD (peer, BGP_Start); + BGP_EVENT_ADD (peer, Clearing_Completed); } static void @@ -2593,9 +2592,10 @@ bgp_clear_route (struct peer *peer, afi_t afi, safi_t safi) if (peer->clear_node_queue == NULL) bgp_clear_node_queue_init (peer); - /* bgp_fsm.c will not bring CLEARING sessions out of Idle this - * protects against peers which flap faster than we can we clear, - * which could lead to: + /* bgp_fsm.c keeps sessions in state Clearing, not transitioning to + * Idle until it receives a Clearing_Completed event. This protects + * against peers which flap faster than we can we clear, which could + * lead to: * * a) race with routes from the new session being installed before * clear_route_node visits the node (to delete the route of that @@ -2604,11 +2604,8 @@ bgp_clear_route (struct peer *peer, afi_t afi, safi_t safi) * on the process_main queue. Fast-flapping could cause that queue * to grow and grow. */ - if (!CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING)) - { - SET_FLAG (peer->sflags, PEER_STATUS_CLEARING); - peer_lock (peer); /* bgp_clear_node_complete */ - } + if (!peer->clear_node_queue->thread) + peer_lock (peer); /* bgp_clear_node_complete */ if (safi != SAFI_MPLS_VPN) bgp_clear_route_table (peer, afi, safi, NULL, NULL); @@ -2624,11 +2621,37 @@ bgp_clear_route (struct peer *peer, afi_t afi, safi_t safi) bgp_clear_route_table (peer, afi, safi, NULL, rsclient); } - /* If no routes were cleared, nothing was added to workqueue, run the - * completion function now. + /* If no routes were cleared, nothing was added to workqueue, the + * completion function won't be run by workqueue code - call it here. + * + * Additionally, there is a presumption in FSM that clearing is only + * needed if peer state is Established - peers in pre-Established states + * shouldn't have any route-update state associated with them (in or out). + * + * We still get here from FSM through bgp_stop->clear_route_all in + * pre-Established though, so this is a useful sanity check to ensure + * the assumption above holds. + * + * At some future point, this check could be move to the top of the + * function, and do a quick early-return when state is + * pre-Established, avoiding above list and table scans. Once we're + * sure it is safe.. */ if (!peer->clear_node_queue->thread) bgp_clear_node_complete (peer->clear_node_queue); + else + { + /* clearing queue scheduled. Normal if in Established state + * (and about to transition out of it), but otherwise... + */ + if (peer->status != Established) + { + plog_err (peer->log, "%s [Error] State %s is not Established," + " but routes were cleared - bug!", + peer->host, LOOKUP (bgp_status_msg, peer->status)); + assert (peer->status == Established); + } + } } void diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index ec4b6c22..b108164f 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -6693,8 +6693,6 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi) vty_out (vty, " Idle (Admin)"); else if (CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW)) vty_out (vty, " Idle (PfxCt)"); - else if (CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING)) - vty_out (vty, " Idle (Clrng)"); else vty_out (vty, " %-11s", LOOKUP(bgp_status_msg, peer->status)); } diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 8ed598d2..733b33a6 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -687,6 +687,15 @@ peer_sort (struct peer *peer) static inline void peer_free (struct peer *peer) { + assert (peer->status == Deleted); + + /* this /ought/ to have been done already through bgp_stop earlier, + * but just to be sure.. + */ + bgp_timer_set (peer); + BGP_READ_OFF (peer->t_read); + BGP_WRITE_OFF (peer->t_write); + if (peer->desc) XFREE (MTYPE_PEER_DESC, peer->desc); @@ -704,6 +713,7 @@ peer_free (struct peer *peer) if (peer->clear_node_queue) work_queue_free (peer->clear_node_queue); + bgp_sync_delete (peer); memset (peer, 0, sizeof (struct peer)); XFREE (MTYPE_BGP_PEER, peer); @@ -714,7 +724,8 @@ struct peer * peer_lock (struct peer *peer) { assert (peer && (peer->lock >= 0)); - + assert (peer->status != Deleted); + peer->lock++; return peer; @@ -761,18 +772,17 @@ peer_new () struct servent *sp; /* Allocate new peer. */ - peer = XMALLOC (MTYPE_BGP_PEER, sizeof (struct peer)); - memset (peer, 0, sizeof (struct peer)); + peer = XCALLOC (MTYPE_BGP_PEER, sizeof (struct peer)); /* Set default value. */ peer->fd = -1; - peer->lock = 1; peer->v_start = BGP_INIT_START_TIMER; peer->v_connect = BGP_DEFAULT_CONNECT_RETRY; peer->v_asorig = BGP_DEFAULT_ASORIGINATE; peer->status = Idle; peer->ostatus = Idle; peer->weight = 0; + peer = peer_lock (peer); /* initial reference */ /* Set default flags. */ for (afi = AFI_IP; afi < AFI_MAX; afi++) @@ -1139,7 +1149,17 @@ peer_nsf_stop (struct peer *peer) bgp_clear_route_all (peer); } -/* Delete peer from confguration. */ +/* Delete peer from confguration. + * + * The peer is moved to a dead-end "Deleted" neighbour-state, to allow + * it to "cool off" and refcounts to hit 0, at which state it is freed. + * + * This function /should/ take care to be idempotent, to guard against + * it being called multiple times through stray events that come in + * that happen to result in this function being called again. That + * said, getting here for a "Deleted" peer is a bug in the neighbour + * FSM. + */ int peer_delete (struct peer *peer) { @@ -1149,6 +1169,8 @@ peer_delete (struct peer *peer) struct bgp *bgp; struct bgp_filter *filter; + assert (peer->status != Deleted); + bgp = peer->bgp; if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT)) @@ -1158,8 +1180,13 @@ peer_delete (struct peer *peer) relationship. */ if (peer->group) { - peer = peer_unlock (peer); /* peer-group reference */ - listnode_delete (peer->group->peer, peer); + struct listnode *pn; + + if ((pn = listnode_lookup (peer->group->peer, peer))) + { + peer = peer_unlock (peer); /* group->peer list reference */ + list_delete_node (peer->group->peer, pn); + } peer->group = NULL; } @@ -1169,29 +1196,25 @@ peer_delete (struct peer *peer) */ peer->last_reset = PEER_DOWN_NEIGHBOR_DELETE; bgp_stop (peer); - bgp_fsm_change_status (peer, Idle); /* stops all timers */ + bgp_fsm_change_status (peer, Deleted); + bgp_timer_set (peer); /* stops all timers for Deleted */ - /* Stop all timers - should already have been done in bgp_stop */ - BGP_TIMER_OFF (peer->t_start); - BGP_TIMER_OFF (peer->t_connect); - BGP_TIMER_OFF (peer->t_holdtime); - BGP_TIMER_OFF (peer->t_keepalive); - BGP_TIMER_OFF (peer->t_asorig); - BGP_TIMER_OFF (peer->t_routeadv); - BGP_TIMER_OFF (peer->t_pmax_restart); - BGP_TIMER_OFF (peer->t_gr_restart); - BGP_TIMER_OFF (peer->t_gr_stale); - /* Delete from all peer list. */ if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { - peer_unlock (peer); /* bgp peer list reference */ - listnode_delete (bgp->peer, peer); + struct listnode *pn; + + if ((pn = listnode_lookup (bgp->peer, peer))) + { + peer_unlock (peer); /* bgp peer list reference */ + list_delete_node (bgp->peer, pn); + } - if (peer_rsclient_active (peer)) + if (peer_rsclient_active (peer) + && (pn = listnode_lookup (bgp->rsclient, peer))) { peer_unlock (peer); /* rsclient list reference */ - listnode_delete (bgp->rsclient, peer); + list_delete_node (bgp->rsclient, pn); } } @@ -1219,8 +1242,6 @@ peer_delete (struct peer *peer) sockunion_free (peer->su_remote); peer->su_local = peer->su_remote = NULL; - bgp_sync_delete (peer); - /* Free filter related memory. */ for (afi = AFI_IP; afi < AFI_MAX; afi++) for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) @@ -1692,7 +1713,7 @@ peer_group_bind (struct bgp *bgp, union sockunion *su, peer->group = group; peer->af_group[afi][safi] = 1; - peer = peer_lock (peer); /* peer-group group->peer reference */ + peer = peer_lock (peer); /* group->peer list reference */ listnode_add (group->peer, peer); peer_group2peer_config_copy (group, peer, afi, safi); @@ -1733,9 +1754,11 @@ peer_group_bind (struct bgp *bgp, union sockunion *su, { peer->group = group; - peer = peer_lock (peer); /* peer-group group->peer reference */ + peer = peer_lock (peer); /* group->peer list reference */ listnode_add (group->peer, peer); } + else + assert (group && peer->group == group); if (first_member) { @@ -1759,13 +1782,16 @@ peer_group_bind (struct bgp *bgp, union sockunion *su, if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) { + struct listnode *pn; + /* If it's not configured as RSERVER_CLIENT in any other address family, without being member of a peer_group, remove it from list bgp->rsclient.*/ - if (! peer_rsclient_active (peer)) + if (! peer_rsclient_active (peer) + && (pn = listnode_lookup (bgp->rsclient, peer))) { peer_unlock (peer); /* peer rsclient reference */ - listnode_delete (bgp->rsclient, peer); + list_delete_node (bgp->rsclient, pn); } bgp_table_finish (peer->rib[afi][safi]); @@ -1821,6 +1847,7 @@ peer_group_unbind (struct bgp *bgp, struct peer *peer, if (! peer_group_active (peer)) { + assert (listnode_lookup (group->peer, peer)); peer_unlock (peer); /* peer group list reference */ listnode_delete (group->peer, peer); peer->group = NULL; diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index b8ae30ae..8b180a43 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -386,7 +386,6 @@ struct peer #define PEER_STATUS_GROUP (1 << 4) /* peer-group conf */ #define PEER_STATUS_NSF_MODE (1 << 5) /* NSF aware peer */ #define PEER_STATUS_NSF_WAIT (1 << 6) /* wait comeback peer */ -#define PEER_STATUS_CLEARING (1 << 7) /* peers table being cleared */ /* Peer status af flags (reset in bgp_stop) */ u_int16_t af_sflags[AFI_MAX][SAFI_MAX]; @@ -662,7 +661,9 @@ struct bgp_nlri #define OpenSent 4 #define OpenConfirm 5 #define Established 6 -#define BGP_STATUS_MAX 7 +#define Clearing 7 +#define Deleted 8 +#define BGP_STATUS_MAX 9 /* BGP finite state machine events. */ #define BGP_Start 1 @@ -678,7 +679,8 @@ struct bgp_nlri #define Receive_KEEPALIVE_message 11 #define Receive_UPDATE_message 12 #define Receive_NOTIFICATION_message 13 -#define BGP_EVENTS_MAX 14 +#define Clearing_Completed 14 +#define BGP_EVENTS_MAX 15 /* BGP timers default value. */ #define BGP_INIT_START_TIMER 5 |