diff options
Diffstat (limited to 'bgpd')
-rw-r--r-- | bgpd/Makefile.am | 4 | ||||
-rw-r--r-- | bgpd/bgp_aspath.c | 2 | ||||
-rw-r--r-- | bgpd/bgp_aspath.h | 1 | ||||
-rw-r--r-- | bgpd/bgp_ecommunity.c | 2 | ||||
-rw-r--r-- | bgpd/bgp_ecommunity.h | 1 | ||||
-rw-r--r-- | bgpd/bgp_main.c | 4 | ||||
-rw-r--r-- | bgpd/bgp_mpath.c | 737 | ||||
-rw-r--r-- | bgpd/bgp_mpath.h | 80 | ||||
-rw-r--r-- | bgpd/bgp_open.c | 31 | ||||
-rw-r--r-- | bgpd/bgp_packet.c | 15 | ||||
-rw-r--r-- | bgpd/bgp_route.c | 156 | ||||
-rw-r--r-- | bgpd/bgp_route.h | 6 | ||||
-rw-r--r-- | bgpd/bgp_vty.c | 158 | ||||
-rw-r--r-- | bgpd/bgp_zebra.c | 46 | ||||
-rw-r--r-- | bgpd/bgp_zebra.h | 6 | ||||
-rw-r--r-- | bgpd/bgpd.c | 9 | ||||
-rw-r--r-- | bgpd/bgpd.h | 6 |
17 files changed, 1204 insertions, 60 deletions
diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am index 1b17d386..3051555b 100644 --- a/bgpd/Makefile.am +++ b/bgpd/Makefile.am @@ -15,14 +15,14 @@ libbgp_a_SOURCES = \ bgp_debug.c bgp_route.c bgp_zebra.c bgp_open.c bgp_routemap.c \ bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \ bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \ - bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c + bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c noinst_HEADERS = \ bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \ bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \ bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \ bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \ - bgp_advertise.h bgp_snmp.h bgp_vty.h + bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgpd_SOURCES = bgp_main.c bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@ diff --git a/bgpd/bgp_aspath.c b/bgpd/bgp_aspath.c index 776c7127..64b36775 100644 --- a/bgpd/bgp_aspath.c +++ b/bgpd/bgp_aspath.c @@ -1819,7 +1819,7 @@ aspath_key_make (void *p) } /* If two aspath have same value then return 1 else return 0 */ -static int +int aspath_cmp (const void *arg1, const void *arg2) { const struct assegment *seg1 = ((const struct aspath *)arg1)->segments; diff --git a/bgpd/bgp_aspath.h b/bgpd/bgp_aspath.h index d55f9ce6..fc4dd6b7 100644 --- a/bgpd/bgp_aspath.h +++ b/bgpd/bgp_aspath.h @@ -72,6 +72,7 @@ extern struct aspath *aspath_prepend (struct aspath *, struct aspath *); extern struct aspath *aspath_filter_exclude (struct aspath *, struct aspath *); extern struct aspath *aspath_add_seq (struct aspath *, as_t); extern struct aspath *aspath_add_confed_seq (struct aspath *, as_t); +extern int aspath_cmp (const void *, const void *); extern int aspath_cmp_left (const struct aspath *, const struct aspath *); extern int aspath_cmp_left_confed (const struct aspath *, const struct aspath *); extern struct aspath *aspath_delete_confed_seq (struct aspath *); diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c index 440c15a4..9f4aaa4b 100644 --- a/bgpd/bgp_ecommunity.c +++ b/bgpd/bgp_ecommunity.c @@ -99,7 +99,7 @@ ecommunity_add_val (struct ecommunity *ecom, struct ecommunity_val *eval) /* This function takes pointer to Extended Communites strucutre then create a new Extended Communities structure by uniq and sort each Extended Communities value. */ -static struct ecommunity * +struct ecommunity * ecommunity_uniq_sort (struct ecommunity *ecom) { int i; diff --git a/bgpd/bgp_ecommunity.h b/bgpd/bgp_ecommunity.h index 2f59dc40..92affccc 100644 --- a/bgpd/bgp_ecommunity.h +++ b/bgpd/bgp_ecommunity.h @@ -71,6 +71,7 @@ extern void ecommunity_free (struct ecommunity **); extern struct ecommunity *ecommunity_parse (u_int8_t *, u_short); extern struct ecommunity *ecommunity_dup (struct ecommunity *); extern struct ecommunity *ecommunity_merge (struct ecommunity *, struct ecommunity *); +extern struct ecommunity *ecommunity_uniq_sort (struct ecommunity *); extern struct ecommunity *ecommunity_intern (struct ecommunity *); extern int ecommunity_cmp (const void *, const void *); extern void ecommunity_unintern (struct ecommunity **); diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 8dede587..0f1d4829 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -35,6 +35,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "routemap.h" #include "filter.h" #include "plist.h" +#include "stream.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" @@ -47,6 +48,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_clist.h" #include "bgpd/bgp_debug.h" #include "bgpd/bgp_filter.h" +#include "bgpd/bgp_zebra.h" /* bgpd options, we use GNU getopt library. */ static const struct option longopts[] = @@ -297,6 +299,8 @@ bgp_exit (int status) zclient_free (zclient); if (zlookup) zclient_free (zlookup); + if (bgp_nexthop_buf) + stream_free (bgp_nexthop_buf); /* reverse bgp_master_init */ if (master) diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c new file mode 100644 index 00000000..d07830d1 --- /dev/null +++ b/bgpd/bgp_mpath.c @@ -0,0 +1,737 @@ +/* $QuaggaId: Format:%an, %ai, %h$ $ + * + * BGP Multipath + * Copyright (C) 2010 Google Inc. + * + * This file is part of Quagga + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Quagga; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#include "command.h" +#include "prefix.h" +#include "linklist.h" +#include "sockunion.h" +#include "memory.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_community.h" +#include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_mpath.h" + +/* + * bgp_maximum_paths_set + * + * Record maximum-paths configuration for BGP instance + */ +int +bgp_maximum_paths_set (struct bgp *bgp, afi_t afi, safi_t safi, + int peertype, u_int16_t maxpaths) +{ + if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX)) + return -1; + + switch (peertype) + { + case BGP_PEER_IBGP: + bgp->maxpaths[afi][safi].maxpaths_ibgp = maxpaths; + break; + case BGP_PEER_EBGP: + bgp->maxpaths[afi][safi].maxpaths_ebgp = maxpaths; + break; + default: + return -1; + } + + return 0; +} + +/* + * bgp_maximum_paths_unset + * + * Remove maximum-paths configuration from BGP instance + */ +int +bgp_maximum_paths_unset (struct bgp *bgp, afi_t afi, safi_t safi, + int peertype) +{ + if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX)) + return -1; + + switch (peertype) + { + case BGP_PEER_IBGP: + bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS; + break; + case BGP_PEER_EBGP: + bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS; + break; + default: + return -1; + } + + return 0; +} + +/* + * bgp_info_nexthop_cmp + * + * Compare the nexthops of two paths. Return value is less than, equal to, + * or greater than zero if bi1 is respectively less than, equal to, + * or greater than bi2. + */ +static int +bgp_info_nexthop_cmp (struct bgp_info *bi1, struct bgp_info *bi2) +{ + struct attr_extra *ae1, *ae2; + int compare; + + ae1 = bi1->attr->extra; + ae2 = bi2->attr->extra; + + compare = IPV4_ADDR_CMP (&bi1->attr->nexthop, &bi2->attr->nexthop); + + if (!compare && ae1 && ae2 && (ae1->mp_nexthop_len == ae2->mp_nexthop_len)) + { + switch (ae1->mp_nexthop_len) + { + case 4: + case 12: + compare = IPV4_ADDR_CMP (&ae1->mp_nexthop_global_in, + &ae2->mp_nexthop_global_in); + break; +#ifdef HAVE_IPV6 + case 16: + compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global, + &ae2->mp_nexthop_global); + break; + case 32: + compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global, + &ae2->mp_nexthop_global); + if (!compare) + compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_local, + &ae2->mp_nexthop_local); + break; +#endif /* HAVE_IPV6 */ + } + } + + return compare; +} + +/* + * bgp_info_mpath_cmp + * + * This function determines our multipath list ordering. By ordering + * the list we can deterministically select which paths are included + * in the multipath set. The ordering also helps in detecting changes + * in the multipath selection so we can detect whether to send an + * update to zebra. + * + * The order of paths is determined first by received nexthop, and then + * by peer address if the nexthops are the same. + */ +static int +bgp_info_mpath_cmp (void *val1, void *val2) +{ + struct bgp_info *bi1, *bi2; + int compare; + + bi1 = val1; + bi2 = val2; + + compare = bgp_info_nexthop_cmp (bi1, bi2); + + if (!compare) + compare = sockunion_cmp (bi1->peer->su_remote, bi2->peer->su_remote); + + return compare; +} + +/* + * bgp_mp_list_init + * + * Initialize the mp_list, which holds the list of multipaths + * selected by bgp_best_selection + */ +void +bgp_mp_list_init (struct list *mp_list) +{ + assert (mp_list); + memset (mp_list, 0, sizeof (struct list)); + mp_list->cmp = bgp_info_mpath_cmp; +} + +/* + * bgp_mp_list_clear + * + * Clears all entries out of the mp_list + */ +void +bgp_mp_list_clear (struct list *mp_list) +{ + assert (mp_list); + list_delete_all_node (mp_list); +} + +/* + * bgp_mp_list_add + * + * Adds a multipath entry to the mp_list + */ +void +bgp_mp_list_add (struct list *mp_list, struct bgp_info *mpinfo) +{ + assert (mp_list && mpinfo); + listnode_add_sort (mp_list, mpinfo); +} + +/* + * bgp_info_mpath_new + * + * Allocate and zero memory for a new bgp_info_mpath element + */ +static struct bgp_info_mpath * +bgp_info_mpath_new (void) +{ + struct bgp_info_mpath *new_mpath; + new_mpath = XCALLOC (MTYPE_BGP_MPATH_INFO, sizeof (struct bgp_info_mpath)); + return new_mpath; +} + +/* + * bgp_info_mpath_free + * + * Release resources for a bgp_info_mpath element and zero out pointer + */ +void +bgp_info_mpath_free (struct bgp_info_mpath **mpath) +{ + if (mpath && *mpath) + { + if ((*mpath)->mp_attr) + bgp_attr_unintern ((*mpath)->mp_attr); + XFREE (MTYPE_BGP_MPATH_INFO, *mpath); + *mpath = NULL; + } +} + +/* + * bgp_info_mpath_get + * + * Fetch the mpath element for the given bgp_info. Used for + * doing lazy allocation. + */ +static struct bgp_info_mpath * +bgp_info_mpath_get (struct bgp_info *binfo) +{ + struct bgp_info_mpath *mpath; + if (!binfo->mpath) + { + mpath = bgp_info_mpath_new(); + if (!mpath) + return NULL; + binfo->mpath = mpath; + mpath->mp_info = binfo; + } + return binfo->mpath; +} + +/* + * bgp_info_mpath_enqueue + * + * Enqueue a path onto the multipath list given the previous multipath + * list entry + */ +static void +bgp_info_mpath_enqueue (struct bgp_info *prev_info, struct bgp_info *binfo) +{ + struct bgp_info_mpath *prev, *mpath; + + prev = bgp_info_mpath_get (prev_info); + mpath = bgp_info_mpath_get (binfo); + if (!prev || !mpath) + return; + + mpath->mp_next = prev->mp_next; + mpath->mp_prev = prev; + if (prev->mp_next) + prev->mp_next->mp_prev = mpath; + prev->mp_next = mpath; + + SET_FLAG (binfo->flags, BGP_INFO_MULTIPATH); +} + +/* + * bgp_info_mpath_dequeue + * + * Remove a path from the multipath list + */ +void +bgp_info_mpath_dequeue (struct bgp_info *binfo) +{ + struct bgp_info_mpath *mpath = binfo->mpath; + if (!mpath) + return; + if (mpath->mp_prev) + mpath->mp_prev->mp_next = mpath->mp_next; + if (mpath->mp_next) + mpath->mp_next->mp_prev = mpath->mp_prev; + mpath->mp_next = mpath->mp_prev = NULL; + UNSET_FLAG (binfo->flags, BGP_INFO_MULTIPATH); +} + +/* + * bgp_info_mpath_next + * + * Given a bgp_info, return the next multipath entry + */ +struct bgp_info * +bgp_info_mpath_next (struct bgp_info *binfo) +{ + if (!binfo->mpath || !binfo->mpath->mp_next) + return NULL; + return binfo->mpath->mp_next->mp_info; +} + +/* + * bgp_info_mpath_first + * + * Given bestpath bgp_info, return the first multipath entry. + */ +struct bgp_info * +bgp_info_mpath_first (struct bgp_info *binfo) +{ + return bgp_info_mpath_next (binfo); +} + +/* + * bgp_info_mpath_count + * + * Given the bestpath bgp_info, return the number of multipath entries + */ +u_int32_t +bgp_info_mpath_count (struct bgp_info *binfo) +{ + if (!binfo->mpath) + return 0; + return binfo->mpath->mp_count; +} + +/* + * bgp_info_mpath_count_set + * + * Sets the count of multipaths into bestpath's mpath element + */ +static void +bgp_info_mpath_count_set (struct bgp_info *binfo, u_int32_t count) +{ + struct bgp_info_mpath *mpath; + if (!count && !binfo->mpath) + return; + mpath = bgp_info_mpath_get (binfo); + if (!mpath) + return; + mpath->mp_count = count; +} + +/* + * bgp_info_mpath_attr + * + * Given bestpath bgp_info, return aggregated attribute set used + * for advertising the multipath route + */ +struct attr * +bgp_info_mpath_attr (struct bgp_info *binfo) +{ + if (!binfo->mpath) + return NULL; + return binfo->mpath->mp_attr; +} + +/* + * bgp_info_mpath_attr_set + * + * Sets the aggregated attribute into bestpath's mpath element + */ +static void +bgp_info_mpath_attr_set (struct bgp_info *binfo, struct attr *attr) +{ + struct bgp_info_mpath *mpath; + if (!attr && !binfo->mpath) + return; + mpath = bgp_info_mpath_get (binfo); + if (!mpath) + return; + mpath->mp_attr = attr; +} + +/* + * bgp_info_mpath_update + * + * Compare and sync up the multipath list with the mp_list generated by + * bgp_best_selection + */ +void +bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best, + struct bgp_info *old_best, struct list *mp_list, + struct bgp_maxpaths_cfg *mpath_cfg) +{ + u_int16_t maxpaths, mpath_count, old_mpath_count; + struct listnode *mp_node, *mp_next_node; + struct bgp_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath; + int mpath_changed, debug; + char pfx_buf[INET_ADDRSTRLEN], nh_buf[2][INET_ADDRSTRLEN]; + + mpath_changed = 0; + maxpaths = BGP_DEFAULT_MAXPATHS; + mpath_count = 0; + cur_mpath = NULL; + old_mpath_count = 0; + prev_mpath = new_best; + mp_node = listhead (mp_list); + debug = BGP_DEBUG (events, EVENTS); + + if (debug) + prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf)); + + if (new_best) + { + mpath_count++; + if (new_best != old_best) + bgp_info_mpath_dequeue (new_best); + maxpaths = (peer_sort (new_best->peer) == BGP_PEER_IBGP) ? + mpath_cfg->maxpaths_ibgp : mpath_cfg->maxpaths_ebgp; + } + + if (old_best) + { + cur_mpath = bgp_info_mpath_first (old_best); + old_mpath_count = bgp_info_mpath_count (old_best); + bgp_info_mpath_count_set (old_best, 0); + bgp_info_mpath_dequeue (old_best); + } + + /* + * We perform an ordered walk through both lists in parallel. + * The reason for the ordered walk is that if there are paths + * that were previously multipaths and are still multipaths, the walk + * should encounter them in both lists at the same time. Otherwise + * there will be paths that are in one list or another, and we + * will deal with these separately. + * + * Note that new_best might be somewhere in the mp_list, so we need + * to skip over it + */ + while (mp_node || cur_mpath) + { + /* + * We can bail out of this loop if all existing paths on the + * multipath list have been visited (for cleanup purposes) and + * the maxpath requirement is fulfulled + */ + if (!cur_mpath && (mpath_count >= maxpaths)) + break; + + mp_next_node = mp_node ? listnextnode (mp_node) : NULL; + next_mpath = cur_mpath ? bgp_info_mpath_next (cur_mpath) : NULL; + + /* + * If equal, the path was a multipath and is still a multipath. + * Insert onto new multipath list if maxpaths allows. + */ + if (mp_node && (listgetdata (mp_node) == cur_mpath)) + { + list_delete_node (mp_list, mp_node); + bgp_info_mpath_dequeue (cur_mpath); + if ((mpath_count < maxpaths) && + bgp_info_nexthop_cmp (prev_mpath, cur_mpath)) + { + bgp_info_mpath_enqueue (prev_mpath, cur_mpath); + prev_mpath = cur_mpath; + mpath_count++; + } + else + { + mpath_changed = 1; + if (debug) + zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf, + inet_ntop (AF_INET, &cur_mpath->attr->nexthop, + nh_buf[0], sizeof (nh_buf[0])), + sockunion2str (cur_mpath->peer->su_remote, + nh_buf[1], sizeof (nh_buf[1]))); + } + mp_node = mp_next_node; + cur_mpath = next_mpath; + continue; + } + + if (cur_mpath && (!mp_node || + (bgp_info_mpath_cmp (cur_mpath, + listgetdata (mp_node)) < 0))) + { + /* + * If here, we have an old multipath and either the mp_list + * is finished or the next mp_node points to a later + * multipath, so we need to purge this path from the + * multipath list + */ + bgp_info_mpath_dequeue (cur_mpath); + mpath_changed = 1; + if (debug) + zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf, + inet_ntop (AF_INET, &cur_mpath->attr->nexthop, + nh_buf[0], sizeof (nh_buf[0])), + sockunion2str (cur_mpath->peer->su_remote, + nh_buf[1], sizeof (nh_buf[1]))); + cur_mpath = next_mpath; + } + else + { + /* + * If here, we have a path on the mp_list that was not previously + * a multipath (due to non-equivalance or maxpaths exceeded), + * or the matching multipath is sorted later in the multipath + * list. Before we enqueue the path on the new multipath list, + * make sure its not on the old_best multipath list or referenced + * via next_mpath: + * - If next_mpath points to this new path, update next_mpath to + * point to the multipath after this one + * - Dequeue the path from the multipath list just to make sure + */ + new_mpath = listgetdata (mp_node); + list_delete_node (mp_list, mp_node); + if ((mpath_count < maxpaths) && (new_mpath != new_best) && + bgp_info_nexthop_cmp (prev_mpath, new_mpath)) + { + if (new_mpath == next_mpath) + next_mpath = bgp_info_mpath_next (new_mpath); + bgp_info_mpath_dequeue (new_mpath); + + bgp_info_mpath_enqueue (prev_mpath, new_mpath); + prev_mpath = new_mpath; + mpath_changed = 1; + mpath_count++; + if (debug) + zlog_debug ("%s add mpath nexthop %s peer %s", pfx_buf, + inet_ntop (AF_INET, &new_mpath->attr->nexthop, + nh_buf[0], sizeof (nh_buf[0])), + sockunion2str (new_mpath->peer->su_remote, + nh_buf[1], sizeof (nh_buf[1]))); + } + mp_node = mp_next_node; + } + } + + if (new_best) + { + bgp_info_mpath_count_set (new_best, mpath_count-1); + if (mpath_changed || (bgp_info_mpath_count (new_best) != old_mpath_count)) + SET_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG); + } +} + +/* + * bgp_mp_dmed_deselect + * + * Clean up multipath information for BGP_INFO_DMED_SELECTED path that + * is not selected as best path + */ +void +bgp_mp_dmed_deselect (struct bgp_info *dmed_best) +{ + struct bgp_info *mpinfo, *mpnext; + + if (!dmed_best) + return; + + for (mpinfo = bgp_info_mpath_first (dmed_best); mpinfo; mpinfo = mpnext) + { + mpnext = bgp_info_mpath_next (mpinfo); + bgp_info_mpath_dequeue (mpinfo); + } + + bgp_info_mpath_count_set (dmed_best, 0); + UNSET_FLAG (dmed_best->flags, BGP_INFO_MULTIPATH_CHG); + assert (bgp_info_mpath_first (dmed_best) == 0); +} + +/* + * bgp_info_mpath_aggregate_update + * + * Set the multipath aggregate attribute. We need to see if the + * aggregate has changed and then set the ATTR_CHANGED flag on the + * bestpath info so that a peer update will be generated. The + * change is detected by generating the current attribute, + * interning it, and then comparing the interned pointer with the + * current value. We can skip this generate/compare step if there + * is no change in multipath selection and no attribute change in + * any multipath. + */ +void +bgp_info_mpath_aggregate_update (struct bgp_info *new_best, + struct bgp_info *old_best) +{ + struct bgp_info *mpinfo; + struct aspath *aspath; + struct aspath *asmerge; + struct attr *new_attr, *old_attr; + u_char origin, attr_chg; + struct community *community, *commerge; + struct ecommunity *ecomm, *ecommerge; + struct attr_extra *ae; + struct attr attr = { 0 }; + + if (old_best && (old_best != new_best) && + (old_attr = bgp_info_mpath_attr (old_best))) + { + bgp_attr_unintern (old_attr); + bgp_info_mpath_attr_set (old_best, NULL); + } + + if (!new_best) + return; + + if (!bgp_info_mpath_count (new_best)) + { + if ((new_attr = bgp_info_mpath_attr (new_best))) + { + bgp_attr_unintern (new_attr); + bgp_info_mpath_attr_set (new_best, NULL); + SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED); + } + return; + } + + /* + * Bail out here if the following is true: + * - MULTIPATH_CHG bit is not set on new_best, and + * - No change in bestpath, and + * - ATTR_CHANGED bit is not set on new_best or any of the multipaths + */ + if (!CHECK_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG) && + (old_best == new_best)) + { + attr_chg = 0; + + if (CHECK_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED)) + attr_chg = 1; + else + for (mpinfo = bgp_info_mpath_first (new_best); mpinfo; + mpinfo = bgp_info_mpath_next (mpinfo)) + { + if (CHECK_FLAG (mpinfo->flags, BGP_INFO_ATTR_CHANGED)) + { + attr_chg = 1; + break; + } + } + + if (!attr_chg) + { + assert (bgp_info_mpath_attr (new_best)); + return; + } + } + + bgp_attr_dup (&attr, new_best->attr); + + /* aggregate attribute from multipath constituents */ + aspath = aspath_dup (attr.aspath); + origin = attr.origin; + community = attr.community ? community_dup (attr.community) : NULL; + ae = attr.extra; + ecomm = (ae && ae->ecommunity) ? ecommunity_dup (ae->ecommunity) : NULL; + + for (mpinfo = bgp_info_mpath_first (new_best); mpinfo; + mpinfo = bgp_info_mpath_next (mpinfo)) + { + asmerge = aspath_aggregate (aspath, mpinfo->attr->aspath); + aspath_free (aspath); + aspath = asmerge; + + if (origin < mpinfo->attr->origin) + origin = mpinfo->attr->origin; + + if (mpinfo->attr->community) + { + if (community) + { + commerge = community_merge (community, mpinfo->attr->community); + community = community_uniq_sort (commerge); + community_free (commerge); + } + else + community = community_dup (mpinfo->attr->community); + } + + ae = mpinfo->attr->extra; + if (ae && ae->ecommunity) + { + if (ecomm) + { + ecommerge = ecommunity_merge (ecomm, ae->ecommunity); + ecomm = ecommunity_uniq_sort (ecommerge); + ecommunity_free (ecommerge); + } + else + ecomm = ecommunity_dup (ae->ecommunity); + } + } + + attr.aspath = aspath; + attr.origin = origin; + if (community) + { + attr.community = community; + attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_COMMUNITIES); + } + if (ecomm) + { + ae = bgp_attr_extra_get (&attr); + ae->ecommunity = ecomm; + attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES); + } + + /* Zap multipath attr nexthop so we set nexthop to self */ + attr.nexthop.s_addr = 0; +#ifdef HAVE_IPV6 + if (attr.extra) + memset (&attr.extra->mp_nexthop_global, 0, sizeof (struct in6_addr)); +#endif /* HAVE_IPV6 */ + + /* TODO: should we set ATOMIC_AGGREGATE and AGGREGATOR? */ + + new_attr = bgp_attr_intern (&attr); + bgp_attr_extra_free (&attr); + + if (new_attr != bgp_info_mpath_attr (new_best)) + { + if ((old_attr = bgp_info_mpath_attr (new_best))) + bgp_attr_unintern (old_attr); + bgp_info_mpath_attr_set (new_best, new_attr); + SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED); + } + else + bgp_attr_unintern (new_attr); +} diff --git a/bgpd/bgp_mpath.h b/bgpd/bgp_mpath.h new file mode 100644 index 00000000..37b9ac8b --- /dev/null +++ b/bgpd/bgp_mpath.h @@ -0,0 +1,80 @@ +/* $QuaggaId: Format:%an, %ai, %h$ $ + * + * BGP Multipath + * Copyright (C) 2010 Google Inc. + * + * This file is part of Quagga + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Quagga; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _QUAGGA_BGP_MPATH_H +#define _QUAGGA_BGP_MPATH_H + +/* BGP default maximum-paths */ +#define BGP_DEFAULT_MAXPATHS 1 + +/* Supplemental information linked to bgp_info for keeping track of + * multipath selections, lazily allocated to save memory + */ +struct bgp_info_mpath +{ + /* Points to the first multipath (on bestpath) or the next multipath */ + struct bgp_info_mpath *mp_next; + + /* Points to the previous multipath or NULL on bestpath */ + struct bgp_info_mpath *mp_prev; + + /* Points to bgp_info associated with this multipath info */ + struct bgp_info *mp_info; + + /* When attached to best path, the number of selected multipaths */ + u_int32_t mp_count; + + /* Aggregated attribute for advertising multipath route */ + struct attr *mp_attr; +}; + +/* Functions to support maximum-paths configuration */ +extern int bgp_maximum_paths_set (struct bgp *, afi_t, safi_t, int, u_int16_t); +extern int bgp_maximum_paths_unset (struct bgp *, afi_t, safi_t, int); + +/* Functions used by bgp_best_selection to record current + * multipath selections + */ +extern void bgp_mp_list_init (struct list *); +extern void bgp_mp_list_clear (struct list *); +extern void bgp_mp_list_add (struct list *, struct bgp_info *); +extern void bgp_mp_dmed_deselect (struct bgp_info *); +extern void bgp_info_mpath_update (struct bgp_node *, struct bgp_info *, + struct bgp_info *, struct list *, + struct bgp_maxpaths_cfg *); +extern void bgp_info_mpath_aggregate_update (struct bgp_info *, + struct bgp_info *); + +/* Unlink and free multipath information associated with a bgp_info */ +extern void bgp_info_mpath_dequeue (struct bgp_info *); +extern void bgp_info_mpath_free (struct bgp_info_mpath **); + +/* Walk list of multipaths associated with a best path */ +extern struct bgp_info *bgp_info_mpath_first (struct bgp_info *); +extern struct bgp_info *bgp_info_mpath_next (struct bgp_info *); + +/* Accessors for multipath information */ +extern u_int32_t bgp_info_mpath_count (struct bgp_info *); +extern struct attr *bgp_info_mpath_attr (struct bgp_info *); + +#endif /* _QUAGGA_BGP_MPATH_H */ diff --git a/bgpd/bgp_open.c b/bgpd/bgp_open.c index b5b50bb5..0326d01b 100644 --- a/bgpd/bgp_open.c +++ b/bgpd/bgp_open.c @@ -464,11 +464,16 @@ static const size_t cap_minsizes[] = [CAPABILITY_CODE_ORF_OLD] = sizeof (struct capability_orf_entry), }; -/* Parse given capability. +/** + * Parse given capability. * XXX: This is reading into a stream, but not using stream API + * + * @param[out] mp_capability Set to 1 on return iff one or more Multiprotocol + * capabilities were encountered. */ static int -bgp_capability_parse (struct peer *peer, size_t length, u_char **error) +bgp_capability_parse (struct peer *peer, size_t length, int *mp_capability, + u_char **error) { int ret; struct stream *s = BGP_INPUT (peer); @@ -540,6 +545,8 @@ bgp_capability_parse (struct peer *peer, size_t length, u_char **error) { case CAPABILITY_CODE_MP: { + *mp_capability = 1; + /* Ignore capability when override-capability is set. */ if (! CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) { @@ -712,9 +719,13 @@ end: return as4; } -/* Parse open option */ +/** + * Parse open option. + * + * @param[out] mp_capability @see bgp_capability_parse() for semantics. + */ int -bgp_open_option_parse (struct peer *peer, u_char length, int *capability) +bgp_open_option_parse (struct peer *peer, u_char length, int *mp_capability) { int ret; u_char *error; @@ -767,8 +778,7 @@ bgp_open_option_parse (struct peer *peer, u_char length, int *capability) ret = bgp_auth_parse (peer, opt_length); break; case BGP_OPEN_OPT_CAP: - ret = bgp_capability_parse (peer, opt_length, &error); - *capability = 1; + ret = bgp_capability_parse (peer, opt_length, mp_capability, &error); break; default: bgp_notify_send (peer, @@ -811,9 +821,10 @@ bgp_open_option_parse (struct peer *peer, u_char length, int *capability) } } - /* Check there is no common capability send Unsupported Capability + /* Check there are no common AFI/SAFIs and send Unsupported Capability error. */ - if (*capability && ! CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) + if (*mp_capability && + ! CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) { if (! peer->afc_nego[AFI_IP][SAFI_UNICAST] && ! peer->afc_nego[AFI_IP][SAFI_MULTICAST] @@ -821,7 +832,9 @@ bgp_open_option_parse (struct peer *peer, u_char length, int *capability) && ! peer->afc_nego[AFI_IP6][SAFI_UNICAST] && ! peer->afc_nego[AFI_IP6][SAFI_MULTICAST]) { - plog_err (peer->log, "%s [Error] No common capability", peer->host); + plog_err (peer->log, "%s [Error] Configured AFI/SAFIs do not " + "overlap with received MP capabilities", + peer->host); if (error != error_data) diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 5d8087a8..390b5563 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -1152,7 +1152,7 @@ bgp_open_receive (struct peer *peer, bgp_size_t size) as_t as4 = 0; struct peer *realpeer; struct in_addr remote_id; - int capability; + int mp_capability; u_int8_t notify_data_remote_as[2]; u_int8_t notify_data_remote_id[4]; @@ -1174,7 +1174,7 @@ bgp_open_receive (struct peer *peer, bgp_size_t size) inet_ntoa (remote_id)); /* BEGIN to read the capability here, but dont do it yet */ - capability = 0; + mp_capability = 0; optlen = stream_getc (peer->ibuf); if (optlen != 0) @@ -1459,7 +1459,7 @@ bgp_open_receive (struct peer *peer, bgp_size_t size) /* Open option part parse. */ if (optlen != 0) { - if ((ret = bgp_open_option_parse (peer, optlen, &capability)) < 0) + if ((ret = bgp_open_option_parse (peer, optlen, &mp_capability)) < 0) { bgp_notify_send (peer, BGP_NOTIFY_OPEN_ERR, @@ -1474,8 +1474,13 @@ bgp_open_receive (struct peer *peer, bgp_size_t size) peer->host); } - /* Override capability. */ - if (! capability || CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) + /* + * Assume that the peer supports the locally configured set of + * AFI/SAFIs if the peer did not send us any Mulitiprotocol + * capabilities, or if 'override-capability' is configured. + */ + if (! mp_capability || + CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) { peer->afc_nego[AFI_IP][SAFI_UNICAST] = peer->afc[AFI_IP][SAFI_UNICAST]; peer->afc_nego[AFI_IP][SAFI_MULTICAST] = peer->afc[AFI_IP][SAFI_MULTICAST]; diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index ba530321..087f8396 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -54,6 +54,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_advertise.h" #include "bgpd/bgp_zebra.h" #include "bgpd/bgp_vty.h" +#include "bgpd/bgp_mpath.h" /* Extern from bgp_dump.c */ extern const char *bgp_origin_str[]; @@ -140,6 +141,7 @@ bgp_info_free (struct bgp_info *binfo) bgp_attr_unintern (&binfo->attr); bgp_info_extra_free (&binfo->extra); + bgp_info_mpath_free (&binfo->mpath); peer_unlock (binfo->peer); /* bgp_info peer reference */ @@ -210,6 +212,7 @@ bgp_info_reap (struct bgp_node *rn, struct bgp_info *ri) else rn->info = ri->next; + bgp_info_mpath_dequeue (ri); bgp_info_unlock (ri); bgp_unlock_node (rn); } @@ -316,7 +319,8 @@ bgp_med_value (struct attr *attr, struct bgp *bgp) /* Compare two bgp route entity. br is preferable then return 1. */ static int -bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist) +bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, + int *paths_eq) { u_int32_t new_pref; u_int32_t exist_pref; @@ -331,6 +335,9 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist) int internal_as_route = 0; int confed_as_route = 0; int ret; + uint32_t newm, existm; + + *paths_eq = 0; /* 0. Null check. */ if (new == NULL) @@ -454,18 +461,32 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist) return 0; /* 8. IGP metric check. */ - if (new->extra || exist->extra) - { - uint32_t newm = (new->extra ? new->extra->igpmetric : 0); - uint32_t existm = (exist->extra ? exist->extra->igpmetric : 0); - - if (newm < existm) - return 1; - if (newm > existm) - return 0; - } + newm = (new->extra ? new->extra->igpmetric : 0); + existm = (exist->extra ? exist->extra->igpmetric : 0); + if (newm < existm) + ret = 1; + if (newm > existm) + ret = 0; /* 9. Maximum path check. */ + if (newm == existm) + { + if ((peer_sort (new->peer) == BGP_PEER_IBGP)) + { + if (aspath_cmp (new->attr->aspath, exist->attr->aspath)) + *paths_eq = 1; + } + else if (new->peer->as == exist->peer->as) + *paths_eq = 1; + } + else + { + /* + * TODO: If unequal cost ibgp multipath is enabled we can + * mark the paths as equal here instead of returning + */ + return ret; + } /* 10. If both paths are external, prefer the path that was received first (the oldest one). This step minimizes route-flap, since a @@ -764,10 +785,12 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, struct bgp *bgp; int transparent; int reflect; + struct attr *riattr; from = ri->peer; filter = &peer->filter[afi][safi]; bgp = peer->bgp; + riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; if (DISABLE_BGP_ANNOUNCE) return 0; @@ -782,11 +805,11 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, /* If peer's id and route's nexthop are same. draft-ietf-idr-bgp4-23 5.1.3 */ if (p->family == AF_INET - && IPV4_ADDR_SAME(&peer->remote_id, &ri->attr->nexthop)) + && IPV4_ADDR_SAME(&peer->remote_id, &riattr->nexthop)) return 0; #ifdef HAVE_IPV6 if (p->family == AF_INET6 - && IPV6_ADDR_SAME(&peer->remote_id, &ri->attr->nexthop)) + && IPV6_ADDR_SAME(&peer->remote_id, &riattr->nexthop)) return 0; #endif @@ -814,14 +837,14 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, transparent = 0; /* If community is not disabled check the no-export and local. */ - if (! transparent && bgp_community_filter (peer, ri->attr)) + if (! transparent && bgp_community_filter (peer, riattr)) return 0; /* If the attribute has originator-id and it is same as remote peer's id. */ - if (ri->attr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) + if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) { - if (IPV4_ADDR_SAME (&peer->remote_id, &ri->attr->extra->originator_id)) + if (IPV4_ADDR_SAME (&peer->remote_id, &riattr->extra->originator_id)) { if (BGP_DEBUG (filter, FILTER)) zlog (peer->log, LOG_DEBUG, @@ -844,7 +867,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, } /* Output filter check. */ - if (bgp_output_filter (peer, p, ri->attr, afi, safi) == FILTER_DENY) + if (bgp_output_filter (peer, p, riattr, afi, safi) == FILTER_DENY) { if (BGP_DEBUG (filter, FILTER)) zlog (peer->log, LOG_DEBUG, @@ -857,7 +880,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, #ifdef BGP_SEND_ASPATH_CHECK /* AS path loop check. */ - if (aspath_loop_check (ri->attr->aspath, peer->as)) + if (aspath_loop_check (riattr->aspath, peer->as)) { if (BGP_DEBUG (filter, FILTER)) zlog (peer->log, LOG_DEBUG, @@ -870,7 +893,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, /* If we're a CONFED we need to loop check the CONFED ID too */ if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION)) { - if (aspath_loop_check(ri->attr->aspath, bgp->confed_id)) + if (aspath_loop_check(riattr->aspath, bgp->confed_id)) { if (BGP_DEBUG (filter, FILTER)) zlog (peer->log, LOG_DEBUG, @@ -911,7 +934,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, } /* For modify attribute, copy it to temporary structure. */ - bgp_attr_dup (attr, ri->attr); + bgp_attr_dup (attr, riattr); /* If local-preference is not set. */ if ((peer_sort (peer) == BGP_PEER_IBGP @@ -1069,9 +1092,11 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, struct bgp_filter *filter; struct bgp_info info; struct peer *from; + struct attr *riattr; from = ri->peer; filter = &rsclient->filter[afi][safi]; + riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; if (DISABLE_BGP_ANNOUNCE) return 0; @@ -1099,10 +1124,10 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, /* If the attribute has originator-id and it is same as remote peer's id. */ - if (ri->attr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) + if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) { if (IPV4_ADDR_SAME (&rsclient->remote_id, - &ri->attr->extra->originator_id)) + &riattr->extra->originator_id)) { if (BGP_DEBUG (filter, FILTER)) zlog (rsclient->log, LOG_DEBUG, @@ -1125,7 +1150,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, } /* Output filter check. */ - if (bgp_output_filter (rsclient, p, ri->attr, afi, safi) == FILTER_DENY) + if (bgp_output_filter (rsclient, p, riattr, afi, safi) == FILTER_DENY) { if (BGP_DEBUG (filter, FILTER)) zlog (rsclient->log, LOG_DEBUG, @@ -1138,7 +1163,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, #ifdef BGP_SEND_ASPATH_CHECK /* AS path loop check. */ - if (aspath_loop_check (ri->attr->aspath, rsclient->as)) + if (aspath_loop_check (riattr->aspath, rsclient->as)) { if (BGP_DEBUG (filter, FILTER)) zlog (rsclient->log, LOG_DEBUG, @@ -1149,7 +1174,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, #endif /* BGP_SEND_ASPATH_CHECK */ /* For modify attribute, copy it to temporary structure. */ - bgp_attr_dup (attr, ri->attr); + bgp_attr_dup (attr, riattr); /* next-hop-set */ if ((p->family == AF_INET && attr->nexthop.s_addr == 0) @@ -1265,7 +1290,9 @@ struct bgp_info_pair }; static void -bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair *result) +bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, + struct bgp_maxpaths_cfg *mpath_cfg, + struct bgp_info_pair *result) { struct bgp_info *new_select; struct bgp_info *old_select; @@ -1273,7 +1300,13 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair * struct bgp_info *ri1; struct bgp_info *ri2; struct bgp_info *nextri = NULL; - + int paths_eq, do_mpath; + struct list mp_list; + + bgp_mp_list_init (&mp_list); + do_mpath = (mpath_cfg->maxpaths_ebgp != BGP_DEFAULT_MAXPATHS || + mpath_cfg->maxpaths_ibgp != BGP_DEFAULT_MAXPATHS); + /* bgp deterministic-med */ new_select = NULL; if (bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED)) @@ -1285,6 +1318,9 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair * continue; new_select = ri1; + if (do_mpath) + bgp_mp_list_add (&mp_list, ri1); + old_select = CHECK_FLAG (ri1->flags, BGP_INFO_SELECTED) ? ri1 : NULL; if (ri1->next) for (ri2 = ri1->next; ri2; ri2 = ri2->next) { @@ -1297,17 +1333,30 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair * || aspath_cmp_left_confed (ri1->attr->aspath, ri2->attr->aspath)) { - if (bgp_info_cmp (bgp, ri2, new_select)) + if (CHECK_FLAG (ri2->flags, BGP_INFO_SELECTED)) + old_select = ri2; + if (bgp_info_cmp (bgp, ri2, new_select, &paths_eq)) { bgp_info_unset_flag (rn, new_select, BGP_INFO_DMED_SELECTED); new_select = ri2; + if (do_mpath && !paths_eq) + { + bgp_mp_list_clear (&mp_list); + bgp_mp_list_add (&mp_list, ri2); + } } + if (do_mpath && paths_eq) + bgp_mp_list_add (&mp_list, ri2); + bgp_info_set_flag (rn, ri2, BGP_INFO_DMED_CHECK); } } bgp_info_set_flag (rn, new_select, BGP_INFO_DMED_CHECK); bgp_info_set_flag (rn, new_select, BGP_INFO_DMED_SELECTED); + + bgp_info_mpath_update (rn, new_select, old_select, &mp_list, mpath_cfg); + bgp_mp_list_clear (&mp_list); } /* Check old selected route and new selected route. */ @@ -1339,14 +1388,37 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair * bgp_info_unset_flag (rn, ri, BGP_INFO_DMED_CHECK); bgp_info_unset_flag (rn, ri, BGP_INFO_DMED_SELECTED); - if (bgp_info_cmp (bgp, ri, new_select)) - new_select = ri; + if (bgp_info_cmp (bgp, ri, new_select, &paths_eq)) + { + if (do_mpath && bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED)) + bgp_mp_dmed_deselect (new_select); + + new_select = ri; + + if (do_mpath && !paths_eq) + { + bgp_mp_list_clear (&mp_list); + bgp_mp_list_add (&mp_list, ri); + } + } + else if (do_mpath && bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED)) + bgp_mp_dmed_deselect (ri); + + if (do_mpath && paths_eq) + bgp_mp_list_add (&mp_list, ri); } - result->old = old_select; - result->new = new_select; - return; + if (!bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED)) + bgp_info_mpath_update (rn, new_select, old_select, &mp_list, mpath_cfg); + + bgp_info_mpath_aggregate_update (new_select, old_select); + bgp_mp_list_clear (&mp_list); + + result->old = old_select; + result->new = new_select; + + return; } static int @@ -1420,7 +1492,7 @@ bgp_process_rsclient (struct work_queue *wq, void *data) struct peer *rsclient = rn->table->owner; /* Best path selection. */ - bgp_best_selection (bgp, rn, &old_and_new); + bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new); new_select = old_and_new.new; old_select = old_and_new.old; @@ -1440,7 +1512,8 @@ bgp_process_rsclient (struct work_queue *wq, void *data) { bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED); bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED); - } + UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); + } bgp_process_announce_selected (rsclient, new_select, rn, afi, safi); @@ -1454,6 +1527,7 @@ bgp_process_rsclient (struct work_queue *wq, void *data) { bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED); bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED); + UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } bgp_process_announce_selected (rsclient, new_select, rn, afi, safi); } @@ -1481,7 +1555,7 @@ bgp_process_main (struct work_queue *wq, void *data) struct peer *peer; /* Best path selection. */ - bgp_best_selection (bgp, rn, &old_and_new); + bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new); old_select = old_and_new.old; new_select = old_and_new.new; @@ -1490,9 +1564,11 @@ bgp_process_main (struct work_queue *wq, void *data) { if (! CHECK_FLAG (old_select->flags, BGP_INFO_ATTR_CHANGED)) { - if (CHECK_FLAG (old_select->flags, BGP_INFO_IGP_CHANGED)) + if (CHECK_FLAG (old_select->flags, BGP_INFO_IGP_CHANGED) || + CHECK_FLAG (old_select->flags, BGP_INFO_MULTIPATH_CHG)) bgp_zebra_announce (p, old_select, bgp, safi); + UNSET_FLAG (old_select->flags, BGP_INFO_MULTIPATH_CHG); UNSET_FLAG (rn->flags, BGP_NODE_PROCESS_SCHEDULED); return WQ_SUCCESS; } @@ -1504,6 +1580,7 @@ bgp_process_main (struct work_queue *wq, void *data) { bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED); bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED); + UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } @@ -5932,6 +6009,11 @@ route_vty_out_detail (struct vty *vty, struct bgp *bgp, struct prefix *p, if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_ATOMIC_AGGREGATE)) vty_out (vty, ", atomic-aggregate"); + if (CHECK_FLAG (binfo->flags, BGP_INFO_MULTIPATH) || + (CHECK_FLAG (binfo->flags, BGP_INFO_SELECTED) && + bgp_info_mpath_count (binfo))) + vty_out (vty, ", multipath"); + if (CHECK_FLAG (binfo->flags, BGP_INFO_SELECTED)) vty_out (vty, ", best"); diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 7adc573b..3d2eea51 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -57,6 +57,10 @@ struct bgp_info /* Extra information */ struct bgp_info_extra *extra; + + /* Multipath information */ + struct bgp_info_mpath *mpath; + /* Uptime. */ time_t uptime; @@ -76,6 +80,8 @@ struct bgp_info #define BGP_INFO_STALE (1 << 8) #define BGP_INFO_REMOVED (1 << 9) #define BGP_INFO_COUNTED (1 << 10) +#define BGP_INFO_MULTIPATH (1 << 11) +#define BGP_INFO_MULTIPATH_CHG (1 << 12) /* BGP route type. This can be static, RIP, OSPF, BGP etc. */ u_char type; diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index f65bb157..cbe0b443 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -48,6 +48,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_zebra.h" #include "bgpd/bgp_table.h" #include "bgpd/bgp_vty.h" +#include "bgpd/bgp_mpath.h" extern struct in_addr router_id_zebra; @@ -650,6 +651,149 @@ DEFUN (no_bgp_confederation_peers, return CMD_SUCCESS; } +/* Maximum-paths configuration */ +DEFUN (bgp_maxpaths, + bgp_maxpaths_cmd, + "maximum-paths <1-255>", + "Forward packets over multiple paths\n" + "Number of paths\n") +{ + struct bgp *bgp; + u_int16_t maxpaths; + int ret; + + bgp = vty->index; + + VTY_GET_INTEGER_RANGE ("maximum-paths", maxpaths, argv[0], 1, 255); + + ret = bgp_maximum_paths_set (bgp, bgp_node_afi (vty), bgp_node_safi(vty), + BGP_PEER_EBGP, maxpaths); + if (ret < 0) + { + vty_out (vty, + "%% Failed to set maximum-paths %u for afi %u, safi %u%s", + maxpaths, bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE); + return CMD_WARNING; + } + + return CMD_SUCCESS; +} + +DEFUN (bgp_maxpaths_ibgp, + bgp_maxpaths_ibgp_cmd, + "maximum-paths ibgp <1-255>", + "Forward packets over multiple paths\n" + "iBGP-multipath\n" + "Number of paths\n") +{ + struct bgp *bgp; + u_int16_t maxpaths; + int ret; + + bgp = vty->index; + + VTY_GET_INTEGER_RANGE ("maximum-paths", maxpaths, argv[0], 1, 255); + + ret = bgp_maximum_paths_set (bgp, bgp_node_afi (vty), bgp_node_safi(vty), + BGP_PEER_IBGP, maxpaths); + if (ret < 0) + { + vty_out (vty, + "%% Failed to set maximum-paths ibgp %u for afi %u, safi %u%s", + maxpaths, bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE); + return CMD_WARNING; + } + + return CMD_SUCCESS; +} + +DEFUN (no_bgp_maxpaths, + no_bgp_maxpaths_cmd, + "no maximum-paths", + NO_STR + "Forward packets over multiple paths\n" + "Number of paths\n") +{ + struct bgp *bgp; + int ret; + + bgp = vty->index; + + ret = bgp_maximum_paths_unset (bgp, bgp_node_afi (vty), bgp_node_safi(vty), + BGP_PEER_EBGP); + if (ret < 0) + { + vty_out (vty, + "%% Failed to unset maximum-paths for afi %u, safi %u%s", + bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE); + return CMD_WARNING; + } + + return CMD_SUCCESS; +} + +ALIAS (no_bgp_maxpaths, + no_bgp_maxpaths_arg_cmd, + "no maximum-paths <1-255>", + NO_STR + "Forward packets over multiple paths\n" + "Number of paths\n") + +DEFUN (no_bgp_maxpaths_ibgp, + no_bgp_maxpaths_ibgp_cmd, + "no maximum-paths ibgp", + NO_STR + "Forward packets over multiple paths\n" + "iBGP-multipath\n" + "Number of paths\n") +{ + struct bgp *bgp; + int ret; + + bgp = vty->index; + + ret = bgp_maximum_paths_unset (bgp, bgp_node_afi (vty), bgp_node_safi(vty), + BGP_PEER_IBGP); + if (ret < 0) + { + vty_out (vty, + "%% Failed to unset maximum-paths ibgp for afi %u, safi %u%s", + bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE); + return CMD_WARNING; + } + + return CMD_SUCCESS; +} + +ALIAS (no_bgp_maxpaths_ibgp, + no_bgp_maxpaths_ibgp_arg_cmd, + "no maximum-paths ibgp <1-255>", + NO_STR + "Forward packets over multiple paths\n" + "iBGP-multipath\n" + "Number of paths\n") + +int +bgp_config_write_maxpaths (struct vty *vty, struct bgp *bgp, afi_t afi, + safi_t safi, int *write) +{ + if (bgp->maxpaths[afi][safi].maxpaths_ebgp != BGP_DEFAULT_MAXPATHS) + { + bgp_config_write_family_header (vty, afi, safi, write); + vty_out (vty, " maximum-paths %d%s", + bgp->maxpaths[afi][safi].maxpaths_ebgp, VTY_NEWLINE); + } + + if (bgp->maxpaths[afi][safi].maxpaths_ibgp != BGP_DEFAULT_MAXPATHS) + { + bgp_config_write_family_header (vty, afi, safi, write); + vty_out (vty, " maximum-paths ibgp %d%s", + bgp->maxpaths[afi][safi].maxpaths_ibgp, VTY_NEWLINE); + } + + return 0; +} + /* BGP timers. */ DEFUN (bgp_timers, @@ -8937,6 +9081,20 @@ bgp_vty_init (void) install_element (BGP_NODE, &bgp_confederation_peers_cmd); install_element (BGP_NODE, &no_bgp_confederation_peers_cmd); + /* "maximum-paths" commands. */ + install_element (BGP_NODE, &bgp_maxpaths_cmd); + install_element (BGP_NODE, &no_bgp_maxpaths_cmd); + install_element (BGP_NODE, &no_bgp_maxpaths_arg_cmd); + install_element (BGP_IPV4_NODE, &bgp_maxpaths_cmd); + install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_cmd); + install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_arg_cmd); + install_element (BGP_NODE, &bgp_maxpaths_ibgp_cmd); + install_element (BGP_NODE, &no_bgp_maxpaths_ibgp_cmd); + install_element (BGP_NODE, &no_bgp_maxpaths_ibgp_arg_cmd); + install_element (BGP_IPV4_NODE, &bgp_maxpaths_ibgp_cmd); + install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_ibgp_cmd); + install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_ibgp_arg_cmd); + /* "timers bgp" commands. */ install_element (BGP_NODE, &bgp_timers_cmd); install_element (BGP_NODE, &no_bgp_timers_cmd); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 20feba0f..5c0dbb88 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -37,11 +37,15 @@ Boston, MA 02111-1307, USA. */ #include "bgpd/bgp_zebra.h" #include "bgpd/bgp_fsm.h" #include "bgpd/bgp_debug.h" +#include "bgpd/bgp_mpath.h" /* All information about zebra. */ struct zclient *zclient = NULL; struct in_addr router_id_zebra; +/* Growable buffer for nexthops sent to zebra */ +struct stream *bgp_nexthop_buf = NULL; + /* Router-id update message from zebra. */ static int bgp_router_id_update (int command, struct zclient *zclient, zebra_size_t length) @@ -648,6 +652,8 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa int flags; u_char distance; struct peer *peer; + struct bgp_info *mpinfo; + size_t oldsize, newsize; if (zclient->sock < 0) return; @@ -668,6 +674,21 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa || CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)) SET_FLAG (flags, ZEBRA_FLAG_INTERNAL); + /* resize nexthop buffer size if necessary */ + if ((oldsize = stream_get_size (bgp_nexthop_buf)) < + (sizeof (struct in_addr *) * (bgp_info_mpath_count (info) + 1))) + { + newsize = (sizeof (struct in_addr *) * (bgp_info_mpath_count (info) + 1)); + newsize = stream_resize (bgp_nexthop_buf, newsize); + if (newsize == oldsize) + { + zlog_err ("can't resize nexthop buffer"); + return; + } + } + + stream_reset (bgp_nexthop_buf); + if (p->family == AF_INET) { struct zapi_ipv4 api; @@ -675,13 +696,20 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa api.flags = flags; nexthop = &info->attr->nexthop; + stream_put (bgp_nexthop_buf, &nexthop, sizeof (struct in_addr *)); + for (mpinfo = bgp_info_mpath_first (info); mpinfo; + mpinfo = bgp_info_mpath_next (mpinfo)) + { + nexthop = &mpinfo->attr->nexthop; + stream_put (bgp_nexthop_buf, &nexthop, sizeof (struct in_addr *)); + } api.type = ZEBRA_ROUTE_BGP; api.message = 0; api.safi = safi; SET_FLAG (api.message, ZAPI_MESSAGE_NEXTHOP); - api.nexthop_num = 1; - api.nexthop = &nexthop; + api.nexthop_num = 1 + bgp_info_mpath_count (info); + api.nexthop = (struct in_addr **)STREAM_DATA (bgp_nexthop_buf); api.ifindex_num = 0; SET_FLAG (api.message, ZAPI_MESSAGE_METRIC); api.metric = info->attr->med; @@ -696,12 +724,18 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa if (BGP_DEBUG(zebra, ZEBRA)) { + int i; char buf[2][INET_ADDRSTRLEN]; - zlog_debug("Zebra send: IPv4 route add %s/%d nexthop %s metric %u", + zlog_debug("Zebra send: IPv4 route add %s/%d nexthop %s metric %u" + " count %d", inet_ntop(AF_INET, &p->u.prefix4, buf[0], sizeof(buf[0])), p->prefixlen, - inet_ntop(AF_INET, nexthop, buf[1], sizeof(buf[1])), - api.metric); + inet_ntop(AF_INET, api.nexthop[0], buf[1], sizeof(buf[1])), + api.metric, api.nexthop_num); + for (i = 1; i < api.nexthop_num; i++) + zlog_debug("Zebra send: IPv4 route add [nexthop %d] %s", + i, inet_ntop(AF_INET, api.nexthop[i], buf[1], + sizeof(buf[1]))); } zapi_ipv4_route (ZEBRA_IPV4_ROUTE_ADD, zclient, @@ -1050,4 +1084,6 @@ bgp_zebra_init (void) /* Interface related init. */ if_init (); + + bgp_nexthop_buf = stream_new(BGP_NEXTHOP_BUF_SIZE); } diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index 1351333f..80991930 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -21,8 +21,14 @@ Boston, MA 02111-1307, USA. */ #ifndef _QUAGGA_BGP_ZEBRA_H #define _QUAGGA_BGP_ZEBRA_H +#define BGP_NEXTHOP_BUF_SIZE (8 * sizeof (struct in_addr *)) + +extern struct stream *bgp_nexthop_buf; + extern void bgp_zebra_init (void); extern int bgp_if_update_all (void); +extern int bgp_config_write_maxpaths (struct vty *, struct bgp *, afi_t, + safi_t, int *); extern int bgp_config_write_redistribute (struct vty *, struct bgp *, afi_t, safi_t, int *); extern void bgp_zebra_announce (struct prefix *, struct bgp_info *, struct bgp *, safi_t); diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 5dc014be..9c8eda88 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -57,6 +57,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_advertise.h" #include "bgpd/bgp_network.h" #include "bgpd/bgp_vty.h" +#include "bgpd/bgp_mpath.h" #ifdef HAVE_SNMP #include "bgpd/bgp_snmp.h" #endif /* HAVE_SNMP */ @@ -1947,6 +1948,8 @@ bgp_create (as_t *as, const char *name) bgp->route[afi][safi] = bgp_table_init (afi, safi); bgp->aggregate[afi][safi] = bgp_table_init (afi, safi); bgp->rib[afi][safi] = bgp_table_init (afi, safi); + bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS; + bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS; } bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; @@ -5121,6 +5124,9 @@ bgp_config_write_family (struct vty *vty, struct bgp *bgp, afi_t afi, } } } + + bgp_config_write_maxpaths (vty, bgp, afi, safi, &write); + if (write) vty_out (vty, " exit-address-family%s", VTY_NEWLINE); @@ -5294,6 +5300,9 @@ bgp_config_write (struct vty *vty) bgp_config_write_peer (vty, bgp, peer, AFI_IP, SAFI_UNICAST); } + /* maximum-paths */ + bgp_config_write_maxpaths (vty, bgp, AFI_IP, SAFI_UNICAST, &write); + /* Distance configuration. */ bgp_config_write_distance (vty, bgp); diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 892e7dec..09a3435c 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -162,6 +162,12 @@ struct bgp /* BGP graceful restart */ u_int32_t restart_time; u_int32_t stalepath_time; + + /* Maximum-paths configuration */ + struct bgp_maxpaths_cfg { + u_int16_t maxpaths_ebgp; + u_int16_t maxpaths_ibgp; + } maxpaths[AFI_MAX][SAFI_MAX]; }; /* BGP peer-group support. */ |