summaryrefslogtreecommitdiff
path: root/bgpd
diff options
context:
space:
mode:
authorDavid Lamparter <equinox@diac24.net>2012-04-16 13:54:37 +0200
committerDavid Lamparter <equinox@diac24.net>2012-04-16 13:54:53 +0200
commit9bb5870e26a4089f599b24adf822c4d1a9870a44 (patch)
treee4309095a512239ab47b5b0851d9d6fc596398cd /bgpd
parenta25a44dba84e9a6ac2b87e24472b6e9f959d845b (diff)
parent3cf6c2b4e43f44a977d218c96c26250654ae333e (diff)
bgpd: merge osr/google-bgp-multipath
this patchset allows collapsing multiple received routes in BGP into one multipath route that will, if readvertised, contain an aggregate of all received attributes.
Diffstat (limited to 'bgpd')
-rw-r--r--bgpd/Makefile.am4
-rw-r--r--bgpd/bgp_aspath.c2
-rw-r--r--bgpd/bgp_aspath.h1
-rw-r--r--bgpd/bgp_ecommunity.c2
-rw-r--r--bgpd/bgp_ecommunity.h1
-rw-r--r--bgpd/bgp_main.c4
-rw-r--r--bgpd/bgp_mpath.c737
-rw-r--r--bgpd/bgp_mpath.h80
-rw-r--r--bgpd/bgp_route.c156
-rw-r--r--bgpd/bgp_route.h6
-rw-r--r--bgpd/bgp_vty.c158
-rw-r--r--bgpd/bgp_zebra.c46
-rw-r--r--bgpd/bgp_zebra.h6
-rw-r--r--bgpd/bgpd.c9
-rw-r--r--bgpd/bgpd.h6
15 files changed, 1172 insertions, 46 deletions
diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am
index 1b17d386..3051555b 100644
--- a/bgpd/Makefile.am
+++ b/bgpd/Makefile.am
@@ -15,14 +15,14 @@ libbgp_a_SOURCES = \
bgp_debug.c bgp_route.c bgp_zebra.c bgp_open.c bgp_routemap.c \
bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \
bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \
- bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c
+ bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c
noinst_HEADERS = \
bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \
bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \
bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \
bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \
- bgp_advertise.h bgp_snmp.h bgp_vty.h
+ bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h
bgpd_SOURCES = bgp_main.c
bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@
diff --git a/bgpd/bgp_aspath.c b/bgpd/bgp_aspath.c
index 776c7127..64b36775 100644
--- a/bgpd/bgp_aspath.c
+++ b/bgpd/bgp_aspath.c
@@ -1819,7 +1819,7 @@ aspath_key_make (void *p)
}
/* If two aspath have same value then return 1 else return 0 */
-static int
+int
aspath_cmp (const void *arg1, const void *arg2)
{
const struct assegment *seg1 = ((const struct aspath *)arg1)->segments;
diff --git a/bgpd/bgp_aspath.h b/bgpd/bgp_aspath.h
index d55f9ce6..fc4dd6b7 100644
--- a/bgpd/bgp_aspath.h
+++ b/bgpd/bgp_aspath.h
@@ -72,6 +72,7 @@ extern struct aspath *aspath_prepend (struct aspath *, struct aspath *);
extern struct aspath *aspath_filter_exclude (struct aspath *, struct aspath *);
extern struct aspath *aspath_add_seq (struct aspath *, as_t);
extern struct aspath *aspath_add_confed_seq (struct aspath *, as_t);
+extern int aspath_cmp (const void *, const void *);
extern int aspath_cmp_left (const struct aspath *, const struct aspath *);
extern int aspath_cmp_left_confed (const struct aspath *, const struct aspath *);
extern struct aspath *aspath_delete_confed_seq (struct aspath *);
diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c
index 440c15a4..9f4aaa4b 100644
--- a/bgpd/bgp_ecommunity.c
+++ b/bgpd/bgp_ecommunity.c
@@ -99,7 +99,7 @@ ecommunity_add_val (struct ecommunity *ecom, struct ecommunity_val *eval)
/* This function takes pointer to Extended Communites strucutre then
create a new Extended Communities structure by uniq and sort each
Extended Communities value. */
-static struct ecommunity *
+struct ecommunity *
ecommunity_uniq_sort (struct ecommunity *ecom)
{
int i;
diff --git a/bgpd/bgp_ecommunity.h b/bgpd/bgp_ecommunity.h
index 2f59dc40..92affccc 100644
--- a/bgpd/bgp_ecommunity.h
+++ b/bgpd/bgp_ecommunity.h
@@ -71,6 +71,7 @@ extern void ecommunity_free (struct ecommunity **);
extern struct ecommunity *ecommunity_parse (u_int8_t *, u_short);
extern struct ecommunity *ecommunity_dup (struct ecommunity *);
extern struct ecommunity *ecommunity_merge (struct ecommunity *, struct ecommunity *);
+extern struct ecommunity *ecommunity_uniq_sort (struct ecommunity *);
extern struct ecommunity *ecommunity_intern (struct ecommunity *);
extern int ecommunity_cmp (const void *, const void *);
extern void ecommunity_unintern (struct ecommunity **);
diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c
index 8dede587..0f1d4829 100644
--- a/bgpd/bgp_main.c
+++ b/bgpd/bgp_main.c
@@ -35,6 +35,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#include "routemap.h"
#include "filter.h"
#include "plist.h"
+#include "stream.h"
#include "bgpd/bgpd.h"
#include "bgpd/bgp_attr.h"
@@ -47,6 +48,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#include "bgpd/bgp_clist.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_filter.h"
+#include "bgpd/bgp_zebra.h"
/* bgpd options, we use GNU getopt library. */
static const struct option longopts[] =
@@ -297,6 +299,8 @@ bgp_exit (int status)
zclient_free (zclient);
if (zlookup)
zclient_free (zlookup);
+ if (bgp_nexthop_buf)
+ stream_free (bgp_nexthop_buf);
/* reverse bgp_master_init */
if (master)
diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c
new file mode 100644
index 00000000..d07830d1
--- /dev/null
+++ b/bgpd/bgp_mpath.c
@@ -0,0 +1,737 @@
+/* $QuaggaId: Format:%an, %ai, %h$ $
+ *
+ * BGP Multipath
+ * Copyright (C) 2010 Google Inc.
+ *
+ * This file is part of Quagga
+ *
+ * Quagga is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * Quagga is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Quagga; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "prefix.h"
+#include "linklist.h"
+#include "sockunion.h"
+#include "memory.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_aspath.h"
+#include "bgpd/bgp_community.h"
+#include "bgpd/bgp_ecommunity.h"
+#include "bgpd/bgp_mpath.h"
+
+/*
+ * bgp_maximum_paths_set
+ *
+ * Record maximum-paths configuration for BGP instance
+ */
+int
+bgp_maximum_paths_set (struct bgp *bgp, afi_t afi, safi_t safi,
+ int peertype, u_int16_t maxpaths)
+{
+ if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
+ return -1;
+
+ switch (peertype)
+ {
+ case BGP_PEER_IBGP:
+ bgp->maxpaths[afi][safi].maxpaths_ibgp = maxpaths;
+ break;
+ case BGP_PEER_EBGP:
+ bgp->maxpaths[afi][safi].maxpaths_ebgp = maxpaths;
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * bgp_maximum_paths_unset
+ *
+ * Remove maximum-paths configuration from BGP instance
+ */
+int
+bgp_maximum_paths_unset (struct bgp *bgp, afi_t afi, safi_t safi,
+ int peertype)
+{
+ if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
+ return -1;
+
+ switch (peertype)
+ {
+ case BGP_PEER_IBGP:
+ bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
+ break;
+ case BGP_PEER_EBGP:
+ bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * bgp_info_nexthop_cmp
+ *
+ * Compare the nexthops of two paths. Return value is less than, equal to,
+ * or greater than zero if bi1 is respectively less than, equal to,
+ * or greater than bi2.
+ */
+static int
+bgp_info_nexthop_cmp (struct bgp_info *bi1, struct bgp_info *bi2)
+{
+ struct attr_extra *ae1, *ae2;
+ int compare;
+
+ ae1 = bi1->attr->extra;
+ ae2 = bi2->attr->extra;
+
+ compare = IPV4_ADDR_CMP (&bi1->attr->nexthop, &bi2->attr->nexthop);
+
+ if (!compare && ae1 && ae2 && (ae1->mp_nexthop_len == ae2->mp_nexthop_len))
+ {
+ switch (ae1->mp_nexthop_len)
+ {
+ case 4:
+ case 12:
+ compare = IPV4_ADDR_CMP (&ae1->mp_nexthop_global_in,
+ &ae2->mp_nexthop_global_in);
+ break;
+#ifdef HAVE_IPV6
+ case 16:
+ compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
+ &ae2->mp_nexthop_global);
+ break;
+ case 32:
+ compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
+ &ae2->mp_nexthop_global);
+ if (!compare)
+ compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_local,
+ &ae2->mp_nexthop_local);
+ break;
+#endif /* HAVE_IPV6 */
+ }
+ }
+
+ return compare;
+}
+
+/*
+ * bgp_info_mpath_cmp
+ *
+ * This function determines our multipath list ordering. By ordering
+ * the list we can deterministically select which paths are included
+ * in the multipath set. The ordering also helps in detecting changes
+ * in the multipath selection so we can detect whether to send an
+ * update to zebra.
+ *
+ * The order of paths is determined first by received nexthop, and then
+ * by peer address if the nexthops are the same.
+ */
+static int
+bgp_info_mpath_cmp (void *val1, void *val2)
+{
+ struct bgp_info *bi1, *bi2;
+ int compare;
+
+ bi1 = val1;
+ bi2 = val2;
+
+ compare = bgp_info_nexthop_cmp (bi1, bi2);
+
+ if (!compare)
+ compare = sockunion_cmp (bi1->peer->su_remote, bi2->peer->su_remote);
+
+ return compare;
+}
+
+/*
+ * bgp_mp_list_init
+ *
+ * Initialize the mp_list, which holds the list of multipaths
+ * selected by bgp_best_selection
+ */
+void
+bgp_mp_list_init (struct list *mp_list)
+{
+ assert (mp_list);
+ memset (mp_list, 0, sizeof (struct list));
+ mp_list->cmp = bgp_info_mpath_cmp;
+}
+
+/*
+ * bgp_mp_list_clear
+ *
+ * Clears all entries out of the mp_list
+ */
+void
+bgp_mp_list_clear (struct list *mp_list)
+{
+ assert (mp_list);
+ list_delete_all_node (mp_list);
+}
+
+/*
+ * bgp_mp_list_add
+ *
+ * Adds a multipath entry to the mp_list
+ */
+void
+bgp_mp_list_add (struct list *mp_list, struct bgp_info *mpinfo)
+{
+ assert (mp_list && mpinfo);
+ listnode_add_sort (mp_list, mpinfo);
+}
+
+/*
+ * bgp_info_mpath_new
+ *
+ * Allocate and zero memory for a new bgp_info_mpath element
+ */
+static struct bgp_info_mpath *
+bgp_info_mpath_new (void)
+{
+ struct bgp_info_mpath *new_mpath;
+ new_mpath = XCALLOC (MTYPE_BGP_MPATH_INFO, sizeof (struct bgp_info_mpath));
+ return new_mpath;
+}
+
+/*
+ * bgp_info_mpath_free
+ *
+ * Release resources for a bgp_info_mpath element and zero out pointer
+ */
+void
+bgp_info_mpath_free (struct bgp_info_mpath **mpath)
+{
+ if (mpath && *mpath)
+ {
+ if ((*mpath)->mp_attr)
+ bgp_attr_unintern ((*mpath)->mp_attr);
+ XFREE (MTYPE_BGP_MPATH_INFO, *mpath);
+ *mpath = NULL;
+ }
+}
+
+/*
+ * bgp_info_mpath_get
+ *
+ * Fetch the mpath element for the given bgp_info. Used for
+ * doing lazy allocation.
+ */
+static struct bgp_info_mpath *
+bgp_info_mpath_get (struct bgp_info *binfo)
+{
+ struct bgp_info_mpath *mpath;
+ if (!binfo->mpath)
+ {
+ mpath = bgp_info_mpath_new();
+ if (!mpath)
+ return NULL;
+ binfo->mpath = mpath;
+ mpath->mp_info = binfo;
+ }
+ return binfo->mpath;
+}
+
+/*
+ * bgp_info_mpath_enqueue
+ *
+ * Enqueue a path onto the multipath list given the previous multipath
+ * list entry
+ */
+static void
+bgp_info_mpath_enqueue (struct bgp_info *prev_info, struct bgp_info *binfo)
+{
+ struct bgp_info_mpath *prev, *mpath;
+
+ prev = bgp_info_mpath_get (prev_info);
+ mpath = bgp_info_mpath_get (binfo);
+ if (!prev || !mpath)
+ return;
+
+ mpath->mp_next = prev->mp_next;
+ mpath->mp_prev = prev;
+ if (prev->mp_next)
+ prev->mp_next->mp_prev = mpath;
+ prev->mp_next = mpath;
+
+ SET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
+}
+
+/*
+ * bgp_info_mpath_dequeue
+ *
+ * Remove a path from the multipath list
+ */
+void
+bgp_info_mpath_dequeue (struct bgp_info *binfo)
+{
+ struct bgp_info_mpath *mpath = binfo->mpath;
+ if (!mpath)
+ return;
+ if (mpath->mp_prev)
+ mpath->mp_prev->mp_next = mpath->mp_next;
+ if (mpath->mp_next)
+ mpath->mp_next->mp_prev = mpath->mp_prev;
+ mpath->mp_next = mpath->mp_prev = NULL;
+ UNSET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
+}
+
+/*
+ * bgp_info_mpath_next
+ *
+ * Given a bgp_info, return the next multipath entry
+ */
+struct bgp_info *
+bgp_info_mpath_next (struct bgp_info *binfo)
+{
+ if (!binfo->mpath || !binfo->mpath->mp_next)
+ return NULL;
+ return binfo->mpath->mp_next->mp_info;
+}
+
+/*
+ * bgp_info_mpath_first
+ *
+ * Given bestpath bgp_info, return the first multipath entry.
+ */
+struct bgp_info *
+bgp_info_mpath_first (struct bgp_info *binfo)
+{
+ return bgp_info_mpath_next (binfo);
+}
+
+/*
+ * bgp_info_mpath_count
+ *
+ * Given the bestpath bgp_info, return the number of multipath entries
+ */
+u_int32_t
+bgp_info_mpath_count (struct bgp_info *binfo)
+{
+ if (!binfo->mpath)
+ return 0;
+ return binfo->mpath->mp_count;
+}
+
+/*
+ * bgp_info_mpath_count_set
+ *
+ * Sets the count of multipaths into bestpath's mpath element
+ */
+static void
+bgp_info_mpath_count_set (struct bgp_info *binfo, u_int32_t count)
+{
+ struct bgp_info_mpath *mpath;
+ if (!count && !binfo->mpath)
+ return;
+ mpath = bgp_info_mpath_get (binfo);
+ if (!mpath)
+ return;
+ mpath->mp_count = count;
+}
+
+/*
+ * bgp_info_mpath_attr
+ *
+ * Given bestpath bgp_info, return aggregated attribute set used
+ * for advertising the multipath route
+ */
+struct attr *
+bgp_info_mpath_attr (struct bgp_info *binfo)
+{
+ if (!binfo->mpath)
+ return NULL;
+ return binfo->mpath->mp_attr;
+}
+
+/*
+ * bgp_info_mpath_attr_set
+ *
+ * Sets the aggregated attribute into bestpath's mpath element
+ */
+static void
+bgp_info_mpath_attr_set (struct bgp_info *binfo, struct attr *attr)
+{
+ struct bgp_info_mpath *mpath;
+ if (!attr && !binfo->mpath)
+ return;
+ mpath = bgp_info_mpath_get (binfo);
+ if (!mpath)
+ return;
+ mpath->mp_attr = attr;
+}
+
+/*
+ * bgp_info_mpath_update
+ *
+ * Compare and sync up the multipath list with the mp_list generated by
+ * bgp_best_selection
+ */
+void
+bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best,
+ struct bgp_info *old_best, struct list *mp_list,
+ struct bgp_maxpaths_cfg *mpath_cfg)
+{
+ u_int16_t maxpaths, mpath_count, old_mpath_count;
+ struct listnode *mp_node, *mp_next_node;
+ struct bgp_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath;
+ int mpath_changed, debug;
+ char pfx_buf[INET_ADDRSTRLEN], nh_buf[2][INET_ADDRSTRLEN];
+
+ mpath_changed = 0;
+ maxpaths = BGP_DEFAULT_MAXPATHS;
+ mpath_count = 0;
+ cur_mpath = NULL;
+ old_mpath_count = 0;
+ prev_mpath = new_best;
+ mp_node = listhead (mp_list);
+ debug = BGP_DEBUG (events, EVENTS);
+
+ if (debug)
+ prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf));
+
+ if (new_best)
+ {
+ mpath_count++;
+ if (new_best != old_best)
+ bgp_info_mpath_dequeue (new_best);
+ maxpaths = (peer_sort (new_best->peer) == BGP_PEER_IBGP) ?
+ mpath_cfg->maxpaths_ibgp : mpath_cfg->maxpaths_ebgp;
+ }
+
+ if (old_best)
+ {
+ cur_mpath = bgp_info_mpath_first (old_best);
+ old_mpath_count = bgp_info_mpath_count (old_best);
+ bgp_info_mpath_count_set (old_best, 0);
+ bgp_info_mpath_dequeue (old_best);
+ }
+
+ /*
+ * We perform an ordered walk through both lists in parallel.
+ * The reason for the ordered walk is that if there are paths
+ * that were previously multipaths and are still multipaths, the walk
+ * should encounter them in both lists at the same time. Otherwise
+ * there will be paths that are in one list or another, and we
+ * will deal with these separately.
+ *
+ * Note that new_best might be somewhere in the mp_list, so we need
+ * to skip over it
+ */
+ while (mp_node || cur_mpath)
+ {
+ /*
+ * We can bail out of this loop if all existing paths on the
+ * multipath list have been visited (for cleanup purposes) and
+ * the maxpath requirement is fulfulled
+ */
+ if (!cur_mpath && (mpath_count >= maxpaths))
+ break;
+
+ mp_next_node = mp_node ? listnextnode (mp_node) : NULL;
+ next_mpath = cur_mpath ? bgp_info_mpath_next (cur_mpath) : NULL;
+
+ /*
+ * If equal, the path was a multipath and is still a multipath.
+ * Insert onto new multipath list if maxpaths allows.
+ */
+ if (mp_node && (listgetdata (mp_node) == cur_mpath))
+ {
+ list_delete_node (mp_list, mp_node);
+ bgp_info_mpath_dequeue (cur_mpath);
+ if ((mpath_count < maxpaths) &&
+ bgp_info_nexthop_cmp (prev_mpath, cur_mpath))
+ {
+ bgp_info_mpath_enqueue (prev_mpath, cur_mpath);
+ prev_mpath = cur_mpath;
+ mpath_count++;
+ }
+ else
+ {
+ mpath_changed = 1;
+ if (debug)
+ zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
+ inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
+ nh_buf[0], sizeof (nh_buf[0])),
+ sockunion2str (cur_mpath->peer->su_remote,
+ nh_buf[1], sizeof (nh_buf[1])));
+ }
+ mp_node = mp_next_node;
+ cur_mpath = next_mpath;
+ continue;
+ }
+
+ if (cur_mpath && (!mp_node ||
+ (bgp_info_mpath_cmp (cur_mpath,
+ listgetdata (mp_node)) < 0)))
+ {
+ /*
+ * If here, we have an old multipath and either the mp_list
+ * is finished or the next mp_node points to a later
+ * multipath, so we need to purge this path from the
+ * multipath list
+ */
+ bgp_info_mpath_dequeue (cur_mpath);
+ mpath_changed = 1;
+ if (debug)
+ zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
+ inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
+ nh_buf[0], sizeof (nh_buf[0])),
+ sockunion2str (cur_mpath->peer->su_remote,
+ nh_buf[1], sizeof (nh_buf[1])));
+ cur_mpath = next_mpath;
+ }
+ else
+ {
+ /*
+ * If here, we have a path on the mp_list that was not previously
+ * a multipath (due to non-equivalance or maxpaths exceeded),
+ * or the matching multipath is sorted later in the multipath
+ * list. Before we enqueue the path on the new multipath list,
+ * make sure its not on the old_best multipath list or referenced
+ * via next_mpath:
+ * - If next_mpath points to this new path, update next_mpath to
+ * point to the multipath after this one
+ * - Dequeue the path from the multipath list just to make sure
+ */
+ new_mpath = listgetdata (mp_node);
+ list_delete_node (mp_list, mp_node);
+ if ((mpath_count < maxpaths) && (new_mpath != new_best) &&
+ bgp_info_nexthop_cmp (prev_mpath, new_mpath))
+ {
+ if (new_mpath == next_mpath)
+ next_mpath = bgp_info_mpath_next (new_mpath);
+ bgp_info_mpath_dequeue (new_mpath);
+
+ bgp_info_mpath_enqueue (prev_mpath, new_mpath);
+ prev_mpath = new_mpath;
+ mpath_changed = 1;
+ mpath_count++;
+ if (debug)
+ zlog_debug ("%s add mpath nexthop %s peer %s", pfx_buf,
+ inet_ntop (AF_INET, &new_mpath->attr->nexthop,
+ nh_buf[0], sizeof (nh_buf[0])),
+ sockunion2str (new_mpath->peer->su_remote,
+ nh_buf[1], sizeof (nh_buf[1])));
+ }
+ mp_node = mp_next_node;
+ }
+ }
+
+ if (new_best)
+ {
+ bgp_info_mpath_count_set (new_best, mpath_count-1);
+ if (mpath_changed || (bgp_info_mpath_count (new_best) != old_mpath_count))
+ SET_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG);
+ }
+}
+
+/*
+ * bgp_mp_dmed_deselect
+ *
+ * Clean up multipath information for BGP_INFO_DMED_SELECTED path that
+ * is not selected as best path
+ */
+void
+bgp_mp_dmed_deselect (struct bgp_info *dmed_best)
+{
+ struct bgp_info *mpinfo, *mpnext;
+
+ if (!dmed_best)
+ return;
+
+ for (mpinfo = bgp_info_mpath_first (dmed_best); mpinfo; mpinfo = mpnext)
+ {
+ mpnext = bgp_info_mpath_next (mpinfo);
+ bgp_info_mpath_dequeue (mpinfo);
+ }
+
+ bgp_info_mpath_count_set (dmed_best, 0);
+ UNSET_FLAG (dmed_best->flags, BGP_INFO_MULTIPATH_CHG);
+ assert (bgp_info_mpath_first (dmed_best) == 0);
+}
+
+/*
+ * bgp_info_mpath_aggregate_update
+ *
+ * Set the multipath aggregate attribute. We need to see if the
+ * aggregate has changed and then set the ATTR_CHANGED flag on the
+ * bestpath info so that a peer update will be generated. The
+ * change is detected by generating the current attribute,
+ * interning it, and then comparing the interned pointer with the
+ * current value. We can skip this generate/compare step if there
+ * is no change in multipath selection and no attribute change in
+ * any multipath.
+ */
+void
+bgp_info_mpath_aggregate_update (struct bgp_info *new_best,
+ struct bgp_info *old_best)
+{
+ struct bgp_info *mpinfo;
+ struct aspath *aspath;
+ struct aspath *asmerge;
+ struct attr *new_attr, *old_attr;
+ u_char origin, attr_chg;
+ struct community *community, *commerge;
+ struct ecommunity *ecomm, *ecommerge;
+ struct attr_extra *ae;
+ struct attr attr = { 0 };
+
+ if (old_best && (old_best != new_best) &&
+ (old_attr = bgp_info_mpath_attr (old_best)))
+ {
+ bgp_attr_unintern (old_attr);
+ bgp_info_mpath_attr_set (old_best, NULL);
+ }
+
+ if (!new_best)
+ return;
+
+ if (!bgp_info_mpath_count (new_best))
+ {
+ if ((new_attr = bgp_info_mpath_attr (new_best)))
+ {
+ bgp_attr_unintern (new_attr);
+ bgp_info_mpath_attr_set (new_best, NULL);
+ SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
+ }
+ return;
+ }
+
+ /*
+ * Bail out here if the following is true:
+ * - MULTIPATH_CHG bit is not set on new_best, and
+ * - No change in bestpath, and
+ * - ATTR_CHANGED bit is not set on new_best or any of the multipaths
+ */
+ if (!CHECK_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG) &&
+ (old_best == new_best))
+ {
+ attr_chg = 0;
+
+ if (CHECK_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED))
+ attr_chg = 1;
+ else
+ for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
+ mpinfo = bgp_info_mpath_next (mpinfo))
+ {
+ if (CHECK_FLAG (mpinfo->flags, BGP_INFO_ATTR_CHANGED))
+ {
+ attr_chg = 1;
+ break;
+ }
+ }
+
+ if (!attr_chg)
+ {
+ assert (bgp_info_mpath_attr (new_best));
+ return;
+ }
+ }
+
+ bgp_attr_dup (&attr, new_best->attr);
+
+ /* aggregate attribute from multipath constituents */
+ aspath = aspath_dup (attr.aspath);
+ origin = attr.origin;
+ community = attr.community ? community_dup (attr.community) : NULL;
+ ae = attr.extra;
+ ecomm = (ae && ae->ecommunity) ? ecommunity_dup (ae->ecommunity) : NULL;
+
+ for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
+ mpinfo = bgp_info_mpath_next (mpinfo))
+ {
+ asmerge = aspath_aggregate (aspath, mpinfo->attr->aspath);
+ aspath_free (aspath);
+ aspath = asmerge;
+
+ if (origin < mpinfo->attr->origin)
+ origin = mpinfo->attr->origin;
+
+ if (mpinfo->attr->community)
+ {
+ if (community)
+ {
+ commerge = community_merge (community, mpinfo->attr->community);
+ community = community_uniq_sort (commerge);
+ community_free (commerge);
+ }
+ else
+ community = community_dup (mpinfo->attr->community);
+ }
+
+ ae = mpinfo->attr->extra;
+ if (ae && ae->ecommunity)
+ {
+ if (ecomm)
+ {
+ ecommerge = ecommunity_merge (ecomm, ae->ecommunity);
+ ecomm = ecommunity_uniq_sort (ecommerge);
+ ecommunity_free (ecommerge);
+ }
+ else
+ ecomm = ecommunity_dup (ae->ecommunity);
+ }
+ }
+
+ attr.aspath = aspath;
+ attr.origin = origin;
+ if (community)
+ {
+ attr.community = community;
+ attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_COMMUNITIES);
+ }
+ if (ecomm)
+ {
+ ae = bgp_attr_extra_get (&attr);
+ ae->ecommunity = ecomm;
+ attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES);
+ }
+
+ /* Zap multipath attr nexthop so we set nexthop to self */
+ attr.nexthop.s_addr = 0;
+#ifdef HAVE_IPV6
+ if (attr.extra)
+ memset (&attr.extra->mp_nexthop_global, 0, sizeof (struct in6_addr));
+#endif /* HAVE_IPV6 */
+
+ /* TODO: should we set ATOMIC_AGGREGATE and AGGREGATOR? */
+
+ new_attr = bgp_attr_intern (&attr);
+ bgp_attr_extra_free (&attr);
+
+ if (new_attr != bgp_info_mpath_attr (new_best))
+ {
+ if ((old_attr = bgp_info_mpath_attr (new_best)))
+ bgp_attr_unintern (old_attr);
+ bgp_info_mpath_attr_set (new_best, new_attr);
+ SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
+ }
+ else
+ bgp_attr_unintern (new_attr);
+}
diff --git a/bgpd/bgp_mpath.h b/bgpd/bgp_mpath.h
new file mode 100644
index 00000000..37b9ac8b
--- /dev/null
+++ b/bgpd/bgp_mpath.h
@@ -0,0 +1,80 @@
+/* $QuaggaId: Format:%an, %ai, %h$ $
+ *
+ * BGP Multipath
+ * Copyright (C) 2010 Google Inc.
+ *
+ * This file is part of Quagga
+ *
+ * Quagga is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * Quagga is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Quagga; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _QUAGGA_BGP_MPATH_H
+#define _QUAGGA_BGP_MPATH_H
+
+/* BGP default maximum-paths */
+#define BGP_DEFAULT_MAXPATHS 1
+
+/* Supplemental information linked to bgp_info for keeping track of
+ * multipath selections, lazily allocated to save memory
+ */
+struct bgp_info_mpath
+{
+ /* Points to the first multipath (on bestpath) or the next multipath */
+ struct bgp_info_mpath *mp_next;
+
+ /* Points to the previous multipath or NULL on bestpath */
+ struct bgp_info_mpath *mp_prev;
+
+ /* Points to bgp_info associated with this multipath info */
+ struct bgp_info *mp_info;
+
+ /* When attached to best path, the number of selected multipaths */
+ u_int32_t mp_count;
+
+ /* Aggregated attribute for advertising multipath route */
+ struct attr *mp_attr;
+};
+
+/* Functions to support maximum-paths configuration */
+extern int bgp_maximum_paths_set (struct bgp *, afi_t, safi_t, int, u_int16_t);
+extern int bgp_maximum_paths_unset (struct bgp *, afi_t, safi_t, int);
+
+/* Functions used by bgp_best_selection to record current
+ * multipath selections
+ */
+extern void bgp_mp_list_init (struct list *);
+extern void bgp_mp_list_clear (struct list *);
+extern void bgp_mp_list_add (struct list *, struct bgp_info *);
+extern void bgp_mp_dmed_deselect (struct bgp_info *);
+extern void bgp_info_mpath_update (struct bgp_node *, struct bgp_info *,
+ struct bgp_info *, struct list *,
+ struct bgp_maxpaths_cfg *);
+extern void bgp_info_mpath_aggregate_update (struct bgp_info *,
+ struct bgp_info *);
+
+/* Unlink and free multipath information associated with a bgp_info */
+extern void bgp_info_mpath_dequeue (struct bgp_info *);
+extern void bgp_info_mpath_free (struct bgp_info_mpath **);
+
+/* Walk list of multipaths associated with a best path */
+extern struct bgp_info *bgp_info_mpath_first (struct bgp_info *);
+extern struct bgp_info *bgp_info_mpath_next (struct bgp_info *);
+
+/* Accessors for multipath information */
+extern u_int32_t bgp_info_mpath_count (struct bgp_info *);
+extern struct attr *bgp_info_mpath_attr (struct bgp_info *);
+
+#endif /* _QUAGGA_BGP_MPATH_H */
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index ba530321..087f8396 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -54,6 +54,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#include "bgpd/bgp_advertise.h"
#include "bgpd/bgp_zebra.h"
#include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_mpath.h"
/* Extern from bgp_dump.c */
extern const char *bgp_origin_str[];
@@ -140,6 +141,7 @@ bgp_info_free (struct bgp_info *binfo)
bgp_attr_unintern (&binfo->attr);
bgp_info_extra_free (&binfo->extra);
+ bgp_info_mpath_free (&binfo->mpath);
peer_unlock (binfo->peer); /* bgp_info peer reference */
@@ -210,6 +212,7 @@ bgp_info_reap (struct bgp_node *rn, struct bgp_info *ri)
else
rn->info = ri->next;
+ bgp_info_mpath_dequeue (ri);
bgp_info_unlock (ri);
bgp_unlock_node (rn);
}
@@ -316,7 +319,8 @@ bgp_med_value (struct attr *attr, struct bgp *bgp)
/* Compare two bgp route entity. br is preferable then return 1. */
static int
-bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist)
+bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist,
+ int *paths_eq)
{
u_int32_t new_pref;
u_int32_t exist_pref;
@@ -331,6 +335,9 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist)
int internal_as_route = 0;
int confed_as_route = 0;
int ret;
+ uint32_t newm, existm;
+
+ *paths_eq = 0;
/* 0. Null check. */
if (new == NULL)
@@ -454,18 +461,32 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist)
return 0;
/* 8. IGP metric check. */
- if (new->extra || exist->extra)
- {
- uint32_t newm = (new->extra ? new->extra->igpmetric : 0);
- uint32_t existm = (exist->extra ? exist->extra->igpmetric : 0);
-
- if (newm < existm)
- return 1;
- if (newm > existm)
- return 0;
- }
+ newm = (new->extra ? new->extra->igpmetric : 0);
+ existm = (exist->extra ? exist->extra->igpmetric : 0);
+ if (newm < existm)
+ ret = 1;
+ if (newm > existm)
+ ret = 0;
/* 9. Maximum path check. */
+ if (newm == existm)
+ {
+ if ((peer_sort (new->peer) == BGP_PEER_IBGP))
+ {
+ if (aspath_cmp (new->attr->aspath, exist->attr->aspath))
+ *paths_eq = 1;
+ }
+ else if (new->peer->as == exist->peer->as)
+ *paths_eq = 1;
+ }
+ else
+ {
+ /*
+ * TODO: If unequal cost ibgp multipath is enabled we can
+ * mark the paths as equal here instead of returning
+ */
+ return ret;
+ }
/* 10. If both paths are external, prefer the path that was received
first (the oldest one). This step minimizes route-flap, since a
@@ -764,10 +785,12 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p,
struct bgp *bgp;
int transparent;
int reflect;
+ struct attr *riattr;
from = ri->peer;
filter = &peer->filter[afi][safi];
bgp = peer->bgp;
+ riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr;
if (DISABLE_BGP_ANNOUNCE)
return 0;
@@ -782,11 +805,11 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p,
/* If peer's id and route's nexthop are same. draft-ietf-idr-bgp4-23 5.1.3 */
if (p->family == AF_INET
- && IPV4_ADDR_SAME(&peer->remote_id, &ri->attr->nexthop))
+ && IPV4_ADDR_SAME(&peer->remote_id, &riattr->nexthop))
return 0;
#ifdef HAVE_IPV6
if (p->family == AF_INET6
- && IPV6_ADDR_SAME(&peer->remote_id, &ri->attr->nexthop))
+ && IPV6_ADDR_SAME(&peer->remote_id, &riattr->nexthop))
return 0;
#endif
@@ -814,14 +837,14 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p,
transparent = 0;
/* If community is not disabled check the no-export and local. */
- if (! transparent && bgp_community_filter (peer, ri->attr))
+ if (! transparent && bgp_community_filter (peer, riattr))
return 0;
/* If the attribute has originator-id and it is same as remote
peer's id. */
- if (ri->attr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
+ if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
{
- if (IPV4_ADDR_SAME (&peer->remote_id, &ri->attr->extra->originator_id))
+ if (IPV4_ADDR_SAME (&peer->remote_id, &riattr->extra->originator_id))
{
if (BGP_DEBUG (filter, FILTER))
zlog (peer->log, LOG_DEBUG,
@@ -844,7 +867,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p,
}
/* Output filter check. */
- if (bgp_output_filter (peer, p, ri->attr, afi, safi) == FILTER_DENY)
+ if (bgp_output_filter (peer, p, riattr, afi, safi) == FILTER_DENY)
{
if (BGP_DEBUG (filter, FILTER))
zlog (peer->log, LOG_DEBUG,
@@ -857,7 +880,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p,
#ifdef BGP_SEND_ASPATH_CHECK
/* AS path loop check. */
- if (aspath_loop_check (ri->attr->aspath, peer->as))
+ if (aspath_loop_check (riattr->aspath, peer->as))
{
if (BGP_DEBUG (filter, FILTER))
zlog (peer->log, LOG_DEBUG,
@@ -870,7 +893,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p,
/* If we're a CONFED we need to loop check the CONFED ID too */
if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION))
{
- if (aspath_loop_check(ri->attr->aspath, bgp->confed_id))
+ if (aspath_loop_check(riattr->aspath, bgp->confed_id))
{
if (BGP_DEBUG (filter, FILTER))
zlog (peer->log, LOG_DEBUG,
@@ -911,7 +934,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p,
}
/* For modify attribute, copy it to temporary structure. */
- bgp_attr_dup (attr, ri->attr);
+ bgp_attr_dup (attr, riattr);
/* If local-preference is not set. */
if ((peer_sort (peer) == BGP_PEER_IBGP
@@ -1069,9 +1092,11 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient,
struct bgp_filter *filter;
struct bgp_info info;
struct peer *from;
+ struct attr *riattr;
from = ri->peer;
filter = &rsclient->filter[afi][safi];
+ riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr;
if (DISABLE_BGP_ANNOUNCE)
return 0;
@@ -1099,10 +1124,10 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient,
/* If the attribute has originator-id and it is same as remote
peer's id. */
- if (ri->attr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
+ if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
{
if (IPV4_ADDR_SAME (&rsclient->remote_id,
- &ri->attr->extra->originator_id))
+ &riattr->extra->originator_id))
{
if (BGP_DEBUG (filter, FILTER))
zlog (rsclient->log, LOG_DEBUG,
@@ -1125,7 +1150,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient,
}
/* Output filter check. */
- if (bgp_output_filter (rsclient, p, ri->attr, afi, safi) == FILTER_DENY)
+ if (bgp_output_filter (rsclient, p, riattr, afi, safi) == FILTER_DENY)
{
if (BGP_DEBUG (filter, FILTER))
zlog (rsclient->log, LOG_DEBUG,
@@ -1138,7 +1163,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient,
#ifdef BGP_SEND_ASPATH_CHECK
/* AS path loop check. */
- if (aspath_loop_check (ri->attr->aspath, rsclient->as))
+ if (aspath_loop_check (riattr->aspath, rsclient->as))
{
if (BGP_DEBUG (filter, FILTER))
zlog (rsclient->log, LOG_DEBUG,
@@ -1149,7 +1174,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient,
#endif /* BGP_SEND_ASPATH_CHECK */
/* For modify attribute, copy it to temporary structure. */
- bgp_attr_dup (attr, ri->attr);
+ bgp_attr_dup (attr, riattr);
/* next-hop-set */
if ((p->family == AF_INET && attr->nexthop.s_addr == 0)
@@ -1265,7 +1290,9 @@ struct bgp_info_pair
};
static void
-bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair *result)
+bgp_best_selection (struct bgp *bgp, struct bgp_node *rn,
+ struct bgp_maxpaths_cfg *mpath_cfg,
+ struct bgp_info_pair *result)
{
struct bgp_info *new_select;
struct bgp_info *old_select;
@@ -1273,7 +1300,13 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair *
struct bgp_info *ri1;
struct bgp_info *ri2;
struct bgp_info *nextri = NULL;
-
+ int paths_eq, do_mpath;
+ struct list mp_list;
+
+ bgp_mp_list_init (&mp_list);
+ do_mpath = (mpath_cfg->maxpaths_ebgp != BGP_DEFAULT_MAXPATHS ||
+ mpath_cfg->maxpaths_ibgp != BGP_DEFAULT_MAXPATHS);
+
/* bgp deterministic-med */
new_select = NULL;
if (bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
@@ -1285,6 +1318,9 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair *
continue;
new_select = ri1;
+ if (do_mpath)
+ bgp_mp_list_add (&mp_list, ri1);
+ old_select = CHECK_FLAG (ri1->flags, BGP_INFO_SELECTED) ? ri1 : NULL;
if (ri1->next)
for (ri2 = ri1->next; ri2; ri2 = ri2->next)
{
@@ -1297,17 +1333,30 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair *
|| aspath_cmp_left_confed (ri1->attr->aspath,
ri2->attr->aspath))
{
- if (bgp_info_cmp (bgp, ri2, new_select))
+ if (CHECK_FLAG (ri2->flags, BGP_INFO_SELECTED))
+ old_select = ri2;
+ if (bgp_info_cmp (bgp, ri2, new_select, &paths_eq))
{
bgp_info_unset_flag (rn, new_select, BGP_INFO_DMED_SELECTED);
new_select = ri2;
+ if (do_mpath && !paths_eq)
+ {
+ bgp_mp_list_clear (&mp_list);
+ bgp_mp_list_add (&mp_list, ri2);
+ }
}
+ if (do_mpath && paths_eq)
+ bgp_mp_list_add (&mp_list, ri2);
+
bgp_info_set_flag (rn, ri2, BGP_INFO_DMED_CHECK);
}
}
bgp_info_set_flag (rn, new_select, BGP_INFO_DMED_CHECK);
bgp_info_set_flag (rn, new_select, BGP_INFO_DMED_SELECTED);
+
+ bgp_info_mpath_update (rn, new_select, old_select, &mp_list, mpath_cfg);
+ bgp_mp_list_clear (&mp_list);
}
/* Check old selected route and new selected route. */
@@ -1339,14 +1388,37 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair *
bgp_info_unset_flag (rn, ri, BGP_INFO_DMED_CHECK);
bgp_info_unset_flag (rn, ri, BGP_INFO_DMED_SELECTED);
- if (bgp_info_cmp (bgp, ri, new_select))
- new_select = ri;
+ if (bgp_info_cmp (bgp, ri, new_select, &paths_eq))
+ {
+ if (do_mpath && bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
+ bgp_mp_dmed_deselect (new_select);
+
+ new_select = ri;
+
+ if (do_mpath && !paths_eq)
+ {
+ bgp_mp_list_clear (&mp_list);
+ bgp_mp_list_add (&mp_list, ri);
+ }
+ }
+ else if (do_mpath && bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
+ bgp_mp_dmed_deselect (ri);
+
+ if (do_mpath && paths_eq)
+ bgp_mp_list_add (&mp_list, ri);
}
- result->old = old_select;
- result->new = new_select;
- return;
+ if (!bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
+ bgp_info_mpath_update (rn, new_select, old_select, &mp_list, mpath_cfg);
+
+ bgp_info_mpath_aggregate_update (new_select, old_select);
+ bgp_mp_list_clear (&mp_list);
+
+ result->old = old_select;
+ result->new = new_select;
+
+ return;
}
static int
@@ -1420,7 +1492,7 @@ bgp_process_rsclient (struct work_queue *wq, void *data)
struct peer *rsclient = rn->table->owner;
/* Best path selection. */
- bgp_best_selection (bgp, rn, &old_and_new);
+ bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new);
new_select = old_and_new.new;
old_select = old_and_new.old;
@@ -1440,7 +1512,8 @@ bgp_process_rsclient (struct work_queue *wq, void *data)
{
bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED);
bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED);
- }
+ UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG);
+ }
bgp_process_announce_selected (rsclient, new_select, rn,
afi, safi);
@@ -1454,6 +1527,7 @@ bgp_process_rsclient (struct work_queue *wq, void *data)
{
bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED);
bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED);
+ UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG);
}
bgp_process_announce_selected (rsclient, new_select, rn, afi, safi);
}
@@ -1481,7 +1555,7 @@ bgp_process_main (struct work_queue *wq, void *data)
struct peer *peer;
/* Best path selection. */
- bgp_best_selection (bgp, rn, &old_and_new);
+ bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new);
old_select = old_and_new.old;
new_select = old_and_new.new;
@@ -1490,9 +1564,11 @@ bgp_process_main (struct work_queue *wq, void *data)
{
if (! CHECK_FLAG (old_select->flags, BGP_INFO_ATTR_CHANGED))
{
- if (CHECK_FLAG (old_select->flags, BGP_INFO_IGP_CHANGED))
+ if (CHECK_FLAG (old_select->flags, BGP_INFO_IGP_CHANGED) ||
+ CHECK_FLAG (old_select->flags, BGP_INFO_MULTIPATH_CHG))
bgp_zebra_announce (p, old_select, bgp, safi);
+ UNSET_FLAG (old_select->flags, BGP_INFO_MULTIPATH_CHG);
UNSET_FLAG (rn->flags, BGP_NODE_PROCESS_SCHEDULED);
return WQ_SUCCESS;
}
@@ -1504,6 +1580,7 @@ bgp_process_main (struct work_queue *wq, void *data)
{
bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED);
bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED);
+ UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG);
}
@@ -5932,6 +6009,11 @@ route_vty_out_detail (struct vty *vty, struct bgp *bgp, struct prefix *p,
if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_ATOMIC_AGGREGATE))
vty_out (vty, ", atomic-aggregate");
+ if (CHECK_FLAG (binfo->flags, BGP_INFO_MULTIPATH) ||
+ (CHECK_FLAG (binfo->flags, BGP_INFO_SELECTED) &&
+ bgp_info_mpath_count (binfo)))
+ vty_out (vty, ", multipath");
+
if (CHECK_FLAG (binfo->flags, BGP_INFO_SELECTED))
vty_out (vty, ", best");
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
index 7adc573b..3d2eea51 100644
--- a/bgpd/bgp_route.h
+++ b/bgpd/bgp_route.h
@@ -57,6 +57,10 @@ struct bgp_info
/* Extra information */
struct bgp_info_extra *extra;
+
+ /* Multipath information */
+ struct bgp_info_mpath *mpath;
+
/* Uptime. */
time_t uptime;
@@ -76,6 +80,8 @@ struct bgp_info
#define BGP_INFO_STALE (1 << 8)
#define BGP_INFO_REMOVED (1 << 9)
#define BGP_INFO_COUNTED (1 << 10)
+#define BGP_INFO_MULTIPATH (1 << 11)
+#define BGP_INFO_MULTIPATH_CHG (1 << 12)
/* BGP route type. This can be static, RIP, OSPF, BGP etc. */
u_char type;
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index f65bb157..cbe0b443 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -48,6 +48,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#include "bgpd/bgp_zebra.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_mpath.h"
extern struct in_addr router_id_zebra;
@@ -650,6 +651,149 @@ DEFUN (no_bgp_confederation_peers,
return CMD_SUCCESS;
}
+/* Maximum-paths configuration */
+DEFUN (bgp_maxpaths,
+ bgp_maxpaths_cmd,
+ "maximum-paths <1-255>",
+ "Forward packets over multiple paths\n"
+ "Number of paths\n")
+{
+ struct bgp *bgp;
+ u_int16_t maxpaths;
+ int ret;
+
+ bgp = vty->index;
+
+ VTY_GET_INTEGER_RANGE ("maximum-paths", maxpaths, argv[0], 1, 255);
+
+ ret = bgp_maximum_paths_set (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+ BGP_PEER_EBGP, maxpaths);
+ if (ret < 0)
+ {
+ vty_out (vty,
+ "%% Failed to set maximum-paths %u for afi %u, safi %u%s",
+ maxpaths, bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+ return CMD_WARNING;
+ }
+
+ return CMD_SUCCESS;
+}
+
+DEFUN (bgp_maxpaths_ibgp,
+ bgp_maxpaths_ibgp_cmd,
+ "maximum-paths ibgp <1-255>",
+ "Forward packets over multiple paths\n"
+ "iBGP-multipath\n"
+ "Number of paths\n")
+{
+ struct bgp *bgp;
+ u_int16_t maxpaths;
+ int ret;
+
+ bgp = vty->index;
+
+ VTY_GET_INTEGER_RANGE ("maximum-paths", maxpaths, argv[0], 1, 255);
+
+ ret = bgp_maximum_paths_set (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+ BGP_PEER_IBGP, maxpaths);
+ if (ret < 0)
+ {
+ vty_out (vty,
+ "%% Failed to set maximum-paths ibgp %u for afi %u, safi %u%s",
+ maxpaths, bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+ return CMD_WARNING;
+ }
+
+ return CMD_SUCCESS;
+}
+
+DEFUN (no_bgp_maxpaths,
+ no_bgp_maxpaths_cmd,
+ "no maximum-paths",
+ NO_STR
+ "Forward packets over multiple paths\n"
+ "Number of paths\n")
+{
+ struct bgp *bgp;
+ int ret;
+
+ bgp = vty->index;
+
+ ret = bgp_maximum_paths_unset (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+ BGP_PEER_EBGP);
+ if (ret < 0)
+ {
+ vty_out (vty,
+ "%% Failed to unset maximum-paths for afi %u, safi %u%s",
+ bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+ return CMD_WARNING;
+ }
+
+ return CMD_SUCCESS;
+}
+
+ALIAS (no_bgp_maxpaths,
+ no_bgp_maxpaths_arg_cmd,
+ "no maximum-paths <1-255>",
+ NO_STR
+ "Forward packets over multiple paths\n"
+ "Number of paths\n")
+
+DEFUN (no_bgp_maxpaths_ibgp,
+ no_bgp_maxpaths_ibgp_cmd,
+ "no maximum-paths ibgp",
+ NO_STR
+ "Forward packets over multiple paths\n"
+ "iBGP-multipath\n"
+ "Number of paths\n")
+{
+ struct bgp *bgp;
+ int ret;
+
+ bgp = vty->index;
+
+ ret = bgp_maximum_paths_unset (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+ BGP_PEER_IBGP);
+ if (ret < 0)
+ {
+ vty_out (vty,
+ "%% Failed to unset maximum-paths ibgp for afi %u, safi %u%s",
+ bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+ return CMD_WARNING;
+ }
+
+ return CMD_SUCCESS;
+}
+
+ALIAS (no_bgp_maxpaths_ibgp,
+ no_bgp_maxpaths_ibgp_arg_cmd,
+ "no maximum-paths ibgp <1-255>",
+ NO_STR
+ "Forward packets over multiple paths\n"
+ "iBGP-multipath\n"
+ "Number of paths\n")
+
+int
+bgp_config_write_maxpaths (struct vty *vty, struct bgp *bgp, afi_t afi,
+ safi_t safi, int *write)
+{
+ if (bgp->maxpaths[afi][safi].maxpaths_ebgp != BGP_DEFAULT_MAXPATHS)
+ {
+ bgp_config_write_family_header (vty, afi, safi, write);
+ vty_out (vty, " maximum-paths %d%s",
+ bgp->maxpaths[afi][safi].maxpaths_ebgp, VTY_NEWLINE);
+ }
+
+ if (bgp->maxpaths[afi][safi].maxpaths_ibgp != BGP_DEFAULT_MAXPATHS)
+ {
+ bgp_config_write_family_header (vty, afi, safi, write);
+ vty_out (vty, " maximum-paths ibgp %d%s",
+ bgp->maxpaths[afi][safi].maxpaths_ibgp, VTY_NEWLINE);
+ }
+
+ return 0;
+}
+
/* BGP timers. */
DEFUN (bgp_timers,
@@ -8937,6 +9081,20 @@ bgp_vty_init (void)
install_element (BGP_NODE, &bgp_confederation_peers_cmd);
install_element (BGP_NODE, &no_bgp_confederation_peers_cmd);
+ /* "maximum-paths" commands. */
+ install_element (BGP_NODE, &bgp_maxpaths_cmd);
+ install_element (BGP_NODE, &no_bgp_maxpaths_cmd);
+ install_element (BGP_NODE, &no_bgp_maxpaths_arg_cmd);
+ install_element (BGP_IPV4_NODE, &bgp_maxpaths_cmd);
+ install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_cmd);
+ install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_arg_cmd);
+ install_element (BGP_NODE, &bgp_maxpaths_ibgp_cmd);
+ install_element (BGP_NODE, &no_bgp_maxpaths_ibgp_cmd);
+ install_element (BGP_NODE, &no_bgp_maxpaths_ibgp_arg_cmd);
+ install_element (BGP_IPV4_NODE, &bgp_maxpaths_ibgp_cmd);
+ install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_ibgp_cmd);
+ install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_ibgp_arg_cmd);
+
/* "timers bgp" commands. */
install_element (BGP_NODE, &bgp_timers_cmd);
install_element (BGP_NODE, &no_bgp_timers_cmd);
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index 20feba0f..5c0dbb88 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -37,11 +37,15 @@ Boston, MA 02111-1307, USA. */
#include "bgpd/bgp_zebra.h"
#include "bgpd/bgp_fsm.h"
#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_mpath.h"
/* All information about zebra. */
struct zclient *zclient = NULL;
struct in_addr router_id_zebra;
+/* Growable buffer for nexthops sent to zebra */
+struct stream *bgp_nexthop_buf = NULL;
+
/* Router-id update message from zebra. */
static int
bgp_router_id_update (int command, struct zclient *zclient, zebra_size_t length)
@@ -648,6 +652,8 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa
int flags;
u_char distance;
struct peer *peer;
+ struct bgp_info *mpinfo;
+ size_t oldsize, newsize;
if (zclient->sock < 0)
return;
@@ -668,6 +674,21 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa
|| CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK))
SET_FLAG (flags, ZEBRA_FLAG_INTERNAL);
+ /* resize nexthop buffer size if necessary */
+ if ((oldsize = stream_get_size (bgp_nexthop_buf)) <
+ (sizeof (struct in_addr *) * (bgp_info_mpath_count (info) + 1)))
+ {
+ newsize = (sizeof (struct in_addr *) * (bgp_info_mpath_count (info) + 1));
+ newsize = stream_resize (bgp_nexthop_buf, newsize);
+ if (newsize == oldsize)
+ {
+ zlog_err ("can't resize nexthop buffer");
+ return;
+ }
+ }
+
+ stream_reset (bgp_nexthop_buf);
+
if (p->family == AF_INET)
{
struct zapi_ipv4 api;
@@ -675,13 +696,20 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa
api.flags = flags;
nexthop = &info->attr->nexthop;
+ stream_put (bgp_nexthop_buf, &nexthop, sizeof (struct in_addr *));
+ for (mpinfo = bgp_info_mpath_first (info); mpinfo;
+ mpinfo = bgp_info_mpath_next (mpinfo))
+ {
+ nexthop = &mpinfo->attr->nexthop;
+ stream_put (bgp_nexthop_buf, &nexthop, sizeof (struct in_addr *));
+ }
api.type = ZEBRA_ROUTE_BGP;
api.message = 0;
api.safi = safi;
SET_FLAG (api.message, ZAPI_MESSAGE_NEXTHOP);
- api.nexthop_num = 1;
- api.nexthop = &nexthop;
+ api.nexthop_num = 1 + bgp_info_mpath_count (info);
+ api.nexthop = (struct in_addr **)STREAM_DATA (bgp_nexthop_buf);
api.ifindex_num = 0;
SET_FLAG (api.message, ZAPI_MESSAGE_METRIC);
api.metric = info->attr->med;
@@ -696,12 +724,18 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info *info, struct bgp *bgp, sa
if (BGP_DEBUG(zebra, ZEBRA))
{
+ int i;
char buf[2][INET_ADDRSTRLEN];
- zlog_debug("Zebra send: IPv4 route add %s/%d nexthop %s metric %u",
+ zlog_debug("Zebra send: IPv4 route add %s/%d nexthop %s metric %u"
+ " count %d",
inet_ntop(AF_INET, &p->u.prefix4, buf[0], sizeof(buf[0])),
p->prefixlen,
- inet_ntop(AF_INET, nexthop, buf[1], sizeof(buf[1])),
- api.metric);
+ inet_ntop(AF_INET, api.nexthop[0], buf[1], sizeof(buf[1])),
+ api.metric, api.nexthop_num);
+ for (i = 1; i < api.nexthop_num; i++)
+ zlog_debug("Zebra send: IPv4 route add [nexthop %d] %s",
+ i, inet_ntop(AF_INET, api.nexthop[i], buf[1],
+ sizeof(buf[1])));
}
zapi_ipv4_route (ZEBRA_IPV4_ROUTE_ADD, zclient,
@@ -1050,4 +1084,6 @@ bgp_zebra_init (void)
/* Interface related init. */
if_init ();
+
+ bgp_nexthop_buf = stream_new(BGP_NEXTHOP_BUF_SIZE);
}
diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h
index 1351333f..80991930 100644
--- a/bgpd/bgp_zebra.h
+++ b/bgpd/bgp_zebra.h
@@ -21,8 +21,14 @@ Boston, MA 02111-1307, USA. */
#ifndef _QUAGGA_BGP_ZEBRA_H
#define _QUAGGA_BGP_ZEBRA_H
+#define BGP_NEXTHOP_BUF_SIZE (8 * sizeof (struct in_addr *))
+
+extern struct stream *bgp_nexthop_buf;
+
extern void bgp_zebra_init (void);
extern int bgp_if_update_all (void);
+extern int bgp_config_write_maxpaths (struct vty *, struct bgp *, afi_t,
+ safi_t, int *);
extern int bgp_config_write_redistribute (struct vty *, struct bgp *, afi_t, safi_t,
int *);
extern void bgp_zebra_announce (struct prefix *, struct bgp_info *, struct bgp *, safi_t);
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 5dc014be..9c8eda88 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -57,6 +57,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#include "bgpd/bgp_advertise.h"
#include "bgpd/bgp_network.h"
#include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_mpath.h"
#ifdef HAVE_SNMP
#include "bgpd/bgp_snmp.h"
#endif /* HAVE_SNMP */
@@ -1947,6 +1948,8 @@ bgp_create (as_t *as, const char *name)
bgp->route[afi][safi] = bgp_table_init (afi, safi);
bgp->aggregate[afi][safi] = bgp_table_init (afi, safi);
bgp->rib[afi][safi] = bgp_table_init (afi, safi);
+ bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
+ bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
}
bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF;
@@ -5121,6 +5124,9 @@ bgp_config_write_family (struct vty *vty, struct bgp *bgp, afi_t afi,
}
}
}
+
+ bgp_config_write_maxpaths (vty, bgp, afi, safi, &write);
+
if (write)
vty_out (vty, " exit-address-family%s", VTY_NEWLINE);
@@ -5294,6 +5300,9 @@ bgp_config_write (struct vty *vty)
bgp_config_write_peer (vty, bgp, peer, AFI_IP, SAFI_UNICAST);
}
+ /* maximum-paths */
+ bgp_config_write_maxpaths (vty, bgp, AFI_IP, SAFI_UNICAST, &write);
+
/* Distance configuration. */
bgp_config_write_distance (vty, bgp);
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 892e7dec..09a3435c 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -162,6 +162,12 @@ struct bgp
/* BGP graceful restart */
u_int32_t restart_time;
u_int32_t stalepath_time;
+
+ /* Maximum-paths configuration */
+ struct bgp_maxpaths_cfg {
+ u_int16_t maxpaths_ebgp;
+ u_int16_t maxpaths_ibgp;
+ } maxpaths[AFI_MAX][SAFI_MAX];
};
/* BGP peer-group support. */